From 7b93670dbdceb1031fb0c7d2bc49e6b480ca5167 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 31 Aug 2020 21:44:29 +0300 Subject: [PATCH 001/123] [Vulkan] Remove old Vulkan code, change shaders directory, create empty Vulkan backend --- premake5.lua | 1 - src/xenia/app/premake5.lua | 1 - src/xenia/app/xenia_main.cc | 3 +- src/xenia/gpu/d3d12/d3d12_graphics_system.cc | 6 +- src/xenia/gpu/d3d12/pipeline_cache.cc | 20 +- src/xenia/gpu/d3d12/premake5.lua | 2 +- src/xenia/gpu/d3d12/render_target_cache.cc | 62 +- src/xenia/gpu/d3d12/texture_cache.cc | 76 +- src/xenia/gpu/premake5.lua | 4 - src/xenia/gpu/shader_compiler_main.cc | 20 +- .../{d3d12 => }/shaders/adaptive_quad.hs.hlsl | 0 .../shaders/adaptive_triangle.hs.hlsl | 0 .../bytecode/d3d12_5_1}/adaptive_quad_hs.cso | Bin .../bytecode/d3d12_5_1}/adaptive_quad_hs.h | 0 .../bytecode/d3d12_5_1}/adaptive_quad_hs.txt | 0 .../d3d12_5_1}/adaptive_triangle_hs.cso | Bin .../d3d12_5_1}/adaptive_triangle_hs.h | 0 .../d3d12_5_1}/adaptive_triangle_hs.txt | 0 .../d3d12_5_1}/continuous_quad_hs.cso | Bin .../bytecode/d3d12_5_1}/continuous_quad_hs.h | 0 .../d3d12_5_1}/continuous_quad_hs.txt | 0 .../d3d12_5_1}/continuous_triangle_hs.cso | Bin .../d3d12_5_1}/continuous_triangle_hs.h | 0 .../d3d12_5_1}/continuous_triangle_hs.txt | 0 .../bytecode/d3d12_5_1}/discrete_quad_hs.cso | Bin .../bytecode/d3d12_5_1}/discrete_quad_hs.h | 0 .../bytecode/d3d12_5_1}/discrete_quad_hs.txt | 0 .../d3d12_5_1}/discrete_triangle_hs.cso | Bin .../d3d12_5_1}/discrete_triangle_hs.h | 0 .../d3d12_5_1}/discrete_triangle_hs.txt | 0 .../d3d12_5_1}/edram_load_color_32bpp_cs.cso | Bin .../d3d12_5_1}/edram_load_color_32bpp_cs.h | 0 .../d3d12_5_1}/edram_load_color_32bpp_cs.txt | 0 .../d3d12_5_1}/edram_load_color_64bpp_cs.cso | Bin .../d3d12_5_1}/edram_load_color_64bpp_cs.h | 0 .../d3d12_5_1}/edram_load_color_64bpp_cs.txt | 0 .../d3d12_5_1}/edram_load_color_7e3_cs.cso | Bin .../d3d12_5_1}/edram_load_color_7e3_cs.h | 0 .../d3d12_5_1}/edram_load_color_7e3_cs.txt | 0 
.../d3d12_5_1}/edram_load_depth_float_cs.cso | Bin .../d3d12_5_1}/edram_load_depth_float_cs.h | 0 .../d3d12_5_1}/edram_load_depth_float_cs.txt | 0 .../d3d12_5_1}/edram_load_depth_unorm_cs.cso | Bin .../d3d12_5_1}/edram_load_depth_unorm_cs.h | 0 .../d3d12_5_1}/edram_load_depth_unorm_cs.txt | 0 .../d3d12_5_1}/edram_store_color_32bpp_cs.cso | Bin .../d3d12_5_1}/edram_store_color_32bpp_cs.h | 0 .../d3d12_5_1}/edram_store_color_32bpp_cs.txt | 0 .../d3d12_5_1}/edram_store_color_64bpp_cs.cso | Bin .../d3d12_5_1}/edram_store_color_64bpp_cs.h | 0 .../d3d12_5_1}/edram_store_color_64bpp_cs.txt | 0 .../d3d12_5_1}/edram_store_color_7e3_cs.cso | Bin .../d3d12_5_1}/edram_store_color_7e3_cs.h | 0 .../d3d12_5_1}/edram_store_color_7e3_cs.txt | 0 .../d3d12_5_1}/edram_store_depth_float_cs.cso | Bin .../d3d12_5_1}/edram_store_depth_float_cs.h | 0 .../d3d12_5_1}/edram_store_depth_float_cs.txt | 0 .../d3d12_5_1}/edram_store_depth_unorm_cs.cso | Bin .../d3d12_5_1}/edram_store_depth_unorm_cs.h | 0 .../d3d12_5_1}/edram_store_depth_unorm_cs.txt | 0 .../bytecode/d3d12_5_1}/fullscreen_vs.cso | Bin .../bytecode/d3d12_5_1}/fullscreen_vs.h | 0 .../bytecode/d3d12_5_1}/fullscreen_vs.txt | 0 .../d3d12_5_1}/primitive_point_list_gs.cso | Bin .../d3d12_5_1}/primitive_point_list_gs.h | 0 .../d3d12_5_1}/primitive_point_list_gs.txt | 0 .../d3d12_5_1}/primitive_quad_list_gs.cso | Bin .../d3d12_5_1}/primitive_quad_list_gs.h | 0 .../d3d12_5_1}/primitive_quad_list_gs.txt | 0 .../primitive_rectangle_list_gs.cso | Bin .../d3d12_5_1}/primitive_rectangle_list_gs.h | 0 .../primitive_rectangle_list_gs.txt | 0 .../resolve_clear_32bpp_2xres_cs.cso | Bin .../d3d12_5_1}/resolve_clear_32bpp_2xres_cs.h | 0 .../resolve_clear_32bpp_2xres_cs.txt | 0 .../d3d12_5_1}/resolve_clear_32bpp_cs.cso | Bin .../d3d12_5_1}/resolve_clear_32bpp_cs.h | 0 .../d3d12_5_1}/resolve_clear_32bpp_cs.txt | 0 .../resolve_clear_64bpp_2xres_cs.cso | Bin .../d3d12_5_1}/resolve_clear_64bpp_2xres_cs.h | 0 .../resolve_clear_64bpp_2xres_cs.txt | 0 
.../d3d12_5_1}/resolve_clear_64bpp_cs.cso | Bin .../d3d12_5_1}/resolve_clear_64bpp_cs.h | 0 .../d3d12_5_1}/resolve_clear_64bpp_cs.txt | 0 .../resolve_clear_depth_24_32_cs.cso | Bin .../d3d12_5_1}/resolve_clear_depth_24_32_cs.h | 0 .../resolve_clear_depth_24_32_cs.txt | 0 .../resolve_fast_32bpp_1x2xmsaa_cs.cso | Bin .../resolve_fast_32bpp_1x2xmsaa_cs.h | 0 .../resolve_fast_32bpp_1x2xmsaa_cs.txt | 0 .../resolve_fast_32bpp_2xres_cs.cso | Bin .../d3d12_5_1}/resolve_fast_32bpp_2xres_cs.h | 0 .../resolve_fast_32bpp_2xres_cs.txt | 0 .../resolve_fast_32bpp_4xmsaa_cs.cso | Bin .../d3d12_5_1}/resolve_fast_32bpp_4xmsaa_cs.h | 0 .../resolve_fast_32bpp_4xmsaa_cs.txt | 0 .../resolve_fast_64bpp_1x2xmsaa_cs.cso | Bin .../resolve_fast_64bpp_1x2xmsaa_cs.h | 0 .../resolve_fast_64bpp_1x2xmsaa_cs.txt | 0 .../resolve_fast_64bpp_2xres_cs.cso | Bin .../d3d12_5_1}/resolve_fast_64bpp_2xres_cs.h | 0 .../resolve_fast_64bpp_2xres_cs.txt | 0 .../resolve_fast_64bpp_4xmsaa_cs.cso | Bin .../d3d12_5_1}/resolve_fast_64bpp_4xmsaa_cs.h | 0 .../resolve_fast_64bpp_4xmsaa_cs.txt | 0 .../resolve_full_128bpp_2xres_cs.cso | Bin .../d3d12_5_1}/resolve_full_128bpp_2xres_cs.h | 0 .../resolve_full_128bpp_2xres_cs.txt | 0 .../d3d12_5_1}/resolve_full_128bpp_cs.cso | Bin .../d3d12_5_1}/resolve_full_128bpp_cs.h | 0 .../d3d12_5_1}/resolve_full_128bpp_cs.txt | 0 .../resolve_full_16bpp_2xres_cs.cso | Bin .../d3d12_5_1}/resolve_full_16bpp_2xres_cs.h | 0 .../resolve_full_16bpp_2xres_cs.txt | 0 .../d3d12_5_1}/resolve_full_16bpp_cs.cso | Bin .../d3d12_5_1}/resolve_full_16bpp_cs.h | 0 .../d3d12_5_1}/resolve_full_16bpp_cs.txt | 0 .../resolve_full_32bpp_2xres_cs.cso | Bin .../d3d12_5_1}/resolve_full_32bpp_2xres_cs.h | 0 .../resolve_full_32bpp_2xres_cs.txt | 0 .../d3d12_5_1}/resolve_full_32bpp_cs.cso | Bin .../d3d12_5_1}/resolve_full_32bpp_cs.h | 0 .../d3d12_5_1}/resolve_full_32bpp_cs.txt | 0 .../resolve_full_64bpp_2xres_cs.cso | Bin .../d3d12_5_1}/resolve_full_64bpp_2xres_cs.h | 0 .../resolve_full_64bpp_2xres_cs.txt | 0 
.../d3d12_5_1}/resolve_full_64bpp_cs.cso | Bin .../d3d12_5_1}/resolve_full_64bpp_cs.h | 0 .../d3d12_5_1}/resolve_full_64bpp_cs.txt | 0 .../d3d12_5_1}/resolve_full_8bpp_2xres_cs.cso | Bin .../d3d12_5_1}/resolve_full_8bpp_2xres_cs.h | 0 .../d3d12_5_1}/resolve_full_8bpp_2xres_cs.txt | 0 .../d3d12_5_1}/resolve_full_8bpp_cs.cso | Bin .../d3d12_5_1}/resolve_full_8bpp_cs.h | 0 .../d3d12_5_1}/resolve_full_8bpp_cs.txt | 0 .../bytecode/d3d12_5_1}/stretch_gamma_ps.cso | Bin .../bytecode/d3d12_5_1}/stretch_gamma_ps.h | 0 .../bytecode/d3d12_5_1}/stretch_gamma_ps.txt | 0 .../bytecode/d3d12_5_1}/stretch_ps.cso | Bin .../bytecode/d3d12_5_1}/stretch_ps.h | 0 .../bytecode/d3d12_5_1}/stretch_ps.txt | 0 .../bytecode/d3d12_5_1}/tessellation_vs.cso | Bin .../bytecode/d3d12_5_1}/tessellation_vs.h | 0 .../bytecode/d3d12_5_1}/tessellation_vs.txt | 0 .../d3d12_5_1}/texture_load_128bpb_2x_cs.cso | Bin .../d3d12_5_1}/texture_load_128bpb_2x_cs.h | 0 .../d3d12_5_1}/texture_load_128bpb_2x_cs.txt | 0 .../d3d12_5_1}/texture_load_128bpb_cs.cso | Bin .../d3d12_5_1}/texture_load_128bpb_cs.h | 0 .../d3d12_5_1}/texture_load_128bpb_cs.txt | 0 .../d3d12_5_1}/texture_load_16bpb_2x_cs.cso | Bin .../d3d12_5_1}/texture_load_16bpb_2x_cs.h | 0 .../d3d12_5_1}/texture_load_16bpb_2x_cs.txt | 0 .../d3d12_5_1}/texture_load_16bpb_cs.cso | Bin .../d3d12_5_1}/texture_load_16bpb_cs.h | 0 .../d3d12_5_1}/texture_load_16bpb_cs.txt | 0 .../d3d12_5_1}/texture_load_32bpb_2x_cs.cso | Bin .../d3d12_5_1}/texture_load_32bpb_2x_cs.h | 0 .../d3d12_5_1}/texture_load_32bpb_2x_cs.txt | 0 .../d3d12_5_1}/texture_load_32bpb_cs.cso | Bin .../d3d12_5_1}/texture_load_32bpb_cs.h | 0 .../d3d12_5_1}/texture_load_32bpb_cs.txt | 0 .../d3d12_5_1}/texture_load_64bpb_2x_cs.cso | Bin .../d3d12_5_1}/texture_load_64bpb_2x_cs.h | 0 .../d3d12_5_1}/texture_load_64bpb_2x_cs.txt | 0 .../d3d12_5_1}/texture_load_64bpb_cs.cso | Bin .../d3d12_5_1}/texture_load_64bpb_cs.h | 0 .../d3d12_5_1}/texture_load_64bpb_cs.txt | 0 
.../d3d12_5_1}/texture_load_8bpb_2x_cs.cso | Bin .../d3d12_5_1}/texture_load_8bpb_2x_cs.h | 0 .../d3d12_5_1}/texture_load_8bpb_2x_cs.txt | 0 .../d3d12_5_1}/texture_load_8bpb_cs.cso | Bin .../d3d12_5_1}/texture_load_8bpb_cs.h | 0 .../d3d12_5_1}/texture_load_8bpb_cs.txt | 0 .../d3d12_5_1}/texture_load_ctx1_cs.cso | Bin .../d3d12_5_1}/texture_load_ctx1_cs.h | 0 .../d3d12_5_1}/texture_load_ctx1_cs.txt | 0 .../texture_load_depth_float_2x_cs.cso | Bin .../texture_load_depth_float_2x_cs.h | 0 .../texture_load_depth_float_2x_cs.txt | 0 .../texture_load_depth_float_cs.cso | Bin .../d3d12_5_1}/texture_load_depth_float_cs.h | 0 .../texture_load_depth_float_cs.txt | 0 .../texture_load_depth_unorm_2x_cs.cso | Bin .../texture_load_depth_unorm_2x_cs.h | 0 .../texture_load_depth_unorm_2x_cs.txt | 0 .../texture_load_depth_unorm_cs.cso | Bin .../d3d12_5_1}/texture_load_depth_unorm_cs.h | 0 .../texture_load_depth_unorm_cs.txt | 0 .../d3d12_5_1}/texture_load_dxn_rg8_cs.cso | Bin .../d3d12_5_1}/texture_load_dxn_rg8_cs.h | 0 .../d3d12_5_1}/texture_load_dxn_rg8_cs.txt | 0 .../d3d12_5_1}/texture_load_dxt1_rgba8_cs.cso | Bin .../d3d12_5_1}/texture_load_dxt1_rgba8_cs.h | 0 .../d3d12_5_1}/texture_load_dxt1_rgba8_cs.txt | 0 .../d3d12_5_1}/texture_load_dxt3_rgba8_cs.cso | Bin .../d3d12_5_1}/texture_load_dxt3_rgba8_cs.h | 0 .../d3d12_5_1}/texture_load_dxt3_rgba8_cs.txt | 0 .../d3d12_5_1}/texture_load_dxt3a_cs.cso | Bin .../d3d12_5_1}/texture_load_dxt3a_cs.h | 0 .../d3d12_5_1}/texture_load_dxt3a_cs.txt | 0 .../texture_load_dxt3aas1111_cs.cso | Bin .../d3d12_5_1}/texture_load_dxt3aas1111_cs.h | 0 .../texture_load_dxt3aas1111_cs.txt | 0 .../d3d12_5_1}/texture_load_dxt5_rgba8_cs.cso | Bin .../d3d12_5_1}/texture_load_dxt5_rgba8_cs.h | 0 .../d3d12_5_1}/texture_load_dxt5_rgba8_cs.txt | 0 .../d3d12_5_1}/texture_load_dxt5a_r8_cs.cso | Bin .../d3d12_5_1}/texture_load_dxt5a_r8_cs.h | 0 .../d3d12_5_1}/texture_load_dxt5a_r8_cs.txt | 0 .../texture_load_r10g11b11_rgba16_2x_cs.cso | Bin 
.../texture_load_r10g11b11_rgba16_2x_cs.h | 0 .../texture_load_r10g11b11_rgba16_2x_cs.txt | 0 .../texture_load_r10g11b11_rgba16_cs.cso | Bin .../texture_load_r10g11b11_rgba16_cs.h | 0 .../texture_load_r10g11b11_rgba16_cs.txt | 0 ...ture_load_r10g11b11_rgba16_snorm_2x_cs.cso | Bin ...exture_load_r10g11b11_rgba16_snorm_2x_cs.h | 0 ...ture_load_r10g11b11_rgba16_snorm_2x_cs.txt | 0 ...texture_load_r10g11b11_rgba16_snorm_cs.cso | Bin .../texture_load_r10g11b11_rgba16_snorm_cs.h | 0 ...texture_load_r10g11b11_rgba16_snorm_cs.txt | 0 .../texture_load_r11g11b10_rgba16_2x_cs.cso | Bin .../texture_load_r11g11b10_rgba16_2x_cs.h | 0 .../texture_load_r11g11b10_rgba16_2x_cs.txt | 0 .../texture_load_r11g11b10_rgba16_cs.cso | Bin .../texture_load_r11g11b10_rgba16_cs.h | 0 .../texture_load_r11g11b10_rgba16_cs.txt | 0 ...ture_load_r11g11b10_rgba16_snorm_2x_cs.cso | Bin ...exture_load_r11g11b10_rgba16_snorm_2x_cs.h | 0 ...ture_load_r11g11b10_rgba16_snorm_2x_cs.txt | 0 ...texture_load_r11g11b10_rgba16_snorm_cs.cso | Bin .../texture_load_r11g11b10_rgba16_snorm_cs.h | 0 ...texture_load_r11g11b10_rgba16_snorm_cs.txt | 0 .../texture_load_r4g4b4a4_b4g4r4a4_2x_cs.cso | Bin .../texture_load_r4g4b4a4_b4g4r4a4_2x_cs.h | 0 .../texture_load_r4g4b4a4_b4g4r4a4_2x_cs.txt | 0 .../texture_load_r4g4b4a4_b4g4r4a4_cs.cso | Bin .../texture_load_r4g4b4a4_b4g4r4a4_cs.h | 0 .../texture_load_r4g4b4a4_b4g4r4a4_cs.txt | 0 .../texture_load_r5g5b5a1_b5g5r5a1_2x_cs.cso | Bin .../texture_load_r5g5b5a1_b5g5r5a1_2x_cs.h | 0 .../texture_load_r5g5b5a1_b5g5r5a1_2x_cs.txt | 0 .../texture_load_r5g5b5a1_b5g5r5a1_cs.cso | Bin .../texture_load_r5g5b5a1_b5g5r5a1_cs.h | 0 .../texture_load_r5g5b5a1_b5g5r5a1_cs.txt | 0 ..._load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.cso | Bin ...re_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.h | 0 ..._load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.txt | 0 ...ure_load_r5g5b6_b5g6r5_swizzle_rbga_cs.cso | Bin ...xture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.h | 0 ...ure_load_r5g5b6_b5g6r5_swizzle_rbga_cs.txt | 0 
.../texture_load_r5g6b5_b5g6r5_2x_cs.cso | Bin .../texture_load_r5g6b5_b5g6r5_2x_cs.h | 0 .../texture_load_r5g6b5_b5g6r5_2x_cs.txt | 0 .../texture_load_r5g6b5_b5g6r5_cs.cso | Bin .../texture_load_r5g6b5_b5g6r5_cs.h | 0 .../texture_load_r5g6b5_b5g6r5_cs.txt | 0 .../shaders/continuous_quad.hs.hlsl | 0 .../shaders/continuous_triangle.hs.hlsl | 0 .../{d3d12 => }/shaders/discrete_quad.hs.hlsl | 0 .../shaders/discrete_triangle.hs.hlsl | 0 src/xenia/gpu/{d3d12 => }/shaders/edram.hlsli | 0 .../shaders/edram_load_color_32bpp.cs.hlsl | 0 .../shaders/edram_load_color_64bpp.cs.hlsl | 0 .../shaders/edram_load_color_7e3.cs.hlsl | 0 .../shaders/edram_load_depth_float.cs.hlsl | 0 .../shaders/edram_load_depth_unorm.cs.hlsl | 0 .../shaders/edram_load_store.hlsli | 0 .../shaders/edram_store_color_32bpp.cs.hlsl | 0 .../shaders/edram_store_color_64bpp.cs.hlsl | 0 .../shaders/edram_store_color_7e3.cs.hlsl | 0 .../shaders/edram_store_depth_float.cs.hlsl | 0 .../shaders/edram_store_depth_unorm.cs.hlsl | 0 .../gpu/{d3d12 => }/shaders/endian.hlsli | 0 .../{d3d12 => }/shaders/fullscreen.vs.hlsl | 0 .../{d3d12 => }/shaders/pixel_formats.hlsli | 0 .../shaders/primitive_point_list.gs.hlsl | 0 .../shaders/primitive_quad_list.gs.hlsl | 0 .../shaders/primitive_rectangle_list.gs.hlsl | 0 .../gpu/{d3d12 => }/shaders/resolve.hlsli | 0 .../shaders/resolve_clear_32bpp.cs.hlsl | 0 .../shaders/resolve_clear_32bpp_2xres.cs.hlsl | 0 .../shaders/resolve_clear_64bpp.cs.hlsl | 0 .../shaders/resolve_clear_64bpp_2xres.cs.hlsl | 0 .../shaders/resolve_clear_depth_24_32.cs.hlsl | 0 .../resolve_fast_32bpp_1x2xmsaa.cs.hlsl | 0 .../shaders/resolve_fast_32bpp_2xres.cs.hlsl | 0 .../shaders/resolve_fast_32bpp_4xmsaa.cs.hlsl | 0 .../resolve_fast_64bpp_1x2xmsaa.cs.hlsl | 0 .../shaders/resolve_fast_64bpp_2xres.cs.hlsl | 0 .../shaders/resolve_fast_64bpp_4xmsaa.cs.hlsl | 0 .../shaders/resolve_full_128bpp.cs.hlsl | 0 .../shaders/resolve_full_128bpp_2xres.cs.hlsl | 0 .../shaders/resolve_full_16bpp.cs.hlsl | 0 
.../shaders/resolve_full_16bpp_2xres.cs.hlsl | 0 .../shaders/resolve_full_32bpp.cs.hlsl | 0 .../shaders/resolve_full_32bpp_2xres.cs.hlsl | 0 .../shaders/resolve_full_64bpp.cs.hlsl | 0 .../shaders/resolve_full_64bpp_2xres.cs.hlsl | 0 .../shaders/resolve_full_8bpp.cs.hlsl | 0 .../shaders/resolve_full_8bpp_2xres.cs.hlsl | 0 .../gpu/{d3d12 => }/shaders/stretch.ps.hlsl | 0 .../{d3d12 => }/shaders/stretch_gamma.ps.hlsl | 0 .../{d3d12 => }/shaders/tessellation.vs.hlsl | 0 .../{d3d12 => }/shaders/texture_address.hlsli | 0 .../{d3d12 => }/shaders/texture_load.hlsli | 0 .../shaders/texture_load_128bpb.cs.hlsl | 0 .../shaders/texture_load_128bpb_2x.cs.hlsl | 0 .../shaders/texture_load_16bpb.cs.hlsl | 0 .../shaders/texture_load_16bpb.hlsli | 0 .../shaders/texture_load_16bpb_2x.cs.hlsl | 0 .../shaders/texture_load_16bpb_2x.hlsli | 0 .../shaders/texture_load_32bpb.cs.hlsl | 0 .../shaders/texture_load_32bpb_2x.cs.hlsl | 0 .../shaders/texture_load_32bpb_64bpb.hlsli | 0 .../shaders/texture_load_32bpb_64bpb_2x.hlsli | 0 .../shaders/texture_load_64bpb.cs.hlsl | 0 .../shaders/texture_load_64bpb_2x.cs.hlsl | 0 .../shaders/texture_load_8bpb.cs.hlsl | 0 .../shaders/texture_load_8bpb_2x.cs.hlsl | 0 .../shaders/texture_load_ctx1.cs.hlsl | 0 .../shaders/texture_load_depth_float.cs.hlsl | 0 .../texture_load_depth_float_2x.cs.hlsl | 0 .../shaders/texture_load_depth_unorm.cs.hlsl | 0 .../texture_load_depth_unorm_2x.cs.hlsl | 0 .../shaders/texture_load_dxn_rg8.cs.hlsl | 0 .../shaders/texture_load_dxt1_rgba8.cs.hlsl | 0 .../shaders/texture_load_dxt3_rgba8.cs.hlsl | 0 .../shaders/texture_load_dxt3a.cs.hlsl | 0 .../shaders/texture_load_dxt3aas1111.cs.hlsl | 0 .../shaders/texture_load_dxt5_rgba8.cs.hlsl | 0 .../shaders/texture_load_dxt5a_r8.cs.hlsl | 0 .../texture_load_r10g11b11_rgba16.cs.hlsl | 0 .../texture_load_r10g11b11_rgba16_2x.cs.hlsl | 0 ...exture_load_r10g11b11_rgba16_snorm.cs.hlsl | 0 ...ure_load_r10g11b11_rgba16_snorm_2x.cs.hlsl | 0 .../texture_load_r11g11b10_rgba16.cs.hlsl | 0 
.../texture_load_r11g11b10_rgba16_2x.cs.hlsl | 0 ...exture_load_r11g11b10_rgba16_snorm.cs.hlsl | 0 ...ure_load_r11g11b10_rgba16_snorm_2x.cs.hlsl | 0 .../texture_load_r4g4b4a4_b4g4r4a4.cs.hlsl | 0 .../texture_load_r4g4b4a4_b4g4r4a4_2x.cs.hlsl | 0 .../texture_load_r5g5b5a1_b5g5r5a1.cs.hlsl | 0 .../texture_load_r5g5b5a1_b5g5r5a1_2x.cs.hlsl | 0 ...re_load_r5g5b6_b5g6r5_swizzle_rbga.cs.hlsl | 0 ...load_r5g5b6_b5g6r5_swizzle_rbga_2x.cs.hlsl | 0 .../texture_load_r5g6b5_b5g6r5.cs.hlsl | 0 .../texture_load_r5g6b5_b5g6r5_2x.cs.hlsl | 0 .../gpu/{d3d12 => }/shaders/xenos_draw.hlsli | 0 src/xenia/gpu/spirv/compiler.cc | 36 - src/xenia/gpu/spirv/compiler.h | 41 - src/xenia/gpu/spirv/compiler_pass.h | 37 - .../passes/control_flow_analysis_pass.cpp | 30 - .../spirv/passes/control_flow_analysis_pass.h | 34 - .../control_flow_simplification_pass.cc | 48 - .../passes/control_flow_simplification_pass.h | 34 - src/xenia/gpu/spirv_shader_translator.cc | 3471 ----------------- src/xenia/gpu/spirv_shader_translator.h | 187 - src/xenia/gpu/vulkan/buffer_cache.cc | 809 ---- src/xenia/gpu/vulkan/buffer_cache.h | 177 - src/xenia/gpu/vulkan/pipeline_cache.cc | 1597 -------- src/xenia/gpu/vulkan/pipeline_cache.h | 311 -- src/xenia/gpu/vulkan/premake5.lua | 135 +- src/xenia/gpu/vulkan/render_cache.cc | 1404 ------- src/xenia/gpu/vulkan/render_cache.h | 406 -- src/xenia/gpu/vulkan/shaders/bin/dummy_frag.h | 50 - .../gpu/vulkan/shaders/bin/dummy_frag.txt | 37 - .../vulkan/shaders/bin/line_quad_list_geom.h | 183 - .../shaders/bin/line_quad_list_geom.txt | 132 - .../gpu/vulkan/shaders/bin/point_list_geom.h | 245 -- .../vulkan/shaders/bin/point_list_geom.txt | 167 - .../gpu/vulkan/shaders/bin/quad_list_geom.h | 171 - .../gpu/vulkan/shaders/bin/quad_list_geom.txt | 120 - .../gpu/vulkan/shaders/bin/rect_list_geom.h | 374 -- .../gpu/vulkan/shaders/bin/rect_list_geom.txt | 274 -- src/xenia/gpu/vulkan/shaders/dummy.frag | 35 - .../gpu/vulkan/shaders/line_quad_list.geom | 53 - 
src/xenia/gpu/vulkan/shaders/point_list.geom | 63 - src/xenia/gpu/vulkan/shaders/quad_list.geom | 42 - src/xenia/gpu/vulkan/shaders/rect_list.geom | 124 - src/xenia/gpu/vulkan/texture_cache.cc | 1664 -------- src/xenia/gpu/vulkan/texture_cache.h | 244 -- src/xenia/gpu/vulkan/texture_config.cc | 146 - src/xenia/gpu/vulkan/texture_config.h | 50 - .../gpu/vulkan/vulkan_command_processor.cc | 1292 +----- .../gpu/vulkan/vulkan_command_processor.h | 106 +- src/xenia/gpu/vulkan/vulkan_gpu_flags.cc | 16 - src/xenia/gpu/vulkan/vulkan_gpu_flags.h | 20 - .../gpu/vulkan/vulkan_graphics_system.cc | 298 +- src/xenia/gpu/vulkan/vulkan_graphics_system.h | 19 +- src/xenia/gpu/vulkan/vulkan_shader.cc | 64 - src/xenia/gpu/vulkan/vulkan_shader.h | 43 - .../gpu/vulkan/vulkan_trace_dump_main.cc | 60 - .../gpu/vulkan/vulkan_trace_viewer_main.cc | 76 - src/xenia/ui/d3d12/d3d12_context.h | 3 +- src/xenia/ui/d3d12/d3d12_immediate_drawer.cc | 4 +- src/xenia/ui/d3d12/premake5.lua | 2 +- .../bytecode/d3d12_5_1}/immediate_ps.cso | Bin .../bytecode/d3d12_5_1}/immediate_ps.h | 0 .../bytecode/d3d12_5_1}/immediate_ps.txt | 0 .../bytecode/d3d12_5_1}/immediate_vs.cso | Bin .../bytecode/d3d12_5_1}/immediate_vs.h | 0 .../bytecode/d3d12_5_1}/immediate_vs.txt | 0 .../ui/{d3d12 => }/shaders/immediate.ps.hlsl | 0 .../ui/{d3d12 => }/shaders/immediate.vs.hlsl | 0 src/xenia/ui/spirv/premake5.lua | 19 - src/xenia/ui/spirv/spirv_assembler.cc | 78 - src/xenia/ui/spirv/spirv_assembler.h | 69 - src/xenia/ui/spirv/spirv_disassembler.cc | 82 - src/xenia/ui/spirv/spirv_disassembler.h | 66 - src/xenia/ui/spirv/spirv_util.cc | 20 - src/xenia/ui/spirv/spirv_util.h | 36 - src/xenia/ui/spirv/spirv_validator.cc | 80 - src/xenia/ui/spirv/spirv_validator.h | 66 - src/xenia/ui/vulkan/blitter.cc | 588 --- src/xenia/ui/vulkan/blitter.h | 101 - src/xenia/ui/vulkan/circular_buffer.cc | 281 -- src/xenia/ui/vulkan/circular_buffer.h | 93 - src/xenia/ui/vulkan/fenced_pools.cc | 124 - src/xenia/ui/vulkan/fenced_pools.h | 334 -- 
src/xenia/ui/vulkan/premake5.lua | 47 +- .../ui/vulkan/shaders/bin/blit_color_frag.h | 88 - .../ui/vulkan/shaders/bin/blit_color_frag.spv | Bin 1000 -> 0 bytes .../ui/vulkan/shaders/bin/blit_color_frag.txt | 67 - .../ui/vulkan/shaders/bin/blit_depth_frag.h | 59 - .../ui/vulkan/shaders/bin/blit_depth_frag.spv | Bin 660 -> 0 bytes .../ui/vulkan/shaders/bin/blit_depth_frag.txt | 46 - src/xenia/ui/vulkan/shaders/bin/blit_vert.h | 149 - src/xenia/ui/vulkan/shaders/bin/blit_vert.spv | Bin 1732 -> 0 bytes src/xenia/ui/vulkan/shaders/bin/blit_vert.txt | 99 - .../ui/vulkan/shaders/bin/immediate_frag.h | 109 - .../ui/vulkan/shaders/bin/immediate_frag.spv | Bin 1252 -> 0 bytes .../ui/vulkan/shaders/bin/immediate_frag.txt | 83 - .../ui/vulkan/shaders/bin/immediate_vert.h | 128 - .../ui/vulkan/shaders/bin/immediate_vert.spv | Bin 1488 -> 0 bytes .../ui/vulkan/shaders/bin/immediate_vert.txt | 90 - src/xenia/ui/vulkan/shaders/blit.vert | 31 - src/xenia/ui/vulkan/shaders/blit_color.frag | 20 - src/xenia/ui/vulkan/shaders/blit_depth.frag | 19 - src/xenia/ui/vulkan/shaders/immediate.frag | 28 - src/xenia/ui/vulkan/shaders/immediate.vert | 23 - src/xenia/ui/vulkan/vulkan.cc | 18 - src/xenia/ui/vulkan/vulkan.h | 33 - src/xenia/ui/vulkan/vulkan_context.cc | 173 +- src/xenia/ui/vulkan/vulkan_context.h | 38 +- src/xenia/ui/vulkan/vulkan_device.cc | 417 -- src/xenia/ui/vulkan/vulkan_device.h | 131 - .../ui/vulkan/vulkan_immediate_drawer.cc | 882 +---- src/xenia/ui/vulkan/vulkan_immediate_drawer.h | 43 +- src/xenia/ui/vulkan/vulkan_instance.cc | 540 --- src/xenia/ui/vulkan/vulkan_instance.h | 105 - src/xenia/ui/vulkan/vulkan_mem_alloc.h | 44 - src/xenia/ui/vulkan/vulkan_provider.cc | 61 +- src/xenia/ui/vulkan/vulkan_provider.h | 15 +- src/xenia/ui/vulkan/vulkan_swap_chain.cc | 811 ---- src/xenia/ui/vulkan/vulkan_swap_chain.h | 106 - src/xenia/ui/vulkan/vulkan_util.cc | 504 --- src/xenia/ui/vulkan/vulkan_util.h | 136 - src/xenia/ui/vulkan/vulkan_window_demo.cc | 29 - xenia-build | 17 +- 461 
files changed, 161 insertions(+), 22194 deletions(-) rename src/xenia/gpu/{d3d12 => }/shaders/adaptive_quad.hs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/adaptive_triangle.hs.hlsl (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/adaptive_quad_hs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/adaptive_quad_hs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/adaptive_quad_hs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/adaptive_triangle_hs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/adaptive_triangle_hs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/adaptive_triangle_hs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/continuous_quad_hs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/continuous_quad_hs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/continuous_quad_hs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/continuous_triangle_hs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/continuous_triangle_hs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/continuous_triangle_hs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/discrete_quad_hs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/discrete_quad_hs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/discrete_quad_hs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/discrete_triangle_hs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/discrete_triangle_hs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => 
shaders/bytecode/d3d12_5_1}/discrete_triangle_hs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_load_color_32bpp_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_load_color_32bpp_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_load_color_32bpp_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_load_color_64bpp_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_load_color_64bpp_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_load_color_64bpp_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_load_color_7e3_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_load_color_7e3_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_load_color_7e3_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_load_depth_float_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_load_depth_float_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_load_depth_float_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_load_depth_unorm_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_load_depth_unorm_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_load_depth_unorm_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_store_color_32bpp_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_store_color_32bpp_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_store_color_32bpp_cs.txt 
(100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_store_color_64bpp_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_store_color_64bpp_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_store_color_64bpp_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_store_color_7e3_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_store_color_7e3_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_store_color_7e3_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_store_depth_float_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_store_depth_float_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_store_depth_float_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_store_depth_unorm_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_store_depth_unorm_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/edram_store_depth_unorm_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/fullscreen_vs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/fullscreen_vs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/fullscreen_vs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/primitive_point_list_gs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/primitive_point_list_gs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/primitive_point_list_gs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => 
shaders/bytecode/d3d12_5_1}/primitive_quad_list_gs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/primitive_quad_list_gs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/primitive_quad_list_gs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/primitive_rectangle_list_gs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/primitive_rectangle_list_gs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/primitive_rectangle_list_gs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_clear_32bpp_2xres_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_clear_32bpp_2xres_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_clear_32bpp_2xres_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_clear_32bpp_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_clear_32bpp_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_clear_32bpp_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_clear_64bpp_2xres_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_clear_64bpp_2xres_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_clear_64bpp_2xres_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_clear_64bpp_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_clear_64bpp_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_clear_64bpp_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => 
shaders/bytecode/d3d12_5_1}/resolve_clear_depth_24_32_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_clear_depth_24_32_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_clear_depth_24_32_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_32bpp_1x2xmsaa_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_32bpp_1x2xmsaa_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_32bpp_1x2xmsaa_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_32bpp_2xres_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_32bpp_2xres_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_32bpp_2xres_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_32bpp_4xmsaa_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_32bpp_4xmsaa_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_32bpp_4xmsaa_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_64bpp_1x2xmsaa_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_64bpp_1x2xmsaa_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_64bpp_1x2xmsaa_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_64bpp_2xres_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_64bpp_2xres_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_64bpp_2xres_cs.txt (100%) rename 
src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_64bpp_4xmsaa_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_64bpp_4xmsaa_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_fast_64bpp_4xmsaa_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_128bpp_2xres_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_128bpp_2xres_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_128bpp_2xres_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_128bpp_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_128bpp_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_128bpp_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_16bpp_2xres_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_16bpp_2xres_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_16bpp_2xres_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_16bpp_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_16bpp_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_16bpp_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_32bpp_2xres_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_32bpp_2xres_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_32bpp_2xres_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => 
shaders/bytecode/d3d12_5_1}/resolve_full_32bpp_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_32bpp_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_32bpp_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_64bpp_2xres_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_64bpp_2xres_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_64bpp_2xres_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_64bpp_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_64bpp_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_64bpp_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_8bpp_2xres_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_8bpp_2xres_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_8bpp_2xres_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_8bpp_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_8bpp_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/resolve_full_8bpp_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/stretch_gamma_ps.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/stretch_gamma_ps.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/stretch_gamma_ps.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/stretch_ps.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => 
shaders/bytecode/d3d12_5_1}/stretch_ps.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/stretch_ps.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/tessellation_vs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/tessellation_vs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/tessellation_vs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_128bpb_2x_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_128bpb_2x_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_128bpb_2x_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_128bpb_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_128bpb_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_128bpb_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_16bpb_2x_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_16bpb_2x_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_16bpb_2x_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_16bpb_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_16bpb_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_16bpb_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_32bpb_2x_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_32bpb_2x_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => 
shaders/bytecode/d3d12_5_1}/texture_load_32bpb_2x_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_32bpb_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_32bpb_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_32bpb_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_64bpb_2x_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_64bpb_2x_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_64bpb_2x_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_64bpb_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_64bpb_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_64bpb_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_8bpb_2x_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_8bpb_2x_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_8bpb_2x_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_8bpb_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_8bpb_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_8bpb_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_ctx1_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_ctx1_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_ctx1_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => 
shaders/bytecode/d3d12_5_1}/texture_load_depth_float_2x_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_depth_float_2x_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_depth_float_2x_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_depth_float_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_depth_float_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_depth_float_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_depth_unorm_2x_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_depth_unorm_2x_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_depth_unorm_2x_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_depth_unorm_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_depth_unorm_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_depth_unorm_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxn_rg8_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxn_rg8_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxn_rg8_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxt1_rgba8_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxt1_rgba8_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxt1_rgba8_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => 
shaders/bytecode/d3d12_5_1}/texture_load_dxt3_rgba8_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxt3_rgba8_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxt3_rgba8_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxt3a_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxt3a_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxt3a_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxt3aas1111_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxt3aas1111_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxt3aas1111_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxt5_rgba8_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxt5_rgba8_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxt5_rgba8_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxt5a_r8_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxt5a_r8_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_dxt5a_r8_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r10g11b11_rgba16_2x_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r10g11b11_rgba16_2x_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r10g11b11_rgba16_2x_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => 
shaders/bytecode/d3d12_5_1}/texture_load_r10g11b11_rgba16_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r10g11b11_rgba16_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r10g11b11_rgba16_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r10g11b11_rgba16_snorm_2x_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r10g11b11_rgba16_snorm_2x_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r10g11b11_rgba16_snorm_2x_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r10g11b11_rgba16_snorm_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r10g11b11_rgba16_snorm_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r10g11b11_rgba16_snorm_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r11g11b10_rgba16_2x_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r11g11b10_rgba16_2x_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r11g11b10_rgba16_2x_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r11g11b10_rgba16_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r11g11b10_rgba16_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r11g11b10_rgba16_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r11g11b10_rgba16_snorm_2x_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r11g11b10_rgba16_snorm_2x_cs.h (100%) rename 
src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r11g11b10_rgba16_snorm_2x_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r11g11b10_rgba16_snorm_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r11g11b10_rgba16_snorm_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r11g11b10_rgba16_snorm_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r4g4b4a4_b4g4r4a4_2x_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r4g4b4a4_b4g4r4a4_2x_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r4g4b4a4_b4g4r4a4_2x_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r4g4b4a4_b4g4r4a4_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r4g4b4a4_b4g4r4a4_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r4g4b4a4_b4g4r4a4_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r5g5b5a1_b5g5r5a1_2x_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r5g5b5a1_b5g5r5a1_2x_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r5g5b5a1_b5g5r5a1_2x_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r5g5b5a1_b5g5r5a1_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r5g5b5a1_b5g5r5a1_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r5g5b5a1_b5g5r5a1_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => 
shaders/bytecode/d3d12_5_1}/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r5g6b5_b5g6r5_2x_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r5g6b5_b5g6r5_2x_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r5g6b5_b5g6r5_2x_cs.txt (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r5g6b5_b5g6r5_cs.cso (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r5g6b5_b5g6r5_cs.h (100%) rename src/xenia/gpu/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/texture_load_r5g6b5_b5g6r5_cs.txt (100%) rename src/xenia/gpu/{d3d12 => }/shaders/continuous_quad.hs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/continuous_triangle.hs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/discrete_quad.hs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/discrete_triangle.hs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/edram.hlsli (100%) rename src/xenia/gpu/{d3d12 => }/shaders/edram_load_color_32bpp.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/edram_load_color_64bpp.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/edram_load_color_7e3.cs.hlsl (100%) rename 
src/xenia/gpu/{d3d12 => }/shaders/edram_load_depth_float.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/edram_load_depth_unorm.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/edram_load_store.hlsli (100%) rename src/xenia/gpu/{d3d12 => }/shaders/edram_store_color_32bpp.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/edram_store_color_64bpp.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/edram_store_color_7e3.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/edram_store_depth_float.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/edram_store_depth_unorm.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/endian.hlsli (100%) rename src/xenia/gpu/{d3d12 => }/shaders/fullscreen.vs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/pixel_formats.hlsli (100%) rename src/xenia/gpu/{d3d12 => }/shaders/primitive_point_list.gs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/primitive_quad_list.gs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/primitive_rectangle_list.gs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve.hlsli (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_clear_32bpp.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_clear_32bpp_2xres.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_clear_64bpp.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_clear_64bpp_2xres.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_clear_depth_24_32.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_fast_32bpp_1x2xmsaa.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_fast_32bpp_2xres.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_fast_32bpp_4xmsaa.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_fast_64bpp_1x2xmsaa.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_fast_64bpp_2xres.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_fast_64bpp_4xmsaa.cs.hlsl 
(100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_full_128bpp.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_full_128bpp_2xres.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_full_16bpp.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_full_16bpp_2xres.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_full_32bpp.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_full_32bpp_2xres.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_full_64bpp.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_full_64bpp_2xres.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_full_8bpp.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/resolve_full_8bpp_2xres.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/stretch.ps.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/stretch_gamma.ps.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/tessellation.vs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_address.hlsli (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load.hlsli (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_128bpb.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_128bpb_2x.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_16bpb.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_16bpb.hlsli (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_16bpb_2x.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_16bpb_2x.hlsli (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_32bpb.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_32bpb_2x.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_32bpb_64bpb.hlsli (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_32bpb_64bpb_2x.hlsli (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_64bpb.cs.hlsl (100%) rename 
src/xenia/gpu/{d3d12 => }/shaders/texture_load_64bpb_2x.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_8bpb.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_8bpb_2x.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_ctx1.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_depth_float.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_depth_float_2x.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_depth_unorm.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_depth_unorm_2x.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_dxn_rg8.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_dxt1_rgba8.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_dxt3_rgba8.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_dxt3a.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_dxt3aas1111.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_dxt5_rgba8.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_dxt5a_r8.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_r10g11b11_rgba16.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_r10g11b11_rgba16_2x.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_r10g11b11_rgba16_snorm.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_r10g11b11_rgba16_snorm_2x.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_r11g11b10_rgba16.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_r11g11b10_rgba16_2x.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_r11g11b10_rgba16_snorm.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_r11g11b10_rgba16_snorm_2x.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_r4g4b4a4_b4g4r4a4.cs.hlsl (100%) rename 
src/xenia/gpu/{d3d12 => }/shaders/texture_load_r4g4b4a4_b4g4r4a4_2x.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_r5g5b5a1_b5g5r5a1.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_r5g5b5a1_b5g5r5a1_2x.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_r5g6b5_b5g6r5.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/texture_load_r5g6b5_b5g6r5_2x.cs.hlsl (100%) rename src/xenia/gpu/{d3d12 => }/shaders/xenos_draw.hlsli (100%) delete mode 100644 src/xenia/gpu/spirv/compiler.cc delete mode 100644 src/xenia/gpu/spirv/compiler.h delete mode 100644 src/xenia/gpu/spirv/compiler_pass.h delete mode 100644 src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cpp delete mode 100644 src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h delete mode 100644 src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc delete mode 100644 src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h delete mode 100644 src/xenia/gpu/spirv_shader_translator.cc delete mode 100644 src/xenia/gpu/spirv_shader_translator.h delete mode 100644 src/xenia/gpu/vulkan/buffer_cache.cc delete mode 100644 src/xenia/gpu/vulkan/buffer_cache.h delete mode 100644 src/xenia/gpu/vulkan/pipeline_cache.cc delete mode 100644 src/xenia/gpu/vulkan/pipeline_cache.h delete mode 100644 src/xenia/gpu/vulkan/render_cache.cc delete mode 100644 src/xenia/gpu/vulkan/render_cache.h delete mode 100644 src/xenia/gpu/vulkan/shaders/bin/dummy_frag.h delete mode 100644 src/xenia/gpu/vulkan/shaders/bin/dummy_frag.txt delete mode 100644 src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h delete mode 100644 src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.txt delete mode 100644 src/xenia/gpu/vulkan/shaders/bin/point_list_geom.h delete mode 100644 
src/xenia/gpu/vulkan/shaders/bin/point_list_geom.txt delete mode 100644 src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.h delete mode 100644 src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.txt delete mode 100644 src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h delete mode 100644 src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.txt delete mode 100644 src/xenia/gpu/vulkan/shaders/dummy.frag delete mode 100644 src/xenia/gpu/vulkan/shaders/line_quad_list.geom delete mode 100644 src/xenia/gpu/vulkan/shaders/point_list.geom delete mode 100644 src/xenia/gpu/vulkan/shaders/quad_list.geom delete mode 100644 src/xenia/gpu/vulkan/shaders/rect_list.geom delete mode 100644 src/xenia/gpu/vulkan/texture_cache.cc delete mode 100644 src/xenia/gpu/vulkan/texture_cache.h delete mode 100644 src/xenia/gpu/vulkan/texture_config.cc delete mode 100644 src/xenia/gpu/vulkan/texture_config.h delete mode 100644 src/xenia/gpu/vulkan/vulkan_gpu_flags.cc delete mode 100644 src/xenia/gpu/vulkan/vulkan_gpu_flags.h delete mode 100644 src/xenia/gpu/vulkan/vulkan_shader.cc delete mode 100644 src/xenia/gpu/vulkan/vulkan_shader.h delete mode 100644 src/xenia/gpu/vulkan/vulkan_trace_dump_main.cc delete mode 100644 src/xenia/gpu/vulkan/vulkan_trace_viewer_main.cc rename src/xenia/ui/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/immediate_ps.cso (100%) rename src/xenia/ui/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/immediate_ps.h (100%) rename src/xenia/ui/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/immediate_ps.txt (100%) rename src/xenia/ui/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/immediate_vs.cso (100%) rename src/xenia/ui/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/immediate_vs.h (100%) rename src/xenia/ui/{d3d12/shaders/dxbc => shaders/bytecode/d3d12_5_1}/immediate_vs.txt (100%) rename src/xenia/ui/{d3d12 => }/shaders/immediate.ps.hlsl (100%) rename src/xenia/ui/{d3d12 => }/shaders/immediate.vs.hlsl (100%) delete mode 100644 src/xenia/ui/spirv/premake5.lua delete 
mode 100644 src/xenia/ui/spirv/spirv_assembler.cc delete mode 100644 src/xenia/ui/spirv/spirv_assembler.h delete mode 100644 src/xenia/ui/spirv/spirv_disassembler.cc delete mode 100644 src/xenia/ui/spirv/spirv_disassembler.h delete mode 100644 src/xenia/ui/spirv/spirv_util.cc delete mode 100644 src/xenia/ui/spirv/spirv_util.h delete mode 100644 src/xenia/ui/spirv/spirv_validator.cc delete mode 100644 src/xenia/ui/spirv/spirv_validator.h delete mode 100644 src/xenia/ui/vulkan/blitter.cc delete mode 100644 src/xenia/ui/vulkan/blitter.h delete mode 100644 src/xenia/ui/vulkan/circular_buffer.cc delete mode 100644 src/xenia/ui/vulkan/circular_buffer.h delete mode 100644 src/xenia/ui/vulkan/fenced_pools.cc delete mode 100644 src/xenia/ui/vulkan/fenced_pools.h delete mode 100644 src/xenia/ui/vulkan/shaders/bin/blit_color_frag.h delete mode 100644 src/xenia/ui/vulkan/shaders/bin/blit_color_frag.spv delete mode 100644 src/xenia/ui/vulkan/shaders/bin/blit_color_frag.txt delete mode 100644 src/xenia/ui/vulkan/shaders/bin/blit_depth_frag.h delete mode 100644 src/xenia/ui/vulkan/shaders/bin/blit_depth_frag.spv delete mode 100644 src/xenia/ui/vulkan/shaders/bin/blit_depth_frag.txt delete mode 100644 src/xenia/ui/vulkan/shaders/bin/blit_vert.h delete mode 100644 src/xenia/ui/vulkan/shaders/bin/blit_vert.spv delete mode 100644 src/xenia/ui/vulkan/shaders/bin/blit_vert.txt delete mode 100644 src/xenia/ui/vulkan/shaders/bin/immediate_frag.h delete mode 100644 src/xenia/ui/vulkan/shaders/bin/immediate_frag.spv delete mode 100644 src/xenia/ui/vulkan/shaders/bin/immediate_frag.txt delete mode 100644 src/xenia/ui/vulkan/shaders/bin/immediate_vert.h delete mode 100644 src/xenia/ui/vulkan/shaders/bin/immediate_vert.spv delete mode 100644 src/xenia/ui/vulkan/shaders/bin/immediate_vert.txt delete mode 100644 src/xenia/ui/vulkan/shaders/blit.vert delete mode 100644 src/xenia/ui/vulkan/shaders/blit_color.frag delete mode 100644 src/xenia/ui/vulkan/shaders/blit_depth.frag delete mode 100644 
src/xenia/ui/vulkan/shaders/immediate.frag delete mode 100644 src/xenia/ui/vulkan/shaders/immediate.vert delete mode 100644 src/xenia/ui/vulkan/vulkan.cc delete mode 100644 src/xenia/ui/vulkan/vulkan.h delete mode 100644 src/xenia/ui/vulkan/vulkan_device.cc delete mode 100644 src/xenia/ui/vulkan/vulkan_device.h delete mode 100644 src/xenia/ui/vulkan/vulkan_instance.cc delete mode 100644 src/xenia/ui/vulkan/vulkan_instance.h delete mode 100644 src/xenia/ui/vulkan/vulkan_mem_alloc.h delete mode 100644 src/xenia/ui/vulkan/vulkan_swap_chain.cc delete mode 100644 src/xenia/ui/vulkan/vulkan_swap_chain.h delete mode 100644 src/xenia/ui/vulkan/vulkan_util.cc delete mode 100644 src/xenia/ui/vulkan/vulkan_util.h delete mode 100644 src/xenia/ui/vulkan/vulkan_window_demo.cc diff --git a/premake5.lua b/premake5.lua index 4af8c50bd..e0c8f8d92 100644 --- a/premake5.lua +++ b/premake5.lua @@ -248,7 +248,6 @@ solution("xenia") include("src/xenia/hid/sdl") include("src/xenia/kernel") include("src/xenia/ui") - include("src/xenia/ui/spirv") include("src/xenia/ui/vulkan") include("src/xenia/vfs") diff --git a/src/xenia/app/premake5.lua b/src/xenia/app/premake5.lua index ac3f48eb4..520da24e4 100644 --- a/src/xenia/app/premake5.lua +++ b/src/xenia/app/premake5.lua @@ -39,7 +39,6 @@ project("xenia-app") "xenia-hid-sdl", "xenia-kernel", "xenia-ui", - "xenia-ui-spirv", "xenia-ui-vulkan", "xenia-vfs", "xxhash", diff --git a/src/xenia/app/xenia_main.cc b/src/xenia/app/xenia_main.cc index bd109681b..a52656814 100644 --- a/src/xenia/app/xenia_main.cc +++ b/src/xenia/app/xenia_main.cc @@ -171,10 +171,11 @@ std::unique_ptr CreateAudioSystem(cpu::Processor* processor) { std::unique_ptr CreateGraphicsSystem() { Factory factory; + factory.Add("vulkan"); + // TODO(Triang3l): Move D3D12 back to the top. 
#if XE_PLATFORM_WIN32 factory.Add("d3d12"); #endif // XE_PLATFORM_WIN32 - factory.Add("vulkan"); factory.Add("null"); return factory.Create(cvars::gpu); } diff --git a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc index 30b322f87..ee4656eec 100644 --- a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc +++ b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc @@ -19,9 +19,9 @@ namespace gpu { namespace d3d12 { // Generated with `xb buildhlsl`. -#include "xenia/gpu/d3d12/shaders/dxbc/fullscreen_vs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.h" -#include "xenia/gpu/d3d12/shaders/dxbc/stretch_ps.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/fullscreen_vs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/stretch_gamma_ps.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/stretch_ps.h" D3D12GraphicsSystem::D3D12GraphicsSystem() {} diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index e9af039d2..41bb72790 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -57,16 +57,16 @@ namespace gpu { namespace d3d12 { // Generated with `xb buildhlsl`. 
-#include "xenia/gpu/d3d12/shaders/dxbc/adaptive_quad_hs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/adaptive_triangle_hs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/continuous_quad_hs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/continuous_triangle_hs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/discrete_quad_hs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/discrete_triangle_hs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/primitive_quad_list_gs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/primitive_rectangle_list_gs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/tessellation_vs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_quad_hs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_triangle_hs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_quad_hs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_triangle_hs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_quad_hs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_triangle_hs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_quad_list_gs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_rectangle_list_gs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/tessellation_vs.h" PipelineCache::PipelineCache(D3D12CommandProcessor& command_processor, const RegisterFile& register_file, diff --git a/src/xenia/gpu/d3d12/premake5.lua b/src/xenia/gpu/d3d12/premake5.lua index 812e3cc85..d6623b51e 100644 --- a/src/xenia/gpu/d3d12/premake5.lua +++ b/src/xenia/gpu/d3d12/premake5.lua @@ -16,7 +16,7 @@ project("xenia-gpu-d3d12") }) local_platform_files() files({ - "shaders/bin/*.h", + "../shaders/bytecode/d3d12_5_1/*.h", }) group("src") diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index bd25b738a..0c09b6864 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ 
b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -38,37 +38,37 @@ namespace gpu { namespace d3d12 { // Generated with `xb buildhlsl`. -#include "xenia/gpu/d3d12/shaders/dxbc/edram_load_color_32bpp_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/edram_load_color_64bpp_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/edram_load_color_7e3_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_float_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_unorm_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/edram_store_color_32bpp_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/edram_store_color_64bpp_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/edram_store_color_7e3_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_float_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_unorm_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_clear_32bpp_2xres_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_clear_32bpp_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_clear_64bpp_2xres_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_clear_64bpp_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_clear_depth_24_32_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_1x2xmsaa_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_2xres_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_4xmsaa_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_1x2xmsaa_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_2xres_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_4xmsaa_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_128bpp_2xres_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_128bpp_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_16bpp_2xres_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_16bpp_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_32bpp_2xres_cs.h" -#include 
"xenia/gpu/d3d12/shaders/dxbc/resolve_full_32bpp_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_64bpp_2xres_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_64bpp_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_8bpp_2xres_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_8bpp_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_32bpp_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_64bpp_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_7e3_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_unorm_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_32bpp_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_64bpp_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_7e3_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_unorm_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_32bpp_2xres_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_32bpp_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_64bpp_2xres_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_64bpp_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_depth_24_32_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_1x2xmsaa_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_2xres_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_4xmsaa_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_1x2xmsaa_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_2xres_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_4xmsaa_cs.h" +#include 
"xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_128bpp_2xres_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_128bpp_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_2xres_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_2xres_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_2xres_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_8bpp_2xres_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_8bpp_cs.h" const RenderTargetCache::EdramLoadStoreModeInfo RenderTargetCache::edram_load_store_mode_info_[size_t( diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 0f096fb49..01c7812c9 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -54,44 +54,44 @@ namespace gpu { namespace d3d12 { // Generated with `xb buildhlsl`. 
-#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_128bpb_2x_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_128bpb_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_16bpb_2x_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_16bpb_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_32bpb_2x_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_32bpb_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_64bpb_2x_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_64bpb_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_8bpb_2x_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_8bpb_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_ctx1_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_float_2x_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_float_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_unorm_2x_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_unorm_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_dxn_rg8_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt1_rgba8_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3_rgba8_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3a_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3aas1111_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt5_rgba8_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt5a_r8_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_2x_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_snorm_2x_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_snorm_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_2x_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_cs.h" -#include 
"xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_2x_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_r4g4b4a4_b4g4r4a4_2x_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_r4g4b4a4_b4g4r4a4_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b5a1_b5g5r5a1_2x_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b5a1_b5g5r5a1_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g6b5_b5g6r5_2x_cs.h" -#include "xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g6b5_b5g6r5_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_128bpb_2x_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_128bpb_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_16bpb_2x_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_16bpb_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_32bpb_2x_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_32bpb_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_64bpb_2x_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_64bpb_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_8bpb_2x_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_8bpb_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_ctx1_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_float_2x_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_float_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_2x_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_cs.h" +#include 
"xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxn_rg8_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt1_rgba8_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3_rgba8_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3a_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3aas1111_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt5_rgba8_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt5a_r8_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_2x_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_2x_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_2x_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_2x_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_2x_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_2x_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_2x_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_cs.h" // For formats with less than 4 components, assuming the last 
component is // replicated into the non-existent ones, similar to what is done for unused diff --git a/src/xenia/gpu/premake5.lua b/src/xenia/gpu/premake5.lua index 33c6a228d..92f798458 100644 --- a/src/xenia/gpu/premake5.lua +++ b/src/xenia/gpu/premake5.lua @@ -14,7 +14,6 @@ project("xenia-gpu") "spirv-tools", "xenia-base", "xenia-ui", - "xenia-ui-spirv", "xxhash", }) defines({ @@ -23,8 +22,6 @@ project("xenia-gpu") project_root.."/third_party/spirv-tools/external/include", }) local_platform_files() - -- local_platform_files("spirv") - -- local_platform_files("spirv/passes") group("src") project("xenia-gpu-shader-compiler") @@ -38,7 +35,6 @@ project("xenia-gpu-shader-compiler") "spirv-tools", "xenia-base", "xenia-gpu", - "xenia-ui-spirv", }) defines({ }) diff --git a/src/xenia/gpu/shader_compiler_main.cc b/src/xenia/gpu/shader_compiler_main.cc index f5392216b..b8949f948 100644 --- a/src/xenia/gpu/shader_compiler_main.cc +++ b/src/xenia/gpu/shader_compiler_main.cc @@ -19,8 +19,6 @@ #include "xenia/base/string.h" #include "xenia/gpu/dxbc_shader_translator.h" #include "xenia/gpu/shader_translator.h" -#include "xenia/gpu/spirv_shader_translator.h" -#include "xenia/ui/spirv/spirv_disassembler.h" // For D3DDisassemble: #if XE_PLATFORM_WIN32 @@ -33,8 +31,7 @@ DEFINE_string(shader_input_type, "", "GPU"); DEFINE_path(shader_output, "", "Output shader file path.", "GPU"); DEFINE_string(shader_output_type, "ucode", - "Translator to use: [ucode, spirv, spirvtext, dxbc, dxbctext].", - "GPU"); + "Translator to use: [ucode, dxbc, dxbctext].", "GPU"); DEFINE_string( vertex_shader_output_type, "", "Type of the host interface to produce the vertex or domain shader for: " @@ -105,11 +102,8 @@ int shader_compiler_main(const std::vector& args) { shader_type, ucode_data_hash, ucode_dwords.data(), ucode_dwords.size()); std::unique_ptr translator; - if (cvars::shader_output_type == "spirv" || - cvars::shader_output_type == "spirvtext") { - translator = std::make_unique(); - } else if 
(cvars::shader_output_type == "dxbc" || - cvars::shader_output_type == "dxbctext") { + if (cvars::shader_output_type == "dxbc" || + cvars::shader_output_type == "dxbctext") { translator = std::make_unique( 0, cvars::shader_output_bindless_resources, cvars::shader_output_dxbc_rov); @@ -146,14 +140,6 @@ int shader_compiler_main(const std::vector& args) { const void* source_data = shader->translated_binary().data(); size_t source_data_size = shader->translated_binary().size(); - std::unique_ptr spirv_disasm_result; - if (cvars::shader_output_type == "spirvtext") { - // Disassemble SPIRV. - spirv_disasm_result = xe::ui::spirv::SpirvDisassembler().Disassemble( - reinterpret_cast(source_data), source_data_size / 4); - source_data = spirv_disasm_result->text(); - source_data_size = std::strlen(spirv_disasm_result->text()) + 1; - } #if XE_PLATFORM_WIN32 ID3DBlob* dxbc_disasm_blob = nullptr; if (cvars::shader_output_type == "dxbctext") { diff --git a/src/xenia/gpu/d3d12/shaders/adaptive_quad.hs.hlsl b/src/xenia/gpu/shaders/adaptive_quad.hs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/adaptive_quad.hs.hlsl rename to src/xenia/gpu/shaders/adaptive_quad.hs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/adaptive_triangle.hs.hlsl b/src/xenia/gpu/shaders/adaptive_triangle.hs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/adaptive_triangle.hs.hlsl rename to src/xenia/gpu/shaders/adaptive_triangle.hs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/adaptive_quad_hs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_quad_hs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/adaptive_quad_hs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_quad_hs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/adaptive_quad_hs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_quad_hs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/adaptive_quad_hs.h rename to 
src/xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_quad_hs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/adaptive_quad_hs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_quad_hs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/adaptive_quad_hs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_quad_hs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/adaptive_triangle_hs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_triangle_hs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/adaptive_triangle_hs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_triangle_hs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/adaptive_triangle_hs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_triangle_hs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/adaptive_triangle_hs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_triangle_hs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/adaptive_triangle_hs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_triangle_hs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/adaptive_triangle_hs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_triangle_hs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/continuous_quad_hs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_quad_hs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/continuous_quad_hs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_quad_hs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/continuous_quad_hs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_quad_hs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/continuous_quad_hs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_quad_hs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/continuous_quad_hs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_quad_hs.txt similarity 
index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/continuous_quad_hs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_quad_hs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/continuous_triangle_hs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_triangle_hs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/continuous_triangle_hs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_triangle_hs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/continuous_triangle_hs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_triangle_hs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/continuous_triangle_hs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_triangle_hs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/continuous_triangle_hs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_triangle_hs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/continuous_triangle_hs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_triangle_hs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/discrete_quad_hs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_quad_hs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/discrete_quad_hs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_quad_hs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/discrete_quad_hs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_quad_hs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/discrete_quad_hs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_quad_hs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/discrete_quad_hs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_quad_hs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/discrete_quad_hs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_quad_hs.txt diff --git 
a/src/xenia/gpu/d3d12/shaders/dxbc/discrete_triangle_hs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_triangle_hs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/discrete_triangle_hs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_triangle_hs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/discrete_triangle_hs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_triangle_hs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/discrete_triangle_hs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_triangle_hs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/discrete_triangle_hs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_triangle_hs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/discrete_triangle_hs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_triangle_hs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_32bpp_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_32bpp_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_32bpp_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_32bpp_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_32bpp_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_32bpp_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_32bpp_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_32bpp_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_32bpp_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_32bpp_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_32bpp_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_32bpp_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_64bpp_cs.cso 
b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_64bpp_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_64bpp_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_64bpp_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_64bpp_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_64bpp_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_64bpp_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_64bpp_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_64bpp_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_64bpp_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_64bpp_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_64bpp_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_7e3_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_7e3_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_7e3_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_7e3_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_7e3_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_7e3_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_7e3_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_7e3_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_7e3_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_7e3_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_load_color_7e3_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_7e3_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_float_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float_cs.cso 
similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_float_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_float_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_float_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_float_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_float_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_unorm_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_unorm_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_unorm_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_unorm_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_unorm_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_unorm_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_unorm_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_unorm_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_unorm_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_unorm_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_unorm_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_unorm_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_32bpp_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_32bpp_cs.cso similarity index 100% rename from 
src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_32bpp_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_32bpp_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_32bpp_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_32bpp_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_32bpp_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_32bpp_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_32bpp_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_32bpp_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_32bpp_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_32bpp_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_64bpp_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_64bpp_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_64bpp_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_64bpp_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_64bpp_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_64bpp_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_64bpp_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_64bpp_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_64bpp_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_64bpp_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_64bpp_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_64bpp_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_7e3_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_7e3_cs.cso similarity index 100% rename from 
src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_7e3_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_7e3_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_7e3_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_7e3_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_7e3_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_7e3_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_7e3_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_7e3_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_7e3_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_7e3_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_float_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_float_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_float_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_float_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_float_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_float_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_unorm_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_unorm_cs.cso similarity index 100% rename from 
src/xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_unorm_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_unorm_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_unorm_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_unorm_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_unorm_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_unorm_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_unorm_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_unorm_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_unorm_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_unorm_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/fullscreen_vs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/fullscreen_vs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/fullscreen_vs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/fullscreen_vs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/fullscreen_vs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/fullscreen_vs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/fullscreen_vs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/fullscreen_vs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/fullscreen_vs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/fullscreen_vs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/fullscreen_vs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/fullscreen_vs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.cso diff --git 
a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_quad_list_gs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_quad_list_gs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/primitive_quad_list_gs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_quad_list_gs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_quad_list_gs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_quad_list_gs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/primitive_quad_list_gs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_quad_list_gs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_quad_list_gs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_quad_list_gs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/primitive_quad_list_gs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_quad_list_gs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_rectangle_list_gs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_rectangle_list_gs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/primitive_rectangle_list_gs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_rectangle_list_gs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_rectangle_list_gs.h 
b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_rectangle_list_gs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/primitive_rectangle_list_gs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_rectangle_list_gs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_rectangle_list_gs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_rectangle_list_gs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/primitive_rectangle_list_gs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_rectangle_list_gs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_32bpp_2xres_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_32bpp_2xres_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_32bpp_2xres_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_32bpp_2xres_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_32bpp_2xres_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_32bpp_2xres_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_32bpp_2xres_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_32bpp_2xres_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_32bpp_2xres_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_32bpp_2xres_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_32bpp_2xres_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_32bpp_2xres_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_32bpp_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_32bpp_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_32bpp_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_32bpp_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_32bpp_cs.h 
b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_32bpp_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_32bpp_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_32bpp_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_32bpp_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_32bpp_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_32bpp_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_32bpp_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_64bpp_2xres_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_64bpp_2xres_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_64bpp_2xres_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_64bpp_2xres_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_64bpp_2xres_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_64bpp_2xres_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_64bpp_2xres_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_64bpp_2xres_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_64bpp_2xres_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_64bpp_2xres_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_64bpp_2xres_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_64bpp_2xres_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_64bpp_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_64bpp_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_64bpp_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_64bpp_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_64bpp_cs.h 
b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_64bpp_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_64bpp_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_64bpp_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_64bpp_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_64bpp_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_64bpp_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_64bpp_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_depth_24_32_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_depth_24_32_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_depth_24_32_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_depth_24_32_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_depth_24_32_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_depth_24_32_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_depth_24_32_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_depth_24_32_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_depth_24_32_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_depth_24_32_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_clear_depth_24_32_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_depth_24_32_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_1x2xmsaa_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_1x2xmsaa_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_1x2xmsaa_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_1x2xmsaa_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_1x2xmsaa_cs.h 
b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_1x2xmsaa_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_1x2xmsaa_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_1x2xmsaa_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_1x2xmsaa_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_1x2xmsaa_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_1x2xmsaa_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_1x2xmsaa_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_2xres_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_2xres_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_2xres_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_2xres_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_2xres_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_2xres_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_2xres_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_2xres_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_2xres_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_2xres_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_2xres_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_2xres_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_4xmsaa_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_4xmsaa_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_4xmsaa_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_4xmsaa_cs.cso diff --git 
a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_4xmsaa_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_4xmsaa_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_4xmsaa_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_4xmsaa_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_4xmsaa_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_4xmsaa_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_4xmsaa_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_32bpp_4xmsaa_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_1x2xmsaa_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_1x2xmsaa_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_1x2xmsaa_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_1x2xmsaa_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_1x2xmsaa_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_1x2xmsaa_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_1x2xmsaa_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_1x2xmsaa_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_1x2xmsaa_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_1x2xmsaa_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_1x2xmsaa_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_1x2xmsaa_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_2xres_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_2xres_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_2xres_cs.cso rename to 
src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_2xres_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_2xres_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_2xres_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_2xres_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_2xres_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_2xres_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_2xres_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_2xres_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_2xres_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_4xmsaa_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_4xmsaa_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_4xmsaa_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_4xmsaa_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_4xmsaa_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_4xmsaa_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_4xmsaa_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_4xmsaa_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_4xmsaa_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_4xmsaa_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_4xmsaa_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_fast_64bpp_4xmsaa_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_128bpp_2xres_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_128bpp_2xres_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_128bpp_2xres_cs.cso 
rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_128bpp_2xres_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_128bpp_2xres_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_128bpp_2xres_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_128bpp_2xres_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_128bpp_2xres_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_128bpp_2xres_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_128bpp_2xres_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_128bpp_2xres_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_128bpp_2xres_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_128bpp_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_128bpp_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_128bpp_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_128bpp_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_128bpp_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_128bpp_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_128bpp_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_128bpp_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_128bpp_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_128bpp_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_128bpp_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_128bpp_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_16bpp_2xres_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_2xres_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_16bpp_2xres_cs.cso rename to 
src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_2xres_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_16bpp_2xres_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_2xres_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_16bpp_2xres_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_2xres_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_16bpp_2xres_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_2xres_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_16bpp_2xres_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_2xres_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_16bpp_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_16bpp_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_16bpp_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_16bpp_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_16bpp_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_16bpp_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_16bpp_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_32bpp_2xres_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_2xres_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_32bpp_2xres_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_2xres_cs.cso diff 
--git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_32bpp_2xres_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_2xres_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_32bpp_2xres_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_2xres_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_32bpp_2xres_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_2xres_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_32bpp_2xres_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_2xres_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_32bpp_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_32bpp_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_32bpp_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_32bpp_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_32bpp_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_32bpp_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_32bpp_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_64bpp_2xres_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_2xres_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_64bpp_2xres_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_2xres_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_64bpp_2xres_cs.h 
b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_2xres_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_64bpp_2xres_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_2xres_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_64bpp_2xres_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_2xres_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_64bpp_2xres_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_2xres_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_64bpp_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_64bpp_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_64bpp_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_64bpp_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_64bpp_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_64bpp_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_64bpp_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_8bpp_2xres_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_8bpp_2xres_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_8bpp_2xres_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_8bpp_2xres_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_8bpp_2xres_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_8bpp_2xres_cs.h similarity 
index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_8bpp_2xres_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_8bpp_2xres_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_8bpp_2xres_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_8bpp_2xres_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_8bpp_2xres_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_8bpp_2xres_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_8bpp_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_8bpp_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_8bpp_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_8bpp_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_8bpp_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_8bpp_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_8bpp_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_8bpp_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_8bpp_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_8bpp_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/resolve_full_8bpp_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_full_8bpp_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/stretch_gamma_ps.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/stretch_gamma_ps.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/stretch_gamma_ps.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/stretch_gamma_ps.h diff --git 
a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/stretch_gamma_ps.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/stretch_gamma_ps.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/stretch_ps.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/stretch_ps.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/stretch_ps.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/stretch_ps.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/stretch_ps.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/stretch_ps.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/tessellation_vs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/tessellation_vs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/tessellation_vs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/tessellation_vs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/tessellation_vs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/tessellation_vs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/tessellation_vs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/tessellation_vs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/tessellation_vs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/tessellation_vs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/tessellation_vs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/tessellation_vs.txt diff --git 
a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_128bpb_2x_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_128bpb_2x_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_128bpb_2x_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_128bpb_2x_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_128bpb_2x_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_128bpb_2x_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_128bpb_2x_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_128bpb_2x_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_128bpb_2x_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_128bpb_2x_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_128bpb_2x_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_128bpb_2x_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_128bpb_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_128bpb_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_128bpb_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_128bpb_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_128bpb_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_128bpb_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_128bpb_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_128bpb_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_128bpb_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_128bpb_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_128bpb_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_128bpb_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_16bpb_2x_cs.cso 
b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_16bpb_2x_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_16bpb_2x_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_16bpb_2x_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_16bpb_2x_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_16bpb_2x_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_16bpb_2x_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_16bpb_2x_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_16bpb_2x_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_16bpb_2x_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_16bpb_2x_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_16bpb_2x_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_16bpb_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_16bpb_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_16bpb_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_16bpb_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_16bpb_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_16bpb_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_16bpb_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_16bpb_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_16bpb_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_16bpb_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_16bpb_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_16bpb_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_32bpb_2x_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_32bpb_2x_cs.cso similarity index 100% rename from 
src/xenia/gpu/d3d12/shaders/dxbc/texture_load_32bpb_2x_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_32bpb_2x_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_32bpb_2x_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_32bpb_2x_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_32bpb_2x_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_32bpb_2x_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_32bpb_2x_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_32bpb_2x_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_32bpb_2x_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_32bpb_2x_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_32bpb_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_32bpb_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_32bpb_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_32bpb_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_32bpb_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_32bpb_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_32bpb_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_32bpb_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_32bpb_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_32bpb_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_32bpb_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_32bpb_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_64bpb_2x_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_64bpb_2x_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_64bpb_2x_cs.cso rename to 
src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_64bpb_2x_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_64bpb_2x_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_64bpb_2x_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_64bpb_2x_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_64bpb_2x_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_64bpb_2x_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_64bpb_2x_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_64bpb_2x_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_64bpb_2x_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_64bpb_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_64bpb_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_64bpb_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_64bpb_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_64bpb_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_64bpb_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_64bpb_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_64bpb_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_64bpb_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_64bpb_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_64bpb_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_64bpb_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_8bpb_2x_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_8bpb_2x_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_8bpb_2x_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_8bpb_2x_cs.cso diff --git 
a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_8bpb_2x_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_8bpb_2x_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_8bpb_2x_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_8bpb_2x_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_8bpb_2x_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_8bpb_2x_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_8bpb_2x_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_8bpb_2x_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_8bpb_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_8bpb_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_8bpb_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_8bpb_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_8bpb_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_8bpb_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_8bpb_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_8bpb_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_8bpb_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_8bpb_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_8bpb_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_8bpb_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_ctx1_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_ctx1_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_ctx1_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_ctx1_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_ctx1_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_ctx1_cs.h similarity index 100% 
rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_ctx1_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_ctx1_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_ctx1_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_ctx1_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_ctx1_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_ctx1_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_float_2x_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_float_2x_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_float_2x_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_float_2x_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_float_2x_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_float_2x_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_float_2x_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_float_2x_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_float_2x_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_float_2x_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_float_2x_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_float_2x_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_float_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_float_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_float_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_float_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_float_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_float_cs.h similarity index 100% rename from 
src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_float_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_float_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_float_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_float_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_float_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_float_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_unorm_2x_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_2x_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_unorm_2x_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_2x_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_unorm_2x_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_2x_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_unorm_2x_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_2x_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_unorm_2x_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_2x_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_unorm_2x_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_2x_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_unorm_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_unorm_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_unorm_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_cs.h 
similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_unorm_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_unorm_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_depth_unorm_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_depth_unorm_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxn_rg8_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxn_rg8_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxn_rg8_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxn_rg8_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxn_rg8_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxn_rg8_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxn_rg8_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxn_rg8_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxn_rg8_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxn_rg8_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxn_rg8_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxn_rg8_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt1_rgba8_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt1_rgba8_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt1_rgba8_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt1_rgba8_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt1_rgba8_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt1_rgba8_cs.h similarity index 100% rename from 
src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt1_rgba8_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt1_rgba8_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt1_rgba8_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt1_rgba8_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt1_rgba8_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt1_rgba8_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3_rgba8_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3_rgba8_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3_rgba8_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3_rgba8_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3_rgba8_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3_rgba8_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3_rgba8_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3_rgba8_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3_rgba8_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3_rgba8_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3_rgba8_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3_rgba8_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3a_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3a_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3a_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3a_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3a_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3a_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3a_cs.h 
rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3a_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3a_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3a_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3a_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3a_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3aas1111_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3aas1111_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3aas1111_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3aas1111_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3aas1111_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3aas1111_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3aas1111_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3aas1111_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3aas1111_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3aas1111_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt3aas1111_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt3aas1111_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt5_rgba8_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt5_rgba8_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt5_rgba8_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt5_rgba8_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt5_rgba8_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt5_rgba8_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt5_rgba8_cs.h rename to 
src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt5_rgba8_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt5_rgba8_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt5_rgba8_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt5_rgba8_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt5_rgba8_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt5a_r8_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt5a_r8_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt5a_r8_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt5a_r8_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt5a_r8_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt5a_r8_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt5a_r8_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt5a_r8_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt5a_r8_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt5a_r8_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_dxt5a_r8_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_dxt5a_r8_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_2x_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_2x_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_2x_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_2x_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_2x_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_2x_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_2x_cs.h 
rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_2x_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_2x_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_2x_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_2x_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_2x_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_snorm_2x_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_2x_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_snorm_2x_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_2x_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_snorm_2x_cs.h 
b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_2x_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_snorm_2x_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_2x_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_snorm_2x_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_2x_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_snorm_2x_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_2x_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_snorm_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_snorm_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_snorm_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_snorm_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_snorm_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r10g11b11_rgba16_snorm_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r10g11b11_rgba16_snorm_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_2x_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_2x_cs.cso similarity index 
100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_2x_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_2x_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_2x_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_2x_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_2x_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_2x_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_2x_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_2x_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_2x_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_2x_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_cs.txt diff --git 
a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_2x_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_2x_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_2x_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_2x_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_2x_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_2x_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_2x_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_2x_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_2x_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_2x_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_2x_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_2x_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_cs.txt 
b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r11g11b10_rgba16_snorm_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r11g11b10_rgba16_snorm_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r4g4b4a4_b4g4r4a4_2x_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_2x_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r4g4b4a4_b4g4r4a4_2x_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_2x_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r4g4b4a4_b4g4r4a4_2x_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_2x_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r4g4b4a4_b4g4r4a4_2x_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_2x_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r4g4b4a4_b4g4r4a4_2x_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_2x_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r4g4b4a4_b4g4r4a4_2x_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_2x_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r4g4b4a4_b4g4r4a4_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r4g4b4a4_b4g4r4a4_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r4g4b4a4_b4g4r4a4_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_cs.h similarity index 100% rename from 
src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r4g4b4a4_b4g4r4a4_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r4g4b4a4_b4g4r4a4_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r4g4b4a4_b4g4r4a4_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r4g4b4a4_b4g4r4a4_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b5a1_b5g5r5a1_2x_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_2x_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b5a1_b5g5r5a1_2x_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_2x_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b5a1_b5g5r5a1_2x_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_2x_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b5a1_b5g5r5a1_2x_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_2x_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b5a1_b5g5r5a1_2x_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_2x_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b5a1_b5g5r5a1_2x_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_2x_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b5a1_b5g5r5a1_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b5a1_b5g5r5a1_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_cs.cso diff --git 
a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b5a1_b5g5r5a1_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b5a1_b5g5r5a1_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b5a1_b5g5r5a1_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b5a1_b5g5r5a1_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b5a1_b5g5r5a1_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x_cs.txt diff --git 
a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g6b5_b5g6r5_2x_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_2x_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g6b5_b5g6r5_2x_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_2x_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g6b5_b5g6r5_2x_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_2x_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g6b5_b5g6r5_2x_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_2x_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g6b5_b5g6r5_2x_cs.txt 
b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_2x_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g6b5_b5g6r5_2x_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_2x_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g6b5_b5g6r5_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_cs.cso similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g6b5_b5g6r5_cs.cso rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_cs.cso diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g6b5_b5g6r5_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_cs.h similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g6b5_b5g6r5_cs.h rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_cs.h diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g6b5_b5g6r5_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_cs.txt similarity index 100% rename from src/xenia/gpu/d3d12/shaders/dxbc/texture_load_r5g6b5_b5g6r5_cs.txt rename to src/xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_cs.txt diff --git a/src/xenia/gpu/d3d12/shaders/continuous_quad.hs.hlsl b/src/xenia/gpu/shaders/continuous_quad.hs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/continuous_quad.hs.hlsl rename to src/xenia/gpu/shaders/continuous_quad.hs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/continuous_triangle.hs.hlsl b/src/xenia/gpu/shaders/continuous_triangle.hs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/continuous_triangle.hs.hlsl rename to src/xenia/gpu/shaders/continuous_triangle.hs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/discrete_quad.hs.hlsl b/src/xenia/gpu/shaders/discrete_quad.hs.hlsl similarity index 100% rename from 
src/xenia/gpu/d3d12/shaders/discrete_quad.hs.hlsl rename to src/xenia/gpu/shaders/discrete_quad.hs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/discrete_triangle.hs.hlsl b/src/xenia/gpu/shaders/discrete_triangle.hs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/discrete_triangle.hs.hlsl rename to src/xenia/gpu/shaders/discrete_triangle.hs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/edram.hlsli b/src/xenia/gpu/shaders/edram.hlsli similarity index 100% rename from src/xenia/gpu/d3d12/shaders/edram.hlsli rename to src/xenia/gpu/shaders/edram.hlsli diff --git a/src/xenia/gpu/d3d12/shaders/edram_load_color_32bpp.cs.hlsl b/src/xenia/gpu/shaders/edram_load_color_32bpp.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/edram_load_color_32bpp.cs.hlsl rename to src/xenia/gpu/shaders/edram_load_color_32bpp.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/edram_load_color_64bpp.cs.hlsl b/src/xenia/gpu/shaders/edram_load_color_64bpp.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/edram_load_color_64bpp.cs.hlsl rename to src/xenia/gpu/shaders/edram_load_color_64bpp.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/edram_load_color_7e3.cs.hlsl b/src/xenia/gpu/shaders/edram_load_color_7e3.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/edram_load_color_7e3.cs.hlsl rename to src/xenia/gpu/shaders/edram_load_color_7e3.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/edram_load_depth_float.cs.hlsl b/src/xenia/gpu/shaders/edram_load_depth_float.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/edram_load_depth_float.cs.hlsl rename to src/xenia/gpu/shaders/edram_load_depth_float.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/edram_load_depth_unorm.cs.hlsl b/src/xenia/gpu/shaders/edram_load_depth_unorm.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/edram_load_depth_unorm.cs.hlsl rename to src/xenia/gpu/shaders/edram_load_depth_unorm.cs.hlsl diff --git 
a/src/xenia/gpu/d3d12/shaders/edram_load_store.hlsli b/src/xenia/gpu/shaders/edram_load_store.hlsli similarity index 100% rename from src/xenia/gpu/d3d12/shaders/edram_load_store.hlsli rename to src/xenia/gpu/shaders/edram_load_store.hlsli diff --git a/src/xenia/gpu/d3d12/shaders/edram_store_color_32bpp.cs.hlsl b/src/xenia/gpu/shaders/edram_store_color_32bpp.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/edram_store_color_32bpp.cs.hlsl rename to src/xenia/gpu/shaders/edram_store_color_32bpp.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/edram_store_color_64bpp.cs.hlsl b/src/xenia/gpu/shaders/edram_store_color_64bpp.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/edram_store_color_64bpp.cs.hlsl rename to src/xenia/gpu/shaders/edram_store_color_64bpp.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/edram_store_color_7e3.cs.hlsl b/src/xenia/gpu/shaders/edram_store_color_7e3.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/edram_store_color_7e3.cs.hlsl rename to src/xenia/gpu/shaders/edram_store_color_7e3.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/edram_store_depth_float.cs.hlsl b/src/xenia/gpu/shaders/edram_store_depth_float.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/edram_store_depth_float.cs.hlsl rename to src/xenia/gpu/shaders/edram_store_depth_float.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/edram_store_depth_unorm.cs.hlsl b/src/xenia/gpu/shaders/edram_store_depth_unorm.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/edram_store_depth_unorm.cs.hlsl rename to src/xenia/gpu/shaders/edram_store_depth_unorm.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/endian.hlsli b/src/xenia/gpu/shaders/endian.hlsli similarity index 100% rename from src/xenia/gpu/d3d12/shaders/endian.hlsli rename to src/xenia/gpu/shaders/endian.hlsli diff --git a/src/xenia/gpu/d3d12/shaders/fullscreen.vs.hlsl b/src/xenia/gpu/shaders/fullscreen.vs.hlsl similarity index 
100% rename from src/xenia/gpu/d3d12/shaders/fullscreen.vs.hlsl rename to src/xenia/gpu/shaders/fullscreen.vs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/pixel_formats.hlsli b/src/xenia/gpu/shaders/pixel_formats.hlsli similarity index 100% rename from src/xenia/gpu/d3d12/shaders/pixel_formats.hlsli rename to src/xenia/gpu/shaders/pixel_formats.hlsli diff --git a/src/xenia/gpu/d3d12/shaders/primitive_point_list.gs.hlsl b/src/xenia/gpu/shaders/primitive_point_list.gs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/primitive_point_list.gs.hlsl rename to src/xenia/gpu/shaders/primitive_point_list.gs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/primitive_quad_list.gs.hlsl b/src/xenia/gpu/shaders/primitive_quad_list.gs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/primitive_quad_list.gs.hlsl rename to src/xenia/gpu/shaders/primitive_quad_list.gs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/primitive_rectangle_list.gs.hlsl b/src/xenia/gpu/shaders/primitive_rectangle_list.gs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/primitive_rectangle_list.gs.hlsl rename to src/xenia/gpu/shaders/primitive_rectangle_list.gs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve.hlsli b/src/xenia/gpu/shaders/resolve.hlsli similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve.hlsli rename to src/xenia/gpu/shaders/resolve.hlsli diff --git a/src/xenia/gpu/d3d12/shaders/resolve_clear_32bpp.cs.hlsl b/src/xenia/gpu/shaders/resolve_clear_32bpp.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_clear_32bpp.cs.hlsl rename to src/xenia/gpu/shaders/resolve_clear_32bpp.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_clear_32bpp_2xres.cs.hlsl b/src/xenia/gpu/shaders/resolve_clear_32bpp_2xres.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_clear_32bpp_2xres.cs.hlsl rename to src/xenia/gpu/shaders/resolve_clear_32bpp_2xres.cs.hlsl diff --git 
a/src/xenia/gpu/d3d12/shaders/resolve_clear_64bpp.cs.hlsl b/src/xenia/gpu/shaders/resolve_clear_64bpp.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_clear_64bpp.cs.hlsl rename to src/xenia/gpu/shaders/resolve_clear_64bpp.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_clear_64bpp_2xres.cs.hlsl b/src/xenia/gpu/shaders/resolve_clear_64bpp_2xres.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_clear_64bpp_2xres.cs.hlsl rename to src/xenia/gpu/shaders/resolve_clear_64bpp_2xres.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_clear_depth_24_32.cs.hlsl b/src/xenia/gpu/shaders/resolve_clear_depth_24_32.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_clear_depth_24_32.cs.hlsl rename to src/xenia/gpu/shaders/resolve_clear_depth_24_32.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_fast_32bpp_1x2xmsaa.cs.hlsl b/src/xenia/gpu/shaders/resolve_fast_32bpp_1x2xmsaa.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_fast_32bpp_1x2xmsaa.cs.hlsl rename to src/xenia/gpu/shaders/resolve_fast_32bpp_1x2xmsaa.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_fast_32bpp_2xres.cs.hlsl b/src/xenia/gpu/shaders/resolve_fast_32bpp_2xres.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_fast_32bpp_2xres.cs.hlsl rename to src/xenia/gpu/shaders/resolve_fast_32bpp_2xres.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_fast_32bpp_4xmsaa.cs.hlsl b/src/xenia/gpu/shaders/resolve_fast_32bpp_4xmsaa.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_fast_32bpp_4xmsaa.cs.hlsl rename to src/xenia/gpu/shaders/resolve_fast_32bpp_4xmsaa.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_fast_64bpp_1x2xmsaa.cs.hlsl b/src/xenia/gpu/shaders/resolve_fast_64bpp_1x2xmsaa.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_fast_64bpp_1x2xmsaa.cs.hlsl rename to 
src/xenia/gpu/shaders/resolve_fast_64bpp_1x2xmsaa.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_fast_64bpp_2xres.cs.hlsl b/src/xenia/gpu/shaders/resolve_fast_64bpp_2xres.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_fast_64bpp_2xres.cs.hlsl rename to src/xenia/gpu/shaders/resolve_fast_64bpp_2xres.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_fast_64bpp_4xmsaa.cs.hlsl b/src/xenia/gpu/shaders/resolve_fast_64bpp_4xmsaa.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_fast_64bpp_4xmsaa.cs.hlsl rename to src/xenia/gpu/shaders/resolve_fast_64bpp_4xmsaa.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_full_128bpp.cs.hlsl b/src/xenia/gpu/shaders/resolve_full_128bpp.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_full_128bpp.cs.hlsl rename to src/xenia/gpu/shaders/resolve_full_128bpp.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_full_128bpp_2xres.cs.hlsl b/src/xenia/gpu/shaders/resolve_full_128bpp_2xres.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_full_128bpp_2xres.cs.hlsl rename to src/xenia/gpu/shaders/resolve_full_128bpp_2xres.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_full_16bpp.cs.hlsl b/src/xenia/gpu/shaders/resolve_full_16bpp.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_full_16bpp.cs.hlsl rename to src/xenia/gpu/shaders/resolve_full_16bpp.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_full_16bpp_2xres.cs.hlsl b/src/xenia/gpu/shaders/resolve_full_16bpp_2xres.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_full_16bpp_2xres.cs.hlsl rename to src/xenia/gpu/shaders/resolve_full_16bpp_2xres.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_full_32bpp.cs.hlsl b/src/xenia/gpu/shaders/resolve_full_32bpp.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_full_32bpp.cs.hlsl rename to 
src/xenia/gpu/shaders/resolve_full_32bpp.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_full_32bpp_2xres.cs.hlsl b/src/xenia/gpu/shaders/resolve_full_32bpp_2xres.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_full_32bpp_2xres.cs.hlsl rename to src/xenia/gpu/shaders/resolve_full_32bpp_2xres.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_full_64bpp.cs.hlsl b/src/xenia/gpu/shaders/resolve_full_64bpp.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_full_64bpp.cs.hlsl rename to src/xenia/gpu/shaders/resolve_full_64bpp.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_full_64bpp_2xres.cs.hlsl b/src/xenia/gpu/shaders/resolve_full_64bpp_2xres.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_full_64bpp_2xres.cs.hlsl rename to src/xenia/gpu/shaders/resolve_full_64bpp_2xres.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_full_8bpp.cs.hlsl b/src/xenia/gpu/shaders/resolve_full_8bpp.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_full_8bpp.cs.hlsl rename to src/xenia/gpu/shaders/resolve_full_8bpp.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/resolve_full_8bpp_2xres.cs.hlsl b/src/xenia/gpu/shaders/resolve_full_8bpp_2xres.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/resolve_full_8bpp_2xres.cs.hlsl rename to src/xenia/gpu/shaders/resolve_full_8bpp_2xres.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/stretch.ps.hlsl b/src/xenia/gpu/shaders/stretch.ps.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/stretch.ps.hlsl rename to src/xenia/gpu/shaders/stretch.ps.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/stretch_gamma.ps.hlsl b/src/xenia/gpu/shaders/stretch_gamma.ps.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/stretch_gamma.ps.hlsl rename to src/xenia/gpu/shaders/stretch_gamma.ps.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/tessellation.vs.hlsl 
b/src/xenia/gpu/shaders/tessellation.vs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/tessellation.vs.hlsl rename to src/xenia/gpu/shaders/tessellation.vs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_address.hlsli b/src/xenia/gpu/shaders/texture_address.hlsli similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_address.hlsli rename to src/xenia/gpu/shaders/texture_address.hlsli diff --git a/src/xenia/gpu/d3d12/shaders/texture_load.hlsli b/src/xenia/gpu/shaders/texture_load.hlsli similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load.hlsli rename to src/xenia/gpu/shaders/texture_load.hlsli diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_128bpb.cs.hlsl b/src/xenia/gpu/shaders/texture_load_128bpb.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_128bpb.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_128bpb.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_128bpb_2x.cs.hlsl b/src/xenia/gpu/shaders/texture_load_128bpb_2x.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_128bpb_2x.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_128bpb_2x.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_16bpb.cs.hlsl b/src/xenia/gpu/shaders/texture_load_16bpb.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_16bpb.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_16bpb.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_16bpb.hlsli b/src/xenia/gpu/shaders/texture_load_16bpb.hlsli similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_16bpb.hlsli rename to src/xenia/gpu/shaders/texture_load_16bpb.hlsli diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_16bpb_2x.cs.hlsl b/src/xenia/gpu/shaders/texture_load_16bpb_2x.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_16bpb_2x.cs.hlsl rename to 
src/xenia/gpu/shaders/texture_load_16bpb_2x.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_16bpb_2x.hlsli b/src/xenia/gpu/shaders/texture_load_16bpb_2x.hlsli similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_16bpb_2x.hlsli rename to src/xenia/gpu/shaders/texture_load_16bpb_2x.hlsli diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_32bpb.cs.hlsl b/src/xenia/gpu/shaders/texture_load_32bpb.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_32bpb.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_32bpb.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_32bpb_2x.cs.hlsl b/src/xenia/gpu/shaders/texture_load_32bpb_2x.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_32bpb_2x.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_32bpb_2x.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_32bpb_64bpb.hlsli b/src/xenia/gpu/shaders/texture_load_32bpb_64bpb.hlsli similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_32bpb_64bpb.hlsli rename to src/xenia/gpu/shaders/texture_load_32bpb_64bpb.hlsli diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_32bpb_64bpb_2x.hlsli b/src/xenia/gpu/shaders/texture_load_32bpb_64bpb_2x.hlsli similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_32bpb_64bpb_2x.hlsli rename to src/xenia/gpu/shaders/texture_load_32bpb_64bpb_2x.hlsli diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_64bpb.cs.hlsl b/src/xenia/gpu/shaders/texture_load_64bpb.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_64bpb.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_64bpb.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_64bpb_2x.cs.hlsl b/src/xenia/gpu/shaders/texture_load_64bpb_2x.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_64bpb_2x.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_64bpb_2x.cs.hlsl diff 
--git a/src/xenia/gpu/d3d12/shaders/texture_load_8bpb.cs.hlsl b/src/xenia/gpu/shaders/texture_load_8bpb.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_8bpb.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_8bpb.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_8bpb_2x.cs.hlsl b/src/xenia/gpu/shaders/texture_load_8bpb_2x.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_8bpb_2x.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_8bpb_2x.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_ctx1.cs.hlsl b/src/xenia/gpu/shaders/texture_load_ctx1.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_ctx1.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_ctx1.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_depth_float.cs.hlsl b/src/xenia/gpu/shaders/texture_load_depth_float.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_depth_float.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_depth_float.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_depth_float_2x.cs.hlsl b/src/xenia/gpu/shaders/texture_load_depth_float_2x.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_depth_float_2x.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_depth_float_2x.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_depth_unorm.cs.hlsl b/src/xenia/gpu/shaders/texture_load_depth_unorm.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_depth_unorm.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_depth_unorm.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_depth_unorm_2x.cs.hlsl b/src/xenia/gpu/shaders/texture_load_depth_unorm_2x.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_depth_unorm_2x.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_depth_unorm_2x.cs.hlsl diff --git 
a/src/xenia/gpu/d3d12/shaders/texture_load_dxn_rg8.cs.hlsl b/src/xenia/gpu/shaders/texture_load_dxn_rg8.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_dxn_rg8.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_dxn_rg8.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_dxt1_rgba8.cs.hlsl b/src/xenia/gpu/shaders/texture_load_dxt1_rgba8.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_dxt1_rgba8.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_dxt1_rgba8.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_dxt3_rgba8.cs.hlsl b/src/xenia/gpu/shaders/texture_load_dxt3_rgba8.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_dxt3_rgba8.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_dxt3_rgba8.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_dxt3a.cs.hlsl b/src/xenia/gpu/shaders/texture_load_dxt3a.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_dxt3a.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_dxt3a.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_dxt3aas1111.cs.hlsl b/src/xenia/gpu/shaders/texture_load_dxt3aas1111.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_dxt3aas1111.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_dxt3aas1111.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_dxt5_rgba8.cs.hlsl b/src/xenia/gpu/shaders/texture_load_dxt5_rgba8.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_dxt5_rgba8.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_dxt5_rgba8.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_dxt5a_r8.cs.hlsl b/src/xenia/gpu/shaders/texture_load_dxt5a_r8.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_dxt5a_r8.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_dxt5a_r8.cs.hlsl diff --git 
a/src/xenia/gpu/d3d12/shaders/texture_load_r10g11b11_rgba16.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_r10g11b11_rgba16.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_r10g11b11_rgba16_2x.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_2x.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_r10g11b11_rgba16_2x.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_2x.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_r10g11b11_rgba16_snorm.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_snorm.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_r10g11b11_rgba16_snorm.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_snorm.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_r10g11b11_rgba16_snorm_2x.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_snorm_2x.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_r10g11b11_rgba16_snorm_2x.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_r10g11b11_rgba16_snorm_2x.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_r11g11b10_rgba16.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_r11g11b10_rgba16.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_r11g11b10_rgba16_2x.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_2x.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_r11g11b10_rgba16_2x.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_2x.cs.hlsl diff --git 
a/src/xenia/gpu/d3d12/shaders/texture_load_r11g11b10_rgba16_snorm.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_snorm.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_r11g11b10_rgba16_snorm.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_snorm.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_r11g11b10_rgba16_snorm_2x.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_snorm_2x.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_r11g11b10_rgba16_snorm_2x.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_r11g11b10_rgba16_snorm_2x.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_r4g4b4a4_b4g4r4a4.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r4g4b4a4_b4g4r4a4.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_r4g4b4a4_b4g4r4a4.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_r4g4b4a4_b4g4r4a4.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_r4g4b4a4_b4g4r4a4_2x.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r4g4b4a4_b4g4r4a4_2x.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_r4g4b4a4_b4g4r4a4_2x.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_r4g4b4a4_b4g4r4a4_2x.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_r5g5b5a1_b5g5r5a1.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r5g5b5a1_b5g5r5a1.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_r5g5b5a1_b5g5r5a1.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_r5g5b5a1_b5g5r5a1.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_r5g5b5a1_b5g5r5a1_2x.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r5g5b5a1_b5g5r5a1_2x.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_r5g5b5a1_b5g5r5a1_2x.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_r5g5b5a1_b5g5r5a1_2x.cs.hlsl diff --git 
a/src/xenia/gpu/d3d12/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_r5g5b6_b5g6r5_swizzle_rbga_2x.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_r5g6b5_b5g6r5.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r5g6b5_b5g6r5.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_r5g6b5_b5g6r5.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_r5g6b5_b5g6r5.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/texture_load_r5g6b5_b5g6r5_2x.cs.hlsl b/src/xenia/gpu/shaders/texture_load_r5g6b5_b5g6r5_2x.cs.hlsl similarity index 100% rename from src/xenia/gpu/d3d12/shaders/texture_load_r5g6b5_b5g6r5_2x.cs.hlsl rename to src/xenia/gpu/shaders/texture_load_r5g6b5_b5g6r5_2x.cs.hlsl diff --git a/src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli b/src/xenia/gpu/shaders/xenos_draw.hlsli similarity index 100% rename from src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli rename to src/xenia/gpu/shaders/xenos_draw.hlsli diff --git a/src/xenia/gpu/spirv/compiler.cc b/src/xenia/gpu/spirv/compiler.cc deleted file mode 100644 index d31b36996..000000000 --- a/src/xenia/gpu/spirv/compiler.cc +++ /dev/null @@ -1,36 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. 
All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/gpu/spirv/compiler.h" - -namespace xe { -namespace gpu { -namespace spirv { - -Compiler::Compiler() {} - -void Compiler::AddPass(std::unique_ptr pass) { - compiler_passes_.push_back(std::move(pass)); -} - -bool Compiler::Compile(spv::Module* module) { - for (auto& pass : compiler_passes_) { - if (!pass->Run(module)) { - return false; - } - } - - return true; -} - -void Compiler::Reset() { compiler_passes_.clear(); } - -} // namespace spirv -} // namespace gpu -} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/spirv/compiler.h b/src/xenia/gpu/spirv/compiler.h deleted file mode 100644 index fd27969ee..000000000 --- a/src/xenia/gpu/spirv/compiler.h +++ /dev/null @@ -1,41 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_SPIRV_COMPILER_H_ -#define XENIA_GPU_SPIRV_COMPILER_H_ - -#include "xenia/base/arena.h" -#include "xenia/gpu/spirv/compiler_pass.h" - -#include "third_party/glslang-spirv/SpvBuilder.h" -#include "third_party/spirv/GLSL.std.450.hpp11" - -namespace xe { -namespace gpu { -namespace spirv { - -// SPIR-V Compiler. Designed to optimize SPIR-V code before feeding it into the -// drivers. 
-class Compiler { - public: - Compiler(); - - void AddPass(std::unique_ptr pass); - void Reset(); - bool Compile(spv::Module* module); - - private: - std::vector> compiler_passes_; -}; - -} // namespace spirv -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_SPIRV_COMPILER_H_ \ No newline at end of file diff --git a/src/xenia/gpu/spirv/compiler_pass.h b/src/xenia/gpu/spirv/compiler_pass.h deleted file mode 100644 index 0d81aeeee..000000000 --- a/src/xenia/gpu/spirv/compiler_pass.h +++ /dev/null @@ -1,37 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_SPIRV_COMPILER_PASS_H_ -#define XENIA_GPU_SPIRV_COMPILER_PASS_H_ - -#include "xenia/base/arena.h" - -#include "third_party/glslang-spirv/SpvBuilder.h" -#include "third_party/spirv/GLSL.std.450.hpp11" - -namespace xe { -namespace gpu { -namespace spirv { - -class CompilerPass { - public: - CompilerPass() = default; - virtual ~CompilerPass() {} - - virtual bool Run(spv::Module* module) = 0; - - private: - xe::Arena ir_arena_; -}; - -} // namespace spirv -} // namespace gpu -} // namespace xe - -#endif \ No newline at end of file diff --git a/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cpp b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cpp deleted file mode 100644 index 4d719f769..000000000 --- a/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - 
****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/gpu/spirv/passes/control_flow_analysis_pass.h" - -namespace xe { -namespace gpu { -namespace spirv { - -ControlFlowAnalysisPass::ControlFlowAnalysisPass() {} - -bool ControlFlowAnalysisPass::Run(spv::Module* module) { - for (auto function : module->getFunctions()) { - // For each OpBranchConditional, see if we can find a point where control - // flow converges and then append an OpSelectionMerge. - // Potential problems: while loops constructed from branch instructions - } - - return true; -} - -} // namespace spirv -} // namespace gpu -} // namespace xe \ No newline at end of file diff --git a/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h deleted file mode 100644 index 6b279e251..000000000 --- a/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h +++ /dev/null @@ -1,34 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_ -#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_ - -#include "xenia/gpu/spirv/compiler_pass.h" - -namespace xe { -namespace gpu { -namespace spirv { - -// Control-flow analysis pass. Runs through control-flow and adds merge opcodes -// where necessary. 
-class ControlFlowAnalysisPass : public CompilerPass { - public: - ControlFlowAnalysisPass(); - - bool Run(spv::Module* module) override; - - private: -}; - -} // namespace spirv -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_ \ No newline at end of file diff --git a/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc deleted file mode 100644 index d32997d47..000000000 --- a/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc +++ /dev/null @@ -1,48 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/gpu/spirv/passes/control_flow_simplification_pass.h" - -namespace xe { -namespace gpu { -namespace spirv { - -ControlFlowSimplificationPass::ControlFlowSimplificationPass() {} - -bool ControlFlowSimplificationPass::Run(spv::Module* module) { - for (auto function : module->getFunctions()) { - // Walk through the blocks in the function and merge any blocks which are - // unconditionally dominated. - for (auto it = function->getBlocks().end() - 1; - it != function->getBlocks().begin();) { - auto block = *it; - if (!block->isUnreachable() && block->getPredecessors().size() == 1) { - auto prev_block = block->getPredecessors()[0]; - auto last_instr = - prev_block->getInstruction(prev_block->getInstructionCount() - 1); - if (last_instr->getOpCode() == spv::Op::OpBranch) { - if (prev_block->getSuccessors().size() == 1 && - prev_block->getSuccessors()[0] == block) { - // We're dominated by this block. Merge into it. 
- prev_block->merge(block); - block->setUnreachable(); - } - } - } - - --it; - } - } - - return true; -} - -} // namespace spirv -} // namespace gpu -} // namespace xe diff --git a/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h deleted file mode 100644 index f851d24f1..000000000 --- a/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h +++ /dev/null @@ -1,34 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_ -#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_ - -#include "xenia/gpu/spirv/compiler_pass.h" - -namespace xe { -namespace gpu { -namespace spirv { - -// Control-flow simplification pass. Combines adjacent blocks and marks -// any unreachable blocks. 
-class ControlFlowSimplificationPass : public CompilerPass { - public: - ControlFlowSimplificationPass(); - - bool Run(spv::Module* module) override; - - private: -}; - -} // namespace spirv -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_ \ No newline at end of file diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc deleted file mode 100644 index bb1bb51f0..000000000 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ /dev/null @@ -1,3471 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/gpu/spirv_shader_translator.h" - -#include -#include -#include -#include -#include - -#include "third_party/fmt/include/fmt/format.h" -#include "xenia/base/cvar.h" -#include "xenia/base/logging.h" -#include "xenia/base/math.h" - -DEFINE_bool(spv_validate, false, "Validate SPIR-V shaders after generation", - "GPU"); -DEFINE_bool(spv_disasm, false, "Disassemble SPIR-V shaders after generation", - "GPU"); - -namespace xe { -namespace gpu { -using namespace ucode; - -constexpr uint32_t kMaxInterpolators = 16; -constexpr uint32_t kMaxTemporaryRegisters = 64; - -using spv::GLSLstd450; -using spv::Id; -using spv::Op; - -SpirvShaderTranslator::SpirvShaderTranslator() {} -SpirvShaderTranslator::~SpirvShaderTranslator() = default; - -void SpirvShaderTranslator::StartTranslation() { - // Create a new builder. - builder_ = std::make_unique(0x10000, 0xFFFFFFFF, nullptr); - auto& b = *builder_; - - // Import required modules. 
- glsl_std_450_instruction_set_ = b.import("GLSL.std.450"); - - // Configure environment. - b.setSource(spv::SourceLanguage::SourceLanguageUnknown, 0); - b.setMemoryModel(spv::AddressingModel::AddressingModelLogical, - spv::MemoryModel::MemoryModelGLSL450); - b.addCapability(spv::Capability::CapabilityShader); - b.addCapability(spv::Capability::CapabilityImageQuery); - - if (is_vertex_shader()) { - b.addCapability(spv::Capability::CapabilityClipDistance); - b.addCapability(spv::Capability::CapabilityCullDistance); - } - if (is_pixel_shader()) { - b.addCapability(spv::Capability::CapabilityDerivativeControl); - } - - bool_type_ = b.makeBoolType(); - float_type_ = b.makeFloatType(32); - int_type_ = b.makeIntType(32); - uint_type_ = b.makeUintType(32); - vec2_int_type_ = b.makeVectorType(int_type_, 2); - vec2_uint_type_ = b.makeVectorType(uint_type_, 2); - vec2_float_type_ = b.makeVectorType(float_type_, 2); - vec3_int_type_ = b.makeVectorType(int_type_, 3); - vec3_float_type_ = b.makeVectorType(float_type_, 3); - vec4_float_type_ = b.makeVectorType(float_type_, 4); - vec4_int_type_ = b.makeVectorType(int_type_, 4); - vec4_uint_type_ = b.makeVectorType(uint_type_, 4); - vec2_bool_type_ = b.makeVectorType(bool_type_, 2); - vec3_bool_type_ = b.makeVectorType(bool_type_, 3); - vec4_bool_type_ = b.makeVectorType(bool_type_, 4); - - vec4_float_one_ = b.makeCompositeConstant( - vec4_float_type_, - std::vector({b.makeFloatConstant(1.f), b.makeFloatConstant(1.f), - b.makeFloatConstant(1.f), b.makeFloatConstant(1.f)})); - vec4_float_zero_ = b.makeCompositeConstant( - vec4_float_type_, - std::vector({b.makeFloatConstant(0.f), b.makeFloatConstant(0.f), - b.makeFloatConstant(0.f), b.makeFloatConstant(0.f)})); - - cube_function_ = CreateCubeFunction(); - - spv::Block* function_block = nullptr; - translated_main_ = - b.makeFunctionEntry(spv::NoPrecision, b.makeVoidType(), "translated_main", - {}, {}, &function_block); - - assert_not_zero(register_count()); - registers_type_ = 
b.makeArrayType(vec4_float_type_, - b.makeUintConstant(register_count()), 0); - registers_ptr_ = b.createVariable(spv::StorageClass::StorageClassFunction, - registers_type_, "r"); - - aL_ = b.createVariable(spv::StorageClass::StorageClassFunction, - vec4_uint_type_, "aL"); - - loop_count_ = b.createVariable(spv::StorageClass::StorageClassFunction, - vec4_uint_type_, "loop_count"); - p0_ = b.createVariable(spv::StorageClass::StorageClassFunction, bool_type_, - "p0"); - ps_ = b.createVariable(spv::StorageClass::StorageClassFunction, float_type_, - "ps"); - pv_ = b.createVariable(spv::StorageClass::StorageClassFunction, - vec4_float_type_, "pv"); - pc_ = b.createVariable(spv::StorageClass::StorageClassFunction, int_type_, - "pc"); - a0_ = b.createVariable(spv::StorageClass::StorageClassFunction, int_type_, - "a0"); - lod_ = b.createVariable(spv::StorageClass::StorageClassFunction, float_type_, - "lod"); - - // Uniform constants. - Id float_consts_type = - b.makeArrayType(vec4_float_type_, b.makeUintConstant(512), 1); - Id loop_consts_type = - b.makeArrayType(vec4_uint_type_, b.makeUintConstant(8), 1); - Id bool_consts_type = - b.makeArrayType(vec4_uint_type_, b.makeUintConstant(2), 1); - - // Strides - b.addDecoration(float_consts_type, spv::Decoration::DecorationArrayStride, - 4 * sizeof(float)); - b.addDecoration(loop_consts_type, spv::Decoration::DecorationArrayStride, - 4 * sizeof(uint32_t)); - b.addDecoration(bool_consts_type, spv::Decoration::DecorationArrayStride, - 4 * sizeof(uint32_t)); - - Id consts_struct_type = b.makeStructType( - {float_consts_type, loop_consts_type, bool_consts_type}, "consts_type"); - b.addDecoration(consts_struct_type, spv::Decoration::DecorationBlock); - - // Constants member decorations. 
- b.addMemberDecoration(consts_struct_type, 0, - spv::Decoration::DecorationOffset, 0); - b.addMemberName(consts_struct_type, 0, "float_consts"); - - b.addMemberDecoration(consts_struct_type, 1, - spv::Decoration::DecorationOffset, - 512 * 4 * sizeof(float)); - b.addMemberName(consts_struct_type, 1, "loop_consts"); - - b.addMemberDecoration(consts_struct_type, 2, - spv::Decoration::DecorationOffset, - 512 * 4 * sizeof(float) + 32 * sizeof(uint32_t)); - b.addMemberName(consts_struct_type, 2, "bool_consts"); - - consts_ = b.createVariable(spv::StorageClass::StorageClassUniform, - consts_struct_type, "consts"); - - b.addDecoration(consts_, spv::Decoration::DecorationDescriptorSet, 0); - if (is_vertex_shader()) { - b.addDecoration(consts_, spv::Decoration::DecorationBinding, 0); - } else if (is_pixel_shader()) { - b.addDecoration(consts_, spv::Decoration::DecorationBinding, 1); - } - - // Push constants, represented by SpirvPushConstants. - Id push_constants_type = - b.makeStructType({vec4_float_type_, vec4_float_type_, vec4_float_type_, - vec4_float_type_, vec4_float_type_, uint_type_}, - "push_consts_type"); - b.addDecoration(push_constants_type, spv::Decoration::DecorationBlock); - - // float4 window_scale; - b.addMemberDecoration( - push_constants_type, 0, spv::Decoration::DecorationOffset, - static_cast(offsetof(SpirvPushConstants, window_scale))); - b.addMemberName(push_constants_type, 0, "window_scale"); - // float4 vtx_fmt; - b.addMemberDecoration( - push_constants_type, 1, spv::Decoration::DecorationOffset, - static_cast(offsetof(SpirvPushConstants, vtx_fmt))); - b.addMemberName(push_constants_type, 1, "vtx_fmt"); - // float4 vtx_fmt; - b.addMemberDecoration( - push_constants_type, 2, spv::Decoration::DecorationOffset, - static_cast(offsetof(SpirvPushConstants, point_size))); - b.addMemberName(push_constants_type, 2, "point_size"); - // float4 alpha_test; - b.addMemberDecoration( - push_constants_type, 3, spv::Decoration::DecorationOffset, - 
static_cast(offsetof(SpirvPushConstants, alpha_test))); - b.addMemberName(push_constants_type, 3, "alpha_test"); - // float4 color_exp_bias; - b.addMemberDecoration( - push_constants_type, 4, spv::Decoration::DecorationOffset, - static_cast(offsetof(SpirvPushConstants, color_exp_bias))); - b.addMemberName(push_constants_type, 4, "color_exp_bias"); - // uint ps_param_gen; - b.addMemberDecoration( - push_constants_type, 5, spv::Decoration::DecorationOffset, - static_cast(offsetof(SpirvPushConstants, ps_param_gen))); - b.addMemberName(push_constants_type, 5, "ps_param_gen"); - push_consts_ = b.createVariable(spv::StorageClass::StorageClassPushConstant, - push_constants_type, "push_consts"); - - if (!texture_bindings().empty()) { - image_2d_type_ = - b.makeImageType(float_type_, spv::Dim::Dim2D, false, false, false, 1, - spv::ImageFormat::ImageFormatUnknown); - image_3d_type_ = - b.makeImageType(float_type_, spv::Dim::Dim3D, false, false, false, 1, - spv::ImageFormat::ImageFormatUnknown); - image_cube_type_ = - b.makeImageType(float_type_, spv::Dim::DimCube, false, false, false, 1, - spv::ImageFormat::ImageFormatUnknown); - - // Texture bindings - Id tex_t[] = {b.makeSampledImageType(image_2d_type_), - b.makeSampledImageType(image_3d_type_), - b.makeSampledImageType(image_cube_type_)}; - - uint32_t num_tex_bindings = 0; - for (const auto& binding : texture_bindings()) { - // Calculate the highest binding index. 
- num_tex_bindings = - std::max(num_tex_bindings, uint32_t(binding.binding_index + 1)); - } - - Id tex_a_t[] = { - b.makeArrayType(tex_t[0], b.makeUintConstant(num_tex_bindings), 0), - b.makeArrayType(tex_t[1], b.makeUintConstant(num_tex_bindings), 0), - b.makeArrayType(tex_t[2], b.makeUintConstant(num_tex_bindings), 0)}; - - // Create 3 texture types, all aliased on the same binding - for (int i = 0; i < 3; i++) { - tex_[i] = b.createVariable(spv::StorageClass::StorageClassUniformConstant, - tex_a_t[i], - fmt::format("textures{}D", i + 2).c_str()); - b.addDecoration(tex_[i], spv::Decoration::DecorationDescriptorSet, 1); - b.addDecoration(tex_[i], spv::Decoration::DecorationBinding, 0); - } - - // Set up the map from binding -> ssbo index - for (const auto& binding : texture_bindings()) { - tex_binding_map_[binding.fetch_constant] = - uint32_t(binding.binding_index); - } - } - - // Interpolators. - Id interpolators_type = b.makeArrayType( - vec4_float_type_, b.makeUintConstant(kMaxInterpolators), 0); - if (is_vertex_shader()) { - // Vertex inputs/outputs - // Inputs: 32 SSBOs on DS 2 binding 0 - - if (!vertex_bindings().empty()) { - // Runtime array for vertex data - Id vtx_t = b.makeRuntimeArray(uint_type_); - b.addDecoration(vtx_t, spv::Decoration::DecorationArrayStride, - sizeof(uint32_t)); - - Id vtx_s = b.makeStructType({vtx_t}, "vertex_type"); - b.addDecoration(vtx_s, spv::Decoration::DecorationBufferBlock); - - // Describe the actual data - b.addMemberName(vtx_s, 0, "data"); - b.addMemberDecoration(vtx_s, 0, spv::Decoration::DecorationOffset, 0); - - // Create the vertex bindings variable. 
- Id vtx_a_t = b.makeArrayType( - vtx_s, b.makeUintConstant(uint32_t(vertex_bindings().size())), 0); - vtx_ = b.createVariable(spv::StorageClass::StorageClassUniform, vtx_a_t, - "vertex_bindings"); - - // DS 2 binding 0 - b.addDecoration(vtx_, spv::Decoration::DecorationDescriptorSet, 2); - b.addDecoration(vtx_, spv::Decoration::DecorationBinding, 0); - b.addDecoration(vtx_, spv::Decoration::DecorationNonWritable); - - // Set up the map from binding -> ssbo index - for (const auto& binding : vertex_bindings()) { - vtx_binding_map_[binding.fetch_constant] = binding.binding_index; - } - } - - // Outputs - interpolators_ = b.createVariable(spv::StorageClass::StorageClassOutput, - interpolators_type, "interpolators"); - b.addDecoration(interpolators_, spv::Decoration::DecorationLocation, 0); - for (uint32_t i = 0; i < std::min(register_count(), kMaxInterpolators); - i++) { - // Zero interpolators. - auto ptr = b.createAccessChain(spv::StorageClass::StorageClassOutput, - interpolators_, - std::vector({b.makeUintConstant(i)})); - b.createStore(vec4_float_zero_, ptr); - } - - point_size_ = b.createVariable(spv::StorageClass::StorageClassOutput, - float_type_, "point_size"); - b.addDecoration(point_size_, spv::Decoration::DecorationLocation, 17); - // Set default point-size value (-1.0f, indicating to the geometry shader - // that the register value should be used instead of the per-vertex value) - b.createStore(b.makeFloatConstant(-1.0f), point_size_); - - point_coord_ = b.createVariable(spv::StorageClass::StorageClassOutput, - vec2_float_type_, "point_coord"); - b.addDecoration(point_coord_, spv::Decoration::DecorationLocation, 16); - // point_coord is only ever populated in a geometry shader. Just write - // zero to it in the vertex shader. 
- b.createStore( - b.makeCompositeConstant(vec2_float_type_, - std::vector({b.makeFloatConstant(0.0f), - b.makeFloatConstant(0.0f)})), - point_coord_); - - pos_ = b.createVariable(spv::StorageClass::StorageClassOutput, - vec4_float_type_, "gl_Position"); - b.addDecoration(pos_, spv::Decoration::DecorationBuiltIn, - spv::BuiltIn::BuiltInPosition); - - vertex_idx_ = b.createVariable(spv::StorageClass::StorageClassInput, - int_type_, "gl_VertexIndex"); - b.addDecoration(vertex_idx_, spv::Decoration::DecorationBuiltIn, - spv::BuiltIn::BuiltInVertexIndex); - - interface_ids_.push_back(interpolators_); - interface_ids_.push_back(point_coord_); - interface_ids_.push_back(point_size_); - interface_ids_.push_back(pos_); - interface_ids_.push_back(vertex_idx_); - - auto vertex_idx = b.createLoad(vertex_idx_); - vertex_idx = - b.createUnaryOp(spv::Op::OpConvertSToF, float_type_, vertex_idx); - auto r0_ptr = b.createAccessChain(spv::StorageClass::StorageClassFunction, - registers_ptr_, - std::vector({b.makeUintConstant(0)})); - auto r0 = b.createLoad(r0_ptr); - r0 = b.createCompositeInsert(vertex_idx, r0, vec4_float_type_, 0); - b.createStore(r0, r0_ptr); - } else { - // Pixel inputs from vertex shader. - interpolators_ = b.createVariable(spv::StorageClass::StorageClassInput, - interpolators_type, "interpolators"); - b.addDecoration(interpolators_, spv::Decoration::DecorationLocation, 0); - - point_coord_ = b.createVariable(spv::StorageClass::StorageClassInput, - vec2_float_type_, "point_coord"); - b.addDecoration(point_coord_, spv::Decoration::DecorationLocation, 16); - - // Pixel fragment outputs (one per render target). 
- Id frag_outputs_type = - b.makeArrayType(vec4_float_type_, b.makeUintConstant(4), 0); - frag_outputs_ = b.createVariable(spv::StorageClass::StorageClassOutput, - frag_outputs_type, "oC"); - b.addDecoration(frag_outputs_, spv::Decoration::DecorationLocation, 0); - - frag_depth_ = b.createVariable(spv::StorageClass::StorageClassOutput, - float_type_, "gl_FragDepth"); - b.addDecoration(frag_depth_, spv::Decoration::DecorationBuiltIn, - spv::BuiltIn::BuiltInFragDepth); - - interface_ids_.push_back(interpolators_); - interface_ids_.push_back(point_coord_); - interface_ids_.push_back(frag_outputs_); - interface_ids_.push_back(frag_depth_); - // TODO(benvanik): frag depth, etc. - - // TODO(DrChat): Verify this naive, stupid approach to uninitialized values. - for (uint32_t i = 0; i < 4; i++) { - auto idx = b.makeUintConstant(i); - auto oC = b.createAccessChain(spv::StorageClass::StorageClassOutput, - frag_outputs_, std::vector({idx})); - b.createStore(vec4_float_zero_, oC); - } - - // Copy interpolators to r[0..16]. - // TODO: Need physical addressing in order to do this. 
- // b.createNoResultOp(spv::Op::OpCopyMemorySized, - // {registers_ptr_, interpolators_, - // b.makeUintConstant(16 * 4 * sizeof(float))}); - for (uint32_t i = 0; i < std::min(register_count(), kMaxInterpolators); - i++) { - // For now, copy interpolators register-by-register :/ - auto idx = b.makeUintConstant(i); - auto i_a = b.createAccessChain(spv::StorageClass::StorageClassInput, - interpolators_, std::vector({idx})); - auto r_a = b.createAccessChain(spv::StorageClass::StorageClassFunction, - registers_ptr_, std::vector({idx})); - b.createNoResultOp(spv::Op::OpCopyMemory, std::vector({r_a, i_a})); - } - - // Setup ps_param_gen - auto ps_param_gen_idx_ptr = b.createAccessChain( - spv::StorageClass::StorageClassPushConstant, push_consts_, - std::vector({b.makeUintConstant(5)})); - auto ps_param_gen_idx = b.createLoad(ps_param_gen_idx_ptr); - - auto frag_coord = b.createVariable(spv::StorageClass::StorageClassInput, - vec4_float_type_, "gl_FragCoord"); - b.addDecoration(frag_coord, spv::Decoration::DecorationBuiltIn, - spv::BuiltIn::BuiltInFragCoord); - - interface_ids_.push_back(frag_coord); - - auto param = b.createOp( - spv::Op::OpVectorShuffle, vec4_float_type_, - {b.createLoad(frag_coord), b.createLoad(point_coord_), 0, 1, 4, 5}); - /* - // TODO: gl_FrontFacing - auto param_x = b.createCompositeExtract(param, float_type_, 0); - auto param_x_inv = b.createBinOp(spv::Op::OpFMul, float_type_, param_x, - b.makeFloatConstant(-1.f)); - param_x = b.createCompositeInsert(param_x_inv, param, vec4_float_type_, 0); - */ - - auto cond = b.createBinOp(spv::Op::OpINotEqual, bool_type_, - ps_param_gen_idx, b.makeUintConstant(-1)); - spv::Builder::If ifb(cond, 0, b); - - // FYI: We do this instead of r[ps_param_gen_idx] because that causes - // nvidia to move all registers into local memory (slow!) 
- for (uint32_t i = 0; i < std::min(register_count(), kMaxInterpolators); - i++) { - auto reg_ptr = b.createAccessChain( - spv::StorageClass::StorageClassFunction, registers_ptr_, - std::vector({b.makeUintConstant(i)})); - - auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, ps_param_gen_idx, - b.makeUintConstant(i)); - cond = b.smearScalar(spv::NoPrecision, cond, vec4_bool_type_); - auto reg = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, cond, param, - b.createLoad(reg_ptr)); - b.createStore(reg, reg_ptr); - } - - ifb.makeEndIf(); - } - - b.createStore(b.makeIntConstant(0x0), pc_); - - loop_head_block_ = &b.makeNewBlock(); - auto block = &b.makeNewBlock(); - loop_body_block_ = &b.makeNewBlock(); - loop_cont_block_ = &b.makeNewBlock(); - loop_exit_block_ = &b.makeNewBlock(); - b.createBranch(loop_head_block_); - - // Setup continue block - b.setBuildPoint(loop_cont_block_); - b.createBranch(loop_head_block_); - - // While loop header block - b.setBuildPoint(loop_head_block_); - b.createLoopMerge(loop_exit_block_, loop_cont_block_, - spv::LoopControlMask::LoopControlDontUnrollMask, 0); - b.createBranch(block); - - // Condition block - b.setBuildPoint(block); - - // while (pc != 0xFFFF) - auto c = b.createBinOp(spv::Op::OpINotEqual, bool_type_, b.createLoad(pc_), - b.makeIntConstant(0xFFFF)); - b.createConditionalBranch(c, loop_body_block_, loop_exit_block_); - b.setBuildPoint(loop_body_block_); -} - -std::vector SpirvShaderTranslator::CompleteTranslation() { - auto& b = *builder_; - - assert_false(open_predicated_block_); - b.setBuildPoint(loop_exit_block_); - b.makeReturn(false); - exec_cond_ = false; - exec_skip_block_ = nullptr; - - // main() entry point. 
- spv::Block* entry_block; - auto mainFn = b.makeFunctionEntry(spv::NoPrecision, b.makeVoidType(), "main", - {}, {}, &entry_block); - if (is_vertex_shader()) { - auto entry = b.addEntryPoint(spv::ExecutionModel::ExecutionModelVertex, - mainFn, "main"); - - for (auto id : interface_ids_) { - entry->addIdOperand(id); - } - } else { - auto entry = b.addEntryPoint(spv::ExecutionModel::ExecutionModelFragment, - mainFn, "main"); - b.addExecutionMode(mainFn, spv::ExecutionModeOriginUpperLeft); - - // If we write a new depth value, we must declare this mode! - if (writes_depth()) { - b.addExecutionMode(mainFn, spv::ExecutionModeDepthReplacing); - } - - for (auto id : interface_ids_) { - entry->addIdOperand(id); - } - } - - // TODO(benvanik): transform feedback. - if (false) { - b.addCapability(spv::Capability::CapabilityTransformFeedback); - b.addExecutionMode(mainFn, spv::ExecutionMode::ExecutionModeXfb); - } - - b.createFunctionCall(translated_main_, std::vector({})); - if (is_vertex_shader()) { - // gl_Position transform - auto vtx_fmt_ptr = b.createAccessChain( - spv::StorageClass::StorageClassPushConstant, push_consts_, - std::vector({b.makeUintConstant(1)})); - auto window_scale_ptr = b.createAccessChain( - spv::StorageClass::StorageClassPushConstant, push_consts_, - std::vector({b.makeUintConstant(0)})); - auto vtx_fmt = b.createLoad(vtx_fmt_ptr); - auto window_scale = b.createLoad(window_scale_ptr); - - auto p = b.createLoad(pos_); - auto c = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_, vtx_fmt, - vec4_float_zero_); - - // pos.w = vtx_fmt.w == 0.0 ? 1.0 / pos.w : pos.w - auto c_w = b.createCompositeExtract(c, bool_type_, 3); - auto p_w = b.createCompositeExtract(p, float_type_, 3); - auto p_w_inv = b.createBinOp(spv::Op::OpFDiv, float_type_, - b.makeFloatConstant(1.f), p_w); - p_w = b.createTriOp(spv::Op::OpSelect, float_type_, c_w, p_w, p_w_inv); - - // pos.xyz = vtx_fmt.xyz != 0.0 ? 
pos.xyz / pos.w : pos.xyz - auto p_all_w = b.smearScalar(spv::NoPrecision, p_w, vec4_float_type_); - auto p_inv = b.createBinOp(spv::Op::OpFDiv, vec4_float_type_, p, p_all_w); - p = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, p_inv, p); - - // Reinsert w - p = b.createCompositeInsert(p_w, p, vec4_float_type_, 3); - - // Apply window offset - // pos.xy += window_scale.zw - auto window_offset = b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_, - {window_scale, window_scale, 2, 3, 0, 1}); - auto p_offset = - b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, p, window_offset); - - // Apply window scaling - // pos.xy *= window_scale.xy - auto p_scaled = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, p_offset, - window_scale); - - p = b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_, - {p, p_scaled, 4, 5, 2, 3}); - - b.createStore(p, pos_); - } else { - // Color exponent bias - { - auto bias_ptr = b.createAccessChain( - spv::StorageClass::StorageClassPushConstant, push_consts_, - std::vector({b.makeUintConstant(4)})); - auto bias = b.createLoad(bias_ptr); - for (uint32_t i = 0; i < 4; i++) { - auto bias_value = b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_, - {bias, bias, i, i, i, i}); - auto oC_ptr = b.createAccessChain( - spv::StorageClass::StorageClassOutput, frag_outputs_, - std::vector({b.makeUintConstant(i)})); - auto oC_biased = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, - b.createLoad(oC_ptr), bias_value); - b.createStore(oC_biased, oC_ptr); - } - } - - // Alpha test - { - auto alpha_test_ptr = b.createAccessChain( - spv::StorageClass::StorageClassPushConstant, push_consts_, - std::vector({b.makeUintConstant(3)})); - auto alpha_test = b.createLoad(alpha_test_ptr); - - auto alpha_test_enabled = - b.createCompositeExtract(alpha_test, float_type_, 0); - auto alpha_test_func = - b.createCompositeExtract(alpha_test, float_type_, 1); - auto alpha_test_ref = - b.createCompositeExtract(alpha_test, float_type_, 2); - - alpha_test_func = 
- b.createUnaryOp(spv::Op::OpConvertFToU, uint_type_, alpha_test_func); - - auto oC0_ptr = b.createAccessChain( - spv::StorageClass::StorageClassOutput, frag_outputs_, - std::vector({b.makeUintConstant(0)})); - auto oC0_alpha = - b.createCompositeExtract(b.createLoad(oC0_ptr), float_type_, 3); - - auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, - alpha_test_enabled, b.makeFloatConstant(1.f)); - spv::Builder::If alpha_if(cond, 0, b); - - std::vector switch_segments; - b.makeSwitch( - alpha_test_func, 0, 8, std::vector({0, 1, 2, 3, 4, 5, 6, 7}), - std::vector({0, 1, 2, 3, 4, 5, 6, 7}), 7, switch_segments); - - const static spv::Op alpha_op_map[] = { - spv::Op::OpNop, - spv::Op::OpFOrdGreaterThanEqual, - spv::Op::OpFOrdNotEqual, - spv::Op::OpFOrdGreaterThan, - spv::Op::OpFOrdLessThanEqual, - spv::Op::OpFOrdEqual, - spv::Op::OpFOrdLessThan, - spv::Op::OpNop, - }; - - // if (alpha_func == 0) passes = false; - b.nextSwitchSegment(switch_segments, 0); - b.makeDiscard(); - b.addSwitchBreak(); - - for (int i = 1; i < 7; i++) { - b.nextSwitchSegment(switch_segments, i); - auto cond = b.createBinOp(alpha_op_map[i], bool_type_, oC0_alpha, - alpha_test_ref); - spv::Builder::If discard_if(cond, 0, b); - b.makeDiscard(); - discard_if.makeEndIf(); - b.addSwitchBreak(); - } - - // if (alpha_func == 7) passes = true; - b.nextSwitchSegment(switch_segments, 7); - b.endSwitch(switch_segments); - - alpha_if.makeEndIf(); - } - } - - b.makeReturn(false); - - // Compile the spv IR - // compiler_.Compile(b.getModule()); - - std::vector spirv_words; - b.dump(spirv_words); - - // Cleanup builder. - cf_blocks_.clear(); - loop_head_block_ = nullptr; - loop_body_block_ = nullptr; - loop_cont_block_ = nullptr; - loop_exit_block_ = nullptr; - builder_.reset(); - - interface_ids_.clear(); - - // Copy bytes out. - // TODO(benvanik): avoid copy? 
- std::vector spirv_bytes; - spirv_bytes.resize(spirv_words.size() * 4); - std::memcpy(spirv_bytes.data(), spirv_words.data(), spirv_bytes.size()); - return spirv_bytes; -} - -void SpirvShaderTranslator::PostTranslation(Shader* shader) { - // Validation. - if (cvars::spv_validate) { - auto validation = validator_.Validate( - reinterpret_cast(shader->translated_binary().data()), - shader->translated_binary().size() / sizeof(uint32_t)); - if (validation->has_error()) { - XELOGE("SPIR-V Shader Validation failed! Error: {}", - validation->error_string()); - } - } - - if (cvars::spv_disasm) { - // TODO(benvanik): only if needed? could be slowish. - auto disasm = disassembler_.Disassemble( - reinterpret_cast(shader->translated_binary().data()), - shader->translated_binary().size() / 4); - if (disasm->has_error()) { - XELOGE("Failed to disassemble SPIRV - invalid?"); - } else { - set_host_disassembly(shader, disasm->to_string()); - } - } -} - -void SpirvShaderTranslator::PreProcessControlFlowInstructions( - std::vector instrs) { - auto& b = *builder_; - - auto default_block = &b.makeNewBlock(); - switch_break_block_ = &b.makeNewBlock(); - - b.setBuildPoint(default_block); - b.createStore(b.makeIntConstant(0xFFFF), pc_); - b.createBranch(switch_break_block_); - - b.setBuildPoint(switch_break_block_); - b.createBranch(loop_cont_block_); - - // Now setup the switch. - default_block->addPredecessor(loop_body_block_); - b.setBuildPoint(loop_body_block_); - - cf_blocks_.resize(instrs.size()); - for (size_t i = 0; i < cf_blocks_.size(); i++) { - cf_blocks_[i].block = &b.makeNewBlock(); - cf_blocks_[i].labelled = false; - } - - std::vector operands; - operands.push_back(b.createLoad(pc_)); // Selector - operands.push_back(default_block->getId()); // Default - - // Always have a case for block 0. 
- operands.push_back(0); - operands.push_back(cf_blocks_[0].block->getId()); - cf_blocks_[0].block->addPredecessor(loop_body_block_); - cf_blocks_[0].labelled = true; - - for (size_t i = 0; i < instrs.size(); i++) { - auto& instr = instrs[i]; - if (instr.opcode() == ucode::ControlFlowOpcode::kCondJmp) { - uint32_t address = instr.cond_jmp.address(); - - if (!cf_blocks_[address].labelled) { - cf_blocks_[address].labelled = true; - operands.push_back(address); - operands.push_back(cf_blocks_[address].block->getId()); - cf_blocks_[address].block->addPredecessor(loop_body_block_); - } - - if (!cf_blocks_[i + 1].labelled) { - cf_blocks_[i + 1].labelled = true; - operands.push_back(uint32_t(i + 1)); - operands.push_back(cf_blocks_[i + 1].block->getId()); - cf_blocks_[i + 1].block->addPredecessor(loop_body_block_); - } - } else if (instr.opcode() == ucode::ControlFlowOpcode::kLoopStart) { - uint32_t address = instr.loop_start.address(); - - // Label the body - if (!cf_blocks_[i + 1].labelled) { - cf_blocks_[i + 1].labelled = true; - operands.push_back(uint32_t(i + 1)); - operands.push_back(cf_blocks_[i + 1].block->getId()); - cf_blocks_[i + 1].block->addPredecessor(loop_body_block_); - } - - // Label the loop skip address. 
- if (!cf_blocks_[address].labelled) { - cf_blocks_[address].labelled = true; - operands.push_back(address); - operands.push_back(cf_blocks_[address].block->getId()); - cf_blocks_[address].block->addPredecessor(loop_body_block_); - } - } else if (instr.opcode() == ucode::ControlFlowOpcode::kLoopEnd) { - uint32_t address = instr.loop_end.address(); - - if (!cf_blocks_[address].labelled) { - cf_blocks_[address].labelled = true; - operands.push_back(address); - operands.push_back(cf_blocks_[address].block->getId()); - cf_blocks_[address].block->addPredecessor(loop_body_block_); - } - } - } - - b.createSelectionMerge(switch_break_block_, 0); - b.createNoResultOp(spv::Op::OpSwitch, operands); -} - -void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) { - auto& b = *builder_; -} - -void SpirvShaderTranslator::ProcessControlFlowInstructionBegin( - uint32_t cf_index) { - auto& b = *builder_; -} - -void SpirvShaderTranslator::ProcessControlFlowInstructionEnd( - uint32_t cf_index) { - auto& b = *builder_; -} - -void SpirvShaderTranslator::ProcessControlFlowNopInstruction( - uint32_t cf_index) { - auto& b = *builder_; - - auto head = cf_blocks_[cf_index].block; - b.setBuildPoint(head); - b.createNoResultOp(spv::Op::OpNop); - if (cf_blocks_.size() > cf_index + 1) { - b.createBranch(cf_blocks_[cf_index + 1].block); - } else { - b.makeReturn(false); - } -} - -void SpirvShaderTranslator::ProcessExecInstructionBegin( - const ParsedExecInstruction& instr) { - auto& b = *builder_; - - assert_false(open_predicated_block_); - open_predicated_block_ = false; - predicated_block_cond_ = false; - predicated_block_end_ = nullptr; - - // Head has the logic to check if the body should execute. - auto head = cf_blocks_[instr.dword_index].block; - b.setBuildPoint(head); - auto body = head; - switch (instr.type) { - case ParsedExecInstruction::Type::kUnconditional: { - // No need to do anything. 
- exec_cond_ = false; - } break; - case ParsedExecInstruction::Type::kConditional: { - // Based off of bool_consts - std::vector offsets; - offsets.push_back(b.makeUintConstant(2)); // bool_consts - uint32_t bitfield_index = instr.bool_constant_index / 32; - offsets.push_back(b.makeUintConstant(bitfield_index / 4)); - auto v = b.createAccessChain(spv::StorageClass::StorageClassUniform, - consts_, offsets); - v = b.createLoad(v); - v = b.createCompositeExtract(v, uint_type_, bitfield_index % 4); - - // Bitfield extract the bool constant. - // FIXME: NVidia's compiler seems to be broken on this instruction? - /* - v = b.createTriOp(spv::Op::OpBitFieldUExtract, uint_type_, v, - b.makeUintConstant(instr.bool_constant_index % 32), - b.makeUintConstant(1)); - - // Conditional branch - auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, v, - b.makeUintConstant(instr.condition ? 1 : 0)); - */ - v = b.createBinOp( - spv::Op::OpBitwiseAnd, uint_type_, v, - b.makeUintConstant(1 << (instr.bool_constant_index % 32))); - auto cond = b.createBinOp( - instr.condition ? spv::Op::OpINotEqual : spv::Op::OpIEqual, - bool_type_, v, b.makeUintConstant(0)); - - // Conditional branch - body = &b.makeNewBlock(); - exec_cond_ = true; - exec_skip_block_ = &b.makeNewBlock(); - - b.createSelectionMerge( - exec_skip_block_, - spv::SelectionControlMask::SelectionControlMaskNone); - b.createConditionalBranch(cond, body, exec_skip_block_); - - b.setBuildPoint(exec_skip_block_); - if (!instr.is_end || cf_blocks_.size() > instr.dword_index + 1) { - assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1].block); - } else { - b.makeReturn(false); - } - } break; - case ParsedExecInstruction::Type::kPredicated: { - // Branch based on p0. 
- body = &b.makeNewBlock(); - exec_cond_ = true; - exec_skip_block_ = &b.makeNewBlock(); - - auto cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.condition)); - b.createSelectionMerge( - exec_skip_block_, - spv::SelectionControlMask::SelectionControlMaskNone); - b.createConditionalBranch(cond, body, exec_skip_block_); - - b.setBuildPoint(exec_skip_block_); - if (!instr.is_end || cf_blocks_.size() > instr.dword_index + 1) { - assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1].block); - } else { - b.makeReturn(false); - } - } break; - } - b.setBuildPoint(body); -} - -void SpirvShaderTranslator::ProcessExecInstructionEnd( - const ParsedExecInstruction& instr) { - auto& b = *builder_; - - if (open_predicated_block_) { - b.createBranch(predicated_block_end_); - b.setBuildPoint(predicated_block_end_); - open_predicated_block_ = false; - predicated_block_cond_ = false; - predicated_block_end_ = nullptr; - } - - if (instr.is_end) { - b.makeReturn(false); - } else if (exec_cond_) { - b.createBranch(exec_skip_block_); - } else { - assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1].block); - } -} - -void SpirvShaderTranslator::ProcessLoopStartInstruction( - const ParsedLoopStartInstruction& instr) { - auto& b = *builder_; - - auto head = cf_blocks_[instr.dword_index].block; - b.setBuildPoint(head); - - // loop il, L - loop with loop data il, end @ L - - std::vector offsets; - offsets.push_back(b.makeUintConstant(1)); // loop_consts - offsets.push_back(b.makeUintConstant(instr.loop_constant_index / 4)); - auto loop_const = b.createAccessChain(spv::StorageClass::StorageClassUniform, - consts_, offsets); - loop_const = b.createLoad(loop_const); - loop_const = b.createCompositeExtract(loop_const, uint_type_, - instr.loop_constant_index % 4); - - // uint loop_count_value = loop_const & 0xFF; - auto 
loop_count_value = b.createBinOp(spv::Op::OpBitwiseAnd, uint_type_, - loop_const, b.makeUintConstant(0xFF)); - - // uint loop_aL_value = (loop_const >> 8) & 0xFF; - auto loop_aL_value = b.createBinOp(spv::Op::OpShiftRightLogical, uint_type_, - loop_const, b.makeUintConstant(8)); - loop_aL_value = b.createBinOp(spv::Op::OpBitwiseAnd, uint_type_, - loop_aL_value, b.makeUintConstant(0xFF)); - - // loop_count_ = uvec4(loop_count_value, loop_count_.xyz); - auto loop_count = b.createLoad(loop_count_); - loop_count = - b.createRvalueSwizzle(spv::NoPrecision, vec4_uint_type_, loop_count, - std::vector({0, 0, 1, 2})); - loop_count = - b.createCompositeInsert(loop_count_value, loop_count, vec4_uint_type_, 0); - b.createStore(loop_count, loop_count_); - - // aL = aL.xxyz; - auto aL = b.createLoad(aL_); - aL = b.createRvalueSwizzle(spv::NoPrecision, vec4_uint_type_, aL, - std::vector({0, 0, 1, 2})); - if (!instr.is_repeat) { - // aL.x = loop_aL_value; - aL = b.createCompositeInsert(loop_aL_value, aL, vec4_uint_type_, 0); - } - b.createStore(aL, aL_); - - // Short-circuit if loop counter is 0 - auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, loop_count_value, - b.makeUintConstant(0)); - auto next_pc = b.createTriOp(spv::Op::OpSelect, int_type_, cond, - b.makeIntConstant(instr.loop_skip_address), - b.makeIntConstant(instr.dword_index + 1)); - b.createStore(next_pc, pc_); - b.createBranch(switch_break_block_); -} - -void SpirvShaderTranslator::ProcessLoopEndInstruction( - const ParsedLoopEndInstruction& instr) { - auto& b = *builder_; - - auto head = cf_blocks_[instr.dword_index].block; - b.setBuildPoint(head); - - // endloop il, L - end loop w/ data il, head @ L - auto loop_count = b.createLoad(loop_count_); - auto count = b.createCompositeExtract(loop_count, uint_type_, 0); - count = - b.createBinOp(spv::Op::OpISub, uint_type_, count, b.makeUintConstant(1)); - loop_count = b.createCompositeInsert(count, loop_count, vec4_uint_type_, 0); - b.createStore(loop_count, 
loop_count_); - - // if (--loop_count_.x == 0 || [!]p0) - auto c1 = b.createBinOp(spv::Op::OpIEqual, bool_type_, count, - b.makeUintConstant(0)); - auto c2 = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.predicate_condition)); - auto cond = b.createBinOp(spv::Op::OpLogicalOr, bool_type_, c1, c2); - - auto loop = &b.makeNewBlock(); - auto end = &b.makeNewBlock(); - auto tail = &b.makeNewBlock(); - b.createSelectionMerge(tail, spv::SelectionControlMaskNone); - b.createConditionalBranch(cond, end, loop); - - // ================================================ - // Loop completed - pop the current loop off the stack and exit - b.setBuildPoint(end); - loop_count = b.createLoad(loop_count_); - auto aL = b.createLoad(aL_); - - // loop_count = loop_count.yzw0 - loop_count = - b.createRvalueSwizzle(spv::NoPrecision, vec4_uint_type_, loop_count, - std::vector({1, 2, 3, 3})); - loop_count = b.createCompositeInsert(b.makeUintConstant(0), loop_count, - vec4_uint_type_, 3); - b.createStore(loop_count, loop_count_); - - // aL = aL.yzw0 - aL = b.createRvalueSwizzle(spv::NoPrecision, vec4_uint_type_, aL, - std::vector({1, 2, 3, 3})); - aL = b.createCompositeInsert(b.makeUintConstant(0), aL, vec4_uint_type_, 3); - b.createStore(aL, aL_); - - // Update pc with the next block - // pc_ = instr.dword_index + 1 - b.createStore(b.makeIntConstant(instr.dword_index + 1), pc_); - b.createBranch(tail); - - // ================================================ - // Still looping - increment aL and loop - b.setBuildPoint(loop); - aL = b.createLoad(aL_); - auto aL_x = b.createCompositeExtract(aL, uint_type_, 0); - - std::vector offsets; - offsets.push_back(b.makeUintConstant(1)); // loop_consts - offsets.push_back(b.makeUintConstant(instr.loop_constant_index / 4)); - auto loop_const = b.createAccessChain(spv::StorageClass::StorageClassUniform, - consts_, offsets); - loop_const = b.createLoad(loop_const); - loop_const = 
b.createCompositeExtract(loop_const, uint_type_, - instr.loop_constant_index % 4); - - // uint loop_aL_value = (loop_const >> 16) & 0xFF; - auto loop_aL_value = b.createBinOp(spv::Op::OpShiftRightLogical, uint_type_, - loop_const, b.makeUintConstant(16)); - loop_aL_value = b.createBinOp(spv::Op::OpBitwiseAnd, uint_type_, - loop_aL_value, b.makeUintConstant(0xFF)); - - aL_x = b.createBinOp(spv::Op::OpIAdd, uint_type_, aL_x, loop_aL_value); - aL = b.createCompositeInsert(aL_x, aL, vec4_uint_type_, 0); - b.createStore(aL, aL_); - - // pc_ = instr.loop_body_address; - b.createStore(b.makeIntConstant(instr.loop_body_address), pc_); - b.createBranch(tail); - - // ================================================ - b.setBuildPoint(tail); - b.createBranch(switch_break_block_); -} - -void SpirvShaderTranslator::ProcessCallInstruction( - const ParsedCallInstruction& instr) { - auto& b = *builder_; - - auto head = cf_blocks_[instr.dword_index].block; - b.setBuildPoint(head); - - // Unused instruction(?) - assert_always(); - EmitTranslationError("call is unimplemented", false); - - assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1].block); -} - -void SpirvShaderTranslator::ProcessReturnInstruction( - const ParsedReturnInstruction& instr) { - auto& b = *builder_; - - auto head = cf_blocks_[instr.dword_index].block; - b.setBuildPoint(head); - - // Unused instruction(?) 
- assert_always(); - EmitTranslationError("ret is unimplemented", false); - - assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1].block); -} - -// CF jump -void SpirvShaderTranslator::ProcessJumpInstruction( - const ParsedJumpInstruction& instr) { - auto& b = *builder_; - - auto head = cf_blocks_[instr.dword_index].block; - b.setBuildPoint(head); - switch (instr.type) { - case ParsedJumpInstruction::Type::kUnconditional: { - b.createStore(b.makeIntConstant(instr.target_address), pc_); - b.createBranch(switch_break_block_); - } break; - case ParsedJumpInstruction::Type::kConditional: { - assert_true(cf_blocks_.size() > instr.dword_index + 1); - - // Based off of bool_consts - std::vector offsets; - offsets.push_back(b.makeUintConstant(2)); // bool_consts - uint32_t bitfield_index = instr.bool_constant_index / 32; - offsets.push_back(b.makeUintConstant(bitfield_index / 4)); - auto v = b.createAccessChain(spv::StorageClass::StorageClassUniform, - consts_, offsets); - v = b.createLoad(v); - v = b.createCompositeExtract(v, uint_type_, bitfield_index % 4); - - // Bitfield extract the bool constant. - // FIXME: NVidia's compiler seems to be broken on this instruction? - /* - v = b.createTriOp(spv::Op::OpBitFieldUExtract, uint_type_, v, - b.makeUintConstant(instr.bool_constant_index % 32), - b.makeUintConstant(1)); - - // Conditional branch - auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, v, - b.makeUintConstant(instr.condition ? 1 : 0)); - */ - v = b.createBinOp( - spv::Op::OpBitwiseAnd, uint_type_, v, - b.makeUintConstant(1 << (instr.bool_constant_index % 32))); - auto cond = b.createBinOp( - instr.condition ? 
spv::Op::OpINotEqual : spv::Op::OpIEqual, - bool_type_, v, b.makeUintConstant(0)); - - auto next_pc = b.createTriOp(spv::Op::OpSelect, int_type_, cond, - b.makeIntConstant(instr.target_address), - b.makeIntConstant(instr.dword_index + 1)); - b.createStore(next_pc, pc_); - b.createBranch(switch_break_block_); - } break; - case ParsedJumpInstruction::Type::kPredicated: { - assert_true(cf_blocks_.size() > instr.dword_index + 1); - - auto cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.condition)); - - auto next_pc = b.createTriOp(spv::Op::OpSelect, int_type_, cond, - b.makeIntConstant(instr.target_address), - b.makeIntConstant(instr.dword_index + 1)); - b.createStore(next_pc, pc_); - b.createBranch(switch_break_block_); - } break; - } -} - -void SpirvShaderTranslator::ProcessAllocInstruction( - const ParsedAllocInstruction& instr) { - auto& b = *builder_; - - auto head = cf_blocks_[instr.dword_index].block; - b.setBuildPoint(head); - - switch (instr.type) { - case AllocType::kNone: { - // ? - } break; - case AllocType::kVsPosition: { - assert_true(is_vertex_shader()); - } break; - // Also PS Colors - case AllocType::kVsInterpolators: { - // Already included, nothing to do here. - } break; - case AllocType::kMemory: { - // Nothing to do for this. - } break; - default: - break; - } - - assert_true(cf_blocks_.size() > instr.dword_index + 1); - b.createBranch(cf_blocks_[instr.dword_index + 1].block); -} - -spv::Id SpirvShaderTranslator::BitfieldExtract(spv::Id result_type, - spv::Id base, bool is_signed, - uint32_t offset, - uint32_t count) { - auto& b = *builder_; - - spv::Id base_type = b.getTypeId(base); - - // <-- 32 - (offset + count) ------ [bits] -?- - if (32 - (offset + count) > 0) { - base = b.createBinOp(spv::Op::OpShiftLeftLogical, base_type, base, - b.makeUintConstant(32 - (offset + count))); - } - // [bits] -?-?-?--------------------------- - auto op = is_signed ? 
spv::Op::OpShiftRightArithmetic - : spv::Op::OpShiftRightLogical; - base = b.createBinOp(op, base_type, base, b.makeUintConstant(32 - count)); - - return base; -} - -spv::Id SpirvShaderTranslator::ConvertNormVar(spv::Id var, spv::Id result_type, - uint32_t bits, bool is_signed) { - auto& b = *builder_; - if (is_signed) { - auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, var, - b.makeFloatConstant(-float(1 << (bits - 1)))); - auto v = b.createBinOp(spv::Op::OpFDiv, result_type, var, - b.makeFloatConstant(float((1 << (bits - 1)) - 1))); - var = b.createTriOp(spv::Op::OpSelect, result_type, c, - b.makeFloatConstant(-1.f), v); - } else { - var = b.createBinOp(spv::Op::OpFDiv, result_type, var, - b.makeFloatConstant(float((1 << bits) - 1))); - } - - return var; -} - -void SpirvShaderTranslator::ProcessVertexFetchInstruction( - const ParsedVertexFetchInstruction& instr) { - auto& b = *builder_; - assert_true(is_vertex_shader()); - assert_not_zero(vertex_idx_); - - // Close the open predicated block if this instr isn't predicated or the - // conditions do not match. 
- if (open_predicated_block_ && - (!instr.is_predicated || - instr.predicate_condition != predicated_block_cond_)) { - b.createBranch(predicated_block_end_); - b.setBuildPoint(predicated_block_end_); - open_predicated_block_ = false; - predicated_block_cond_ = false; - predicated_block_end_ = nullptr; - } - - if (!open_predicated_block_ && instr.is_predicated) { - Id pred_cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.predicate_condition)); - auto block = &b.makeNewBlock(); - open_predicated_block_ = true; - predicated_block_cond_ = instr.predicate_condition; - predicated_block_end_ = &b.makeNewBlock(); - - b.createSelectionMerge(predicated_block_end_, - spv::SelectionControlMaskNone); - b.createConditionalBranch(pred_cond, block, predicated_block_end_); - b.setBuildPoint(block); - } - - // Operand 0 is the index - // Operand 1 is the binding - // TODO: Indexed fetch - auto vertex_idx = LoadFromOperand(instr.operands[0]); - vertex_idx = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, vertex_idx); - - // vertex_idx * stride + offset - vertex_idx = b.createBinOp(spv::Op::OpIMul, int_type_, vertex_idx, - b.makeUintConstant(instr.attributes.stride)); - vertex_idx = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(instr.attributes.offset)); - - auto data_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, vtx_, - {b.makeUintConstant(vtx_binding_map_[instr.operands[1].storage_index]), - b.makeUintConstant(0)}); - - spv::Id vertex = 0; - switch (instr.attributes.data_format) { - case xenos::VertexFormat::k_8_8_8_8: { - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {vertex_idx}); - auto vertex_data = b.createLoad(vertex_ptr); - - if (instr.attributes.is_integer) { - spv::Id components[4] = {}; - - auto op = instr.attributes.is_signed ? 
spv::Op::OpConvertSToF - : spv::Op::OpConvertUToF; - auto comp_type = instr.attributes.is_signed ? int_type_ : uint_type_; - - for (int i = 0; i < 4; i++) { - components[i] = BitfieldExtract(comp_type, vertex_data, - instr.attributes.is_signed, 8 * i, 8); - components[i] = b.createUnaryOp(op, float_type_, components[i]); - } - - vertex = b.createCompositeConstruct( - vec4_float_type_, - {components[0], components[1], components[2], components[3]}); - } else { - spv::GLSLstd450 op; - if (instr.attributes.is_signed) { - op = spv::GLSLstd450::kUnpackSnorm4x8; - } else { - op = spv::GLSLstd450::kUnpackUnorm4x8; - } - vertex = CreateGlslStd450InstructionCall( - spv::NoPrecision, vec4_float_type_, op, {vertex_data}); - } - } break; - - case xenos::VertexFormat::k_16_16: { - spv::Id components[1] = {}; - for (uint32_t i = 0; i < 1; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - if (instr.attributes.is_integer) { - spv::Id comp[2] = {}; - - bool is_signed = instr.attributes.is_signed; - bool is_integer = instr.attributes.is_integer; - auto comp_type = is_signed ? int_type_ : uint_type_; - - if (is_signed) { - vertex_data = - b.createUnaryOp(spv::Op::OpBitcast, int_type_, vertex_data); - } - - comp[0] = BitfieldExtract(comp_type, vertex_data, is_signed, 0, 16); - comp[1] = BitfieldExtract(comp_type, vertex_data, is_signed, 16, 16); - - auto op = is_signed ? 
spv::Op::OpConvertSToF : spv::Op::OpConvertUToF; - for (int i = 0; i < xe::countof(comp); i++) { - comp[i] = b.createUnaryOp(op, float_type_, comp[i]); - } - - components[i] = - b.createCompositeConstruct(vec2_float_type_, {comp[0], comp[1]}); - } else { - spv::GLSLstd450 op; - if (instr.attributes.is_signed) { - op = spv::GLSLstd450::kUnpackSnorm2x16; - } else { - op = spv::GLSLstd450::kUnpackUnorm2x16; - } - - components[i] = CreateGlslStd450InstructionCall( - spv::NoPrecision, vec2_float_type_, op, {vertex_data}); - } - } - - vertex = components[0]; - } break; - - case xenos::VertexFormat::k_16_16_16_16: { - spv::Id components[2] = {}; - for (uint32_t i = 0; i < 2; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - if (instr.attributes.is_integer) { - spv::Id comp[2] = {}; - - bool is_signed = instr.attributes.is_signed; - bool is_integer = instr.attributes.is_integer; - auto comp_type = is_signed ? int_type_ : uint_type_; - - if (is_signed) { - vertex_data = - b.createUnaryOp(spv::Op::OpBitcast, int_type_, vertex_data); - } - - comp[0] = BitfieldExtract(comp_type, vertex_data, is_signed, 0, 16); - comp[1] = BitfieldExtract(comp_type, vertex_data, is_signed, 16, 16); - - auto op = is_signed ? 
spv::Op::OpConvertSToF : spv::Op::OpConvertUToF; - for (int i = 0; i < xe::countof(comp); i++) { - comp[i] = b.createUnaryOp(op, float_type_, comp[i]); - } - - components[i] = - b.createCompositeConstruct(vec2_float_type_, {comp[0], comp[1]}); - } else { - spv::GLSLstd450 op; - if (instr.attributes.is_signed) { - op = spv::GLSLstd450::kUnpackSnorm2x16; - } else { - op = spv::GLSLstd450::kUnpackUnorm2x16; - } - - components[i] = CreateGlslStd450InstructionCall( - spv::NoPrecision, vec2_float_type_, op, {vertex_data}); - } - } - - vertex = b.createConstructor( - spv::NoPrecision, {components[0], components[1]}, vec4_float_type_); - } break; - - case xenos::VertexFormat::k_16_16_FLOAT: { - spv::Id components[1] = {}; - for (uint32_t i = 0; i < 1; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - assert_true(instr.attributes.is_integer); - assert_true(instr.attributes.is_signed); - components[i] = CreateGlslStd450InstructionCall( - spv::NoPrecision, vec2_float_type_, - spv::GLSLstd450::kUnpackHalf2x16, {vertex_data}); - } - - vertex = components[0]; - } break; - - case xenos::VertexFormat::k_16_16_16_16_FLOAT: { - spv::Id components[2] = {}; - for (uint32_t i = 0; i < 2; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - assert_true(instr.attributes.is_integer); - assert_true(instr.attributes.is_signed); - components[i] = CreateGlslStd450InstructionCall( - spv::NoPrecision, vec2_float_type_, - spv::GLSLstd450::kUnpackHalf2x16, {vertex_data}); - } - - vertex = b.createConstructor( - spv::NoPrecision, {components[0], components[1]}, vec4_float_type_); - } break; - - 
case xenos::VertexFormat::k_32: { - spv::Id components[1] = {}; - for (uint32_t i = 0; i < 1; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - if (instr.attributes.is_integer) { - if (instr.attributes.is_signed) { - components[i] = - b.createUnaryOp(spv::Op::OpBitcast, int_type_, vertex_data); - components[i] = b.createUnaryOp(spv::Op::OpConvertSToF, float_type_, - vertex_data); - } else { - components[i] = b.createUnaryOp(spv::Op::OpConvertUToF, float_type_, - vertex_data); - } - } else { - if (instr.attributes.is_signed) { - // TODO(DrChat): This is gonna be harder to convert. There's not - // enough precision in a float to shove INT_MAX into it. - assert_always(); - components[i] = b.makeFloatConstant(0.f); - } else { - components[i] = ConvertNormVar(vertex_data, uint_type_, 32, false); - } - } - } - - // vertex = b.createCompositeConstruct(float_type_, {components[0]}); - vertex = components[0]; - } break; - - case xenos::VertexFormat::k_32_32: { - spv::Id components[2] = {}; - for (uint32_t i = 0; i < 2; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - if (instr.attributes.is_integer) { - if (instr.attributes.is_signed) { - components[i] = - b.createUnaryOp(spv::Op::OpBitcast, int_type_, vertex_data); - components[i] = b.createUnaryOp(spv::Op::OpConvertSToF, float_type_, - vertex_data); - } else { - components[i] = b.createUnaryOp(spv::Op::OpConvertUToF, float_type_, - vertex_data); - } - } else { - if (instr.attributes.is_signed) { - // TODO(DrChat): This is gonna be harder to convert. 
There's not - // enough precision in a float to shove INT_MAX into it. - assert_always(); - components[i] = b.makeFloatConstant(0.f); - } else { - components[i] = ConvertNormVar(vertex_data, uint_type_, 32, false); - } - } - } - - vertex = b.createCompositeConstruct(vec2_float_type_, - {components[0], components[1]}); - } break; - - case xenos::VertexFormat::k_32_32_32_32: { - spv::Id components[4] = {}; - for (uint32_t i = 0; i < 4; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - if (instr.attributes.is_integer) { - if (instr.attributes.is_signed) { - components[i] = - b.createUnaryOp(spv::Op::OpBitcast, int_type_, vertex_data); - components[i] = b.createUnaryOp(spv::Op::OpConvertSToF, float_type_, - vertex_data); - } else { - components[i] = b.createUnaryOp(spv::Op::OpConvertUToF, float_type_, - vertex_data); - } - } else { - if (instr.attributes.is_signed) { - // TODO(DrChat): This is gonna be harder to convert. There's not - // enough precision in a float to shove INT_MAX into it. 
- assert_always(); - components[i] = b.makeFloatConstant(0.f); - } else { - components[i] = ConvertNormVar(vertex_data, uint_type_, 32, false); - } - } - } - - vertex = b.createCompositeConstruct( - vec2_float_type_, - {components[0], components[1], components[2], components[3]}); - } break; - - case xenos::VertexFormat::k_32_FLOAT: { - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {vertex_idx}); - auto vertex_data = b.createLoad(vertex_ptr); - - vertex = b.createUnaryOp(spv::Op::OpBitcast, float_type_, vertex_data); - } break; - - case xenos::VertexFormat::k_32_32_FLOAT: { - spv::Id components[2] = {}; - for (uint32_t i = 0; i < 2; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - components[i] = - b.createUnaryOp(spv::Op::OpBitcast, float_type_, vertex_data); - } - - vertex = b.createCompositeConstruct(vec2_float_type_, - {components[0], components[1]}); - } break; - - case xenos::VertexFormat::k_32_32_32_FLOAT: { - spv::Id components[3] = {}; - for (uint32_t i = 0; i < 3; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - components[i] = - b.createUnaryOp(spv::Op::OpBitcast, float_type_, vertex_data); - } - - vertex = b.createCompositeConstruct( - vec3_float_type_, {components[0], components[1], components[2]}); - } break; - - case xenos::VertexFormat::k_32_32_32_32_FLOAT: { - spv::Id components[4] = {}; - for (uint32_t i = 0; i < 4; i++) { - auto index = b.createBinOp(spv::Op::OpIAdd, int_type_, vertex_idx, - b.makeUintConstant(i)); - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, 
data_ptr, {index}); - auto vertex_data = b.createLoad(vertex_ptr); - - components[i] = - b.createUnaryOp(spv::Op::OpBitcast, float_type_, vertex_data); - } - - vertex = b.createCompositeConstruct( - vec4_float_type_, - {components[0], components[1], components[2], components[3]}); - } break; - - case xenos::VertexFormat::k_2_10_10_10: { - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {vertex_idx}); - auto vertex_data = b.createLoad(vertex_ptr); - assert(b.getTypeId(vertex_data) == uint_type_); - - // This needs to be converted. - bool is_signed = instr.attributes.is_signed; - bool is_integer = instr.attributes.is_integer; - auto comp_type = is_signed ? int_type_ : uint_type_; - - if (is_signed) { - vertex_data = - b.createUnaryOp(spv::Op::OpBitcast, int_type_, vertex_data); - } - - spv::Id components[4] = {0}; - components[0] = - BitfieldExtract(comp_type, vertex_data, is_signed, 00, 10); - components[1] = - BitfieldExtract(comp_type, vertex_data, is_signed, 10, 10); - components[2] = - BitfieldExtract(comp_type, vertex_data, is_signed, 20, 10); - components[3] = - BitfieldExtract(comp_type, vertex_data, is_signed, 30, 02); - - auto op = is_signed ? 
spv::Op::OpConvertSToF : spv::Op::OpConvertUToF; - for (int i = 0; i < xe::countof(components); i++) { - components[i] = b.createUnaryOp(op, float_type_, components[i]); - } - - if (!is_integer) { - components[0] = - ConvertNormVar(components[0], float_type_, 10, is_signed); - components[1] = - ConvertNormVar(components[1], float_type_, 10, is_signed); - components[2] = - ConvertNormVar(components[2], float_type_, 10, is_signed); - components[3] = - ConvertNormVar(components[3], float_type_, 02, is_signed); - } - - vertex = b.createCompositeConstruct( - vec4_float_type_, std::vector({components[0], components[1], - components[2], components[3]})); - } break; - - case xenos::VertexFormat::k_10_11_11: { - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {vertex_idx}); - auto vertex_data = b.createLoad(vertex_ptr); - assert(b.getTypeId(vertex_data) == uint_type_); - - // This needs to be converted. - bool is_signed = instr.attributes.is_signed; - bool is_integer = instr.attributes.is_integer; - auto op = - is_signed ? spv::Op::OpBitFieldSExtract : spv::Op::OpBitFieldUExtract; - auto comp_type = is_signed ? 
int_type_ : uint_type_; - - if (is_signed) { - vertex_data = - b.createUnaryOp(spv::Op::OpBitcast, int_type_, vertex_data); - } - - assert_true(comp_type == b.getTypeId(vertex_data)); - - spv::Id components[3] = {0}; - /* - components[2] = b.createTriOp( - op, comp_type, vertex, b.makeUintConstant(0), b.makeUintConstant(10)); - components[1] = - b.createTriOp(op, comp_type, vertex, b.makeUintConstant(10), - b.makeUintConstant(11)); - components[0] = - b.createTriOp(op, comp_type, vertex, b.makeUintConstant(21), - b.makeUintConstant(11)); - */ - // Workaround until NVIDIA fixes their compiler :| - components[0] = - BitfieldExtract(comp_type, vertex_data, is_signed, 00, 11); - components[1] = - BitfieldExtract(comp_type, vertex_data, is_signed, 11, 11); - components[2] = - BitfieldExtract(comp_type, vertex_data, is_signed, 22, 10); - - op = is_signed ? spv::Op::OpConvertSToF : spv::Op::OpConvertUToF; - for (int i = 0; i < 3; i++) { - components[i] = b.createUnaryOp(op, float_type_, components[i]); - } - - if (!is_integer) { - components[0] = - ConvertNormVar(components[0], float_type_, 11, is_signed); - components[1] = - ConvertNormVar(components[1], float_type_, 11, is_signed); - components[2] = - ConvertNormVar(components[2], float_type_, 10, is_signed); - } - - vertex = b.createCompositeConstruct( - vec3_float_type_, - std::vector({components[0], components[1], components[2]})); - } break; - - case xenos::VertexFormat::k_11_11_10: { - auto vertex_ptr = b.createAccessChain( - spv::StorageClass::StorageClassUniform, data_ptr, {vertex_idx}); - auto vertex_data = b.createLoad(vertex_ptr); - assert(b.getTypeId(vertex_data) == uint_type_); - - // This needs to be converted. - bool is_signed = instr.attributes.is_signed; - bool is_integer = instr.attributes.is_integer; - auto op = - is_signed ? spv::Op::OpBitFieldSExtract : spv::Op::OpBitFieldUExtract; - auto comp_type = is_signed ? 
int_type_ : uint_type_; - - spv::Id components[3] = {0}; - /* - components[2] = b.createTriOp( - op, comp_type, vertex, b.makeUintConstant(0), b.makeUintConstant(11)); - components[1] = - b.createTriOp(op, comp_type, vertex, b.makeUintConstant(11), - b.makeUintConstant(11)); - components[0] = - b.createTriOp(op, comp_type, vertex, b.makeUintConstant(22), - b.makeUintConstant(10)); - */ - // Workaround until NVIDIA fixes their compiler :| - components[0] = - BitfieldExtract(comp_type, vertex_data, is_signed, 00, 10); - components[1] = - BitfieldExtract(comp_type, vertex_data, is_signed, 10, 11); - components[2] = - BitfieldExtract(comp_type, vertex_data, is_signed, 21, 11); - - op = is_signed ? spv::Op::OpConvertSToF : spv::Op::OpConvertUToF; - for (int i = 0; i < 3; i++) { - components[i] = b.createUnaryOp(op, float_type_, components[i]); - } - - if (!is_integer) { - components[0] = - ConvertNormVar(components[0], float_type_, 11, is_signed); - components[1] = - ConvertNormVar(components[1], float_type_, 11, is_signed); - components[2] = - ConvertNormVar(components[2], float_type_, 10, is_signed); - } - - vertex = b.createCompositeConstruct( - vec3_float_type_, - std::vector({components[0], components[1], components[2]})); - } break; - - case xenos::VertexFormat::kUndefined: - break; - } - - assert_not_zero(vertex); - StoreToResult(vertex, instr.result); -} - -void SpirvShaderTranslator::ProcessTextureFetchInstruction( - const ParsedTextureFetchInstruction& instr) { - auto& b = *builder_; - - // Close the open predicated block if this instr isn't predicated or the - // conditions do not match. 
- if (open_predicated_block_ && - (!instr.is_predicated || - instr.predicate_condition != predicated_block_cond_)) { - b.createBranch(predicated_block_end_); - b.setBuildPoint(predicated_block_end_); - open_predicated_block_ = false; - predicated_block_cond_ = false; - predicated_block_end_ = nullptr; - } - - if (!open_predicated_block_ && instr.is_predicated) { - Id pred_cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.predicate_condition)); - auto block = &b.makeNewBlock(); - open_predicated_block_ = true; - predicated_block_cond_ = instr.predicate_condition; - predicated_block_end_ = &b.makeNewBlock(); - - b.createSelectionMerge(predicated_block_end_, - spv::SelectionControlMaskNone); - b.createConditionalBranch(pred_cond, block, predicated_block_end_); - b.setBuildPoint(block); - } - - // Operand 0 is the offset - // Operand 1 is the sampler index - Id dest = vec4_float_zero_; - Id src = LoadFromOperand(instr.operands[0]); - assert_not_zero(src); - - uint32_t dim_idx = 0; - switch (instr.dimension) { - case xenos::FetchOpDimension::k1D: - case xenos::FetchOpDimension::k2D: { - dim_idx = 0; - } break; - case xenos::FetchOpDimension::k3DOrStacked: { - dim_idx = 1; - } break; - case xenos::FetchOpDimension::kCube: { - dim_idx = 2; - } break; - default: - assert_unhandled_case(instr.dimension); - } - - switch (instr.opcode) { - case FetchOpcode::kTextureFetch: { - auto texture_index = - b.makeUintConstant(tex_binding_map_[instr.operands[1].storage_index]); - auto texture_ptr = - b.createAccessChain(spv::StorageClass::StorageClassUniformConstant, - tex_[dim_idx], std::vector({texture_index})); - auto texture = b.createLoad(texture_ptr); - - if (instr.dimension == xenos::FetchOpDimension::k1D) { - // Upgrade 1D src coordinate into 2D - src = b.createCompositeConstruct(vec2_float_type_, - {src, b.makeFloatConstant(0.f)}); - } - - spv::Builder::TextureParameters params = {0}; - params.coords = src; - 
params.sampler = texture; - if (instr.attributes.use_register_lod) { - params.lod = b.createLoad(lod_); - } - if (instr.attributes.offset_x || instr.attributes.offset_y || - instr.attributes.offset_z) { - float offset_x = instr.attributes.offset_x; - float offset_y = instr.attributes.offset_y; - float offset_z = instr.attributes.offset_z; - - // Round numbers away from zero. No effect if offset is 0. - offset_x += instr.attributes.offset_x < 0 ? -0.5f : 0.5f; - offset_y += instr.attributes.offset_y < 0 ? -0.5f : 0.5f; - offset_z += instr.attributes.offset_z < 0 ? -0.5f : 0.5f; - - Id offset = 0; - switch (instr.dimension) { - case xenos::FetchOpDimension::k1D: { - // https://msdn.microsoft.com/en-us/library/windows/desktop/bb944006.aspx - // "Because the runtime does not support 1D textures, the compiler - // will use a 2D texture with the knowledge that the y-coordinate is - // unimportant." - offset = b.makeCompositeConstant( - vec2_int_type_, - {b.makeIntConstant(int(offset_x)), b.makeIntConstant(0)}); - } break; - case xenos::FetchOpDimension::k2D: { - offset = b.makeCompositeConstant( - vec2_int_type_, {b.makeIntConstant(int(offset_x)), - b.makeIntConstant(int(offset_y))}); - } break; - case xenos::FetchOpDimension::k3DOrStacked: { - offset = b.makeCompositeConstant( - vec3_int_type_, {b.makeIntConstant(int(offset_x)), - b.makeIntConstant(int(offset_y)), - b.makeIntConstant(int(offset_z))}); - } break; - case xenos::FetchOpDimension::kCube: { - // FIXME(DrChat): Is this the correct dimension? 
I forget - offset = b.makeCompositeConstant( - vec3_int_type_, {b.makeIntConstant(int(offset_x)), - b.makeIntConstant(int(offset_y)), - b.makeIntConstant(int(offset_z))}); - } break; - } - - params.offset = offset; - } - - dest = - b.createTextureCall(spv::NoPrecision, vec4_float_type_, false, false, - false, false, is_vertex_shader(), params); - } break; - - case FetchOpcode::kGetTextureGradients: { - Id src_x = b.createCompositeExtract(src, float_type_, 0); - Id src_y = b.createCompositeExtract(src, float_type_, 1); - - dest = b.createCompositeConstruct( - vec4_float_type_, - { - b.createUnaryOp(spv::OpDPdx, float_type_, src_x), - b.createUnaryOp(spv::OpDPdy, float_type_, src_x), - b.createUnaryOp(spv::OpDPdx, float_type_, src_y), - b.createUnaryOp(spv::OpDPdy, float_type_, src_y), - }); - } break; - - case FetchOpcode::kGetTextureWeights: { - // fract(src0 * textureSize); - auto texture_index = - b.makeUintConstant(tex_binding_map_[instr.operands[1].storage_index]); - auto texture_ptr = - b.createAccessChain(spv::StorageClass::StorageClassUniformConstant, - tex_[dim_idx], std::vector({texture_index})); - auto texture = b.createLoad(texture_ptr); - auto image = - b.createUnaryOp(spv::OpImage, b.getImageType(texture), texture); - - switch (instr.dimension) { - case xenos::FetchOpDimension::k1D: - case xenos::FetchOpDimension::k2D: { - spv::Builder::TextureParameters params; - std::memset(¶ms, 0, sizeof(params)); - params.sampler = image; - params.lod = b.makeIntConstant(0); - auto size = b.createTextureQueryCall(spv::Op::OpImageQuerySizeLod, - params, true); - size = - b.createUnaryOp(spv::Op::OpConvertUToF, vec2_float_type_, size); - - auto weight = - b.createBinOp(spv::Op::OpFMul, vec2_float_type_, size, src); - weight = CreateGlslStd450InstructionCall( - spv::NoPrecision, vec2_float_type_, spv::GLSLstd450::kFract, - {weight}); - - dest = b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_, - {weight, vec4_float_zero_, 0, 1, 2, 2}); - } break; - - default: - 
// TODO(DrChat): The rest of these. - assert_unhandled_case(instr.dimension); - break; - } - } break; - - case FetchOpcode::kGetTextureComputedLod: { - // TODO(DrChat): Verify if this implementation is correct. - // This is only valid in pixel shaders. - assert_true(is_pixel_shader()); - - auto texture_index = - b.makeUintConstant(tex_binding_map_[instr.operands[1].storage_index]); - auto texture_ptr = - b.createAccessChain(spv::StorageClass::StorageClassUniformConstant, - tex_[dim_idx], std::vector({texture_index})); - auto texture = b.createLoad(texture_ptr); - - if (instr.dimension == xenos::FetchOpDimension::k1D) { - // Upgrade 1D src coordinate into 2D - src = b.createCompositeConstruct(vec2_float_type_, - {src, b.makeFloatConstant(0.f)}); - } - - spv::Builder::TextureParameters params = {}; - params.sampler = texture; - params.coords = src; - auto lod = - b.createTextureQueryCall(spv::Op::OpImageQueryLod, params, false); - - dest = b.createCompositeExtract(lod, float_type_, 1); - dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_); - } break; - - case FetchOpcode::kSetTextureLod: { - // = src1.x (MIP level) - // ... immediately after - // tfetch UseRegisterLOD=true - b.createStore(src, lod_); - } break; - - default: - // TODO: the rest of these - assert_unhandled_case(instr.opcode); - break; - } - - if (dest) { - b.createStore(dest, pv_); - StoreToResult(dest, instr.result); - } -} - -void SpirvShaderTranslator::ProcessAluInstruction( - const ParsedAluInstruction& instr) { - if (instr.IsNop()) { - return; - } - - auto& b = *builder_; - - // Close the open predicated block if this instr isn't predicated or the - // conditions do not match. 
- if (open_predicated_block_ && - (!instr.is_predicated || - instr.predicate_condition != predicated_block_cond_)) { - b.createBranch(predicated_block_end_); - b.setBuildPoint(predicated_block_end_); - open_predicated_block_ = false; - predicated_block_cond_ = false; - predicated_block_end_ = nullptr; - } - - if (!open_predicated_block_ && instr.is_predicated) { - Id pred_cond = - b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), - b.makeBoolConstant(instr.predicate_condition)); - auto block = &b.makeNewBlock(); - open_predicated_block_ = true; - predicated_block_cond_ = instr.predicate_condition; - predicated_block_end_ = &b.makeNewBlock(); - - b.createSelectionMerge(predicated_block_end_, - spv::SelectionControlMaskNone); - b.createConditionalBranch(pred_cond, block, predicated_block_end_); - b.setBuildPoint(block); - } - - bool close_predicated_block_vector = false; - bool store_vector = - ProcessVectorAluOperation(instr, close_predicated_block_vector); - bool close_predicated_block_scalar = false; - bool store_scalar = - ProcessScalarAluOperation(instr, close_predicated_block_scalar); - - if (store_vector) { - StoreToResult(b.createLoad(pv_), instr.vector_and_constant_result); - } - if (store_scalar) { - StoreToResult(b.createLoad(ps_), instr.scalar_result); - } - - if ((close_predicated_block_vector || close_predicated_block_scalar) && - open_predicated_block_) { - b.createBranch(predicated_block_end_); - b.setBuildPoint(predicated_block_end_); - open_predicated_block_ = false; - predicated_block_cond_ = false; - predicated_block_end_ = nullptr; - } -} - -spv::Function* SpirvShaderTranslator::CreateCubeFunction() { - auto& b = *builder_; - spv::Block* function_block = nullptr; - auto function = b.makeFunctionEntry(spv::NoPrecision, vec4_float_type_, - "cube", {vec4_float_type_}, - {{spv::NoPrecision}}, &function_block); - auto src = function->getParamId(0); - auto face_id = b.createVariable(spv::StorageClass::StorageClassFunction, - 
float_type_, "face_id"); - auto sc = b.createVariable(spv::StorageClass::StorageClassFunction, - float_type_, "sc"); - auto tc = b.createVariable(spv::StorageClass::StorageClassFunction, - float_type_, "tc"); - auto ma = b.createVariable(spv::StorageClass::StorageClassFunction, - float_type_, "ma"); - - // Pseudocode: - /* - vec4 cube(vec4 src1) { - vec3 src = vec3(src1.y, src1.x, src1.z); - vec3 abs_src = abs(src); - int face_id; - float sc; - float tc; - float ma; - if (abs_src.x > abs_src.y && abs_src.x > abs_src.z) { - if (src.x > 0.0) { - face_id = 0; sc = -abs_src.z; tc = -abs_src.y; ma = abs_src.x; - } else { - face_id = 1; sc = abs_src.z; tc = -abs_src.y; ma = abs_src.x; - } - } else if (abs_src.y > abs_src.x && abs_src.y > abs_src.z) { - if (src.y > 0.0) { - face_id = 2; sc = abs_src.x; tc = abs_src.z; ma = abs_src.y; - } else { - face_id = 3; sc = abs_src.x; tc = -abs_src.z; ma = abs_src.y; - } - } else { - if (src.z > 0.0) { - face_id = 4; sc = abs_src.x; tc = -abs_src.y; ma = abs_src.z; - } else { - face_id = 5; sc = -abs_src.x; tc = -abs_src.y; ma = abs_src.z; - } - } - float s = (sc / ma + 1.0) / 2.0; - float t = (tc / ma + 1.0) / 2.0; - return vec4(t, s, 2.0 * ma, float(face_id)); - } - */ - - auto abs_src = CreateGlslStd450InstructionCall( - spv::NoPrecision, vec4_float_type_, spv::GLSLstd450::kFAbs, {src}); - auto abs_src_x = b.createCompositeExtract(abs_src, float_type_, 0); - auto abs_src_y = b.createCompositeExtract(abs_src, float_type_, 1); - auto abs_src_z = b.createCompositeExtract(abs_src, float_type_, 2); - auto neg_src_x = b.createUnaryOp(spv::Op::OpFNegate, float_type_, abs_src_x); - auto neg_src_y = b.createUnaryOp(spv::Op::OpFNegate, float_type_, abs_src_y); - auto neg_src_z = b.createUnaryOp(spv::Op::OpFNegate, float_type_, abs_src_z); - - // Case 1: abs(src).x > abs(src).yz - { - auto x_gt_y = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, - abs_src_x, abs_src_y); - auto x_gt_z = b.createBinOp(spv::Op::OpFOrdGreaterThan, 
bool_type_, - abs_src_x, abs_src_z); - auto c1 = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, x_gt_y, x_gt_z); - spv::Builder::If if1(c1, 0, b); - - // sc = abs(src).y - b.createStore(abs_src_y, sc); - // ma = abs(src).x - b.createStore(abs_src_x, ma); - - auto src_x = b.createCompositeExtract(src, float_type_, 0); - auto c2 = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, src_x, - b.makeFloatConstant(0)); - // src.x > 0: - // face_id = 2 - // tc = -abs(src).z - // src.x <= 0: - // face_id = 3 - // tc = abs(src).z - auto tmp_face_id = - b.createTriOp(spv::Op::OpSelect, float_type_, c2, - b.makeFloatConstant(2), b.makeFloatConstant(3)); - auto tmp_tc = - b.createTriOp(spv::Op::OpSelect, float_type_, c2, neg_src_z, abs_src_z); - - b.createStore(tmp_face_id, face_id); - b.createStore(tmp_tc, tc); - - if1.makeEndIf(); - } - - // Case 2: abs(src).y > abs(src).xz - { - auto y_gt_x = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, - abs_src_y, abs_src_x); - auto y_gt_z = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, - abs_src_y, abs_src_z); - auto c1 = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, y_gt_x, y_gt_z); - spv::Builder::If if1(c1, 0, b); - - // tc = -abs(src).x - b.createStore(neg_src_x, tc); - // ma = abs(src).y - b.createStore(abs_src_y, ma); - - auto src_y = b.createCompositeExtract(src, float_type_, 1); - auto c2 = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, src_y, - b.makeFloatConstant(0)); - // src.y > 0: - // face_id = 0 - // sc = -abs(src).z - // src.y <= 0: - // face_id = 1 - // sc = abs(src).z - auto tmp_face_id = - b.createTriOp(spv::Op::OpSelect, float_type_, c2, - b.makeFloatConstant(0), b.makeFloatConstant(1)); - auto tmp_sc = - b.createTriOp(spv::Op::OpSelect, float_type_, c2, neg_src_z, abs_src_z); - - b.createStore(tmp_face_id, face_id); - b.createStore(tmp_sc, sc); - - if1.makeEndIf(); - } - - // Case 3: abs(src).z > abs(src).yx - { - auto z_gt_x = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, - 
abs_src_z, abs_src_x); - auto z_gt_y = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, - abs_src_z, abs_src_y); - auto c1 = b.createBinOp(spv::Op::OpLogicalAnd, bool_type_, z_gt_x, z_gt_y); - spv::Builder::If if1(c1, 0, b); - - // tc = -abs(src).x - b.createStore(neg_src_x, tc); - // ma = abs(src).z - b.createStore(abs_src_z, ma); - - auto src_z = b.createCompositeExtract(src, float_type_, 2); - auto c2 = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, src_z, - b.makeFloatConstant(0)); - // src.z > 0: - // face_id = 4 - // sc = -abs(src).y - // src.z <= 0: - // face_id = 5 - // sc = abs(src).y - auto tmp_face_id = - b.createTriOp(spv::Op::OpSelect, float_type_, c2, - b.makeFloatConstant(4), b.makeFloatConstant(5)); - auto tmp_sc = - b.createTriOp(spv::Op::OpSelect, float_type_, c2, neg_src_y, abs_src_y); - - b.createStore(tmp_face_id, face_id); - b.createStore(tmp_sc, sc); - - if1.makeEndIf(); - } - - // s = (sc / ma + 1.0) / 2.0 - auto s = b.createBinOp(spv::Op::OpFDiv, float_type_, b.createLoad(sc), - b.createLoad(ma)); - s = b.createBinOp(spv::Op::OpFAdd, float_type_, s, b.makeFloatConstant(1.0)); - s = b.createBinOp(spv::Op::OpFDiv, float_type_, s, b.makeFloatConstant(2.0)); - - // t = (tc / ma + 1.0) / 2.0 - auto t = b.createBinOp(spv::Op::OpFDiv, float_type_, b.createLoad(tc), - b.createLoad(ma)); - t = b.createBinOp(spv::Op::OpFAdd, float_type_, t, b.makeFloatConstant(1.0)); - t = b.createBinOp(spv::Op::OpFDiv, float_type_, t, b.makeFloatConstant(2.0)); - - auto ma_times_two = b.createBinOp(spv::Op::OpFMul, float_type_, - b.createLoad(ma), b.makeFloatConstant(2.0)); - - // dest = vec4(t, s, 2.0 * ma, face_id) - auto ret = b.createCompositeConstruct( - vec4_float_type_, - std::vector({t, s, ma_times_two, b.createLoad(face_id)})); - b.makeReturn(false, ret); - - return function; -} - -bool SpirvShaderTranslator::ProcessVectorAluOperation( - const ParsedAluInstruction& instr, bool& close_predicated_block) { - close_predicated_block = false; - - if 
(!instr.vector_and_constant_result.GetUsedWriteMask() && - !AluVectorOpHasSideEffects(instr.vector_opcode)) { - return false; - } - - auto& b = *builder_; - - // TODO: If we have identical operands, reuse previous one. - Id sources[3] = {0}; - Id dest = vec4_float_zero_; - for (uint32_t i = 0; i < instr.vector_operand_count; i++) { - sources[i] = LoadFromOperand(instr.vector_operands[i]); - } - - switch (instr.vector_opcode) { - case AluVectorOpcode::kAdd: { - dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, sources[0], - sources[1]); - } break; - - case AluVectorOpcode::kCndEq: { - // dest = src0 == 0.0 ? src1 : src2; - auto c = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], - vec4_float_zero_); - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, sources[1], - sources[2]); - } break; - - case AluVectorOpcode::kCndGe: { - // dest = src0 >= 0.0 ? src1 : src2; - auto c = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_bool_type_, - sources[0], vec4_float_zero_); - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, sources[1], - sources[2]); - } break; - - case AluVectorOpcode::kCndGt: { - // dest = src0 > 0.0 ? 
src1 : src2; - auto c = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_, - sources[0], vec4_float_zero_); - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, sources[1], - sources[2]); - } break; - - case AluVectorOpcode::kCube: { - dest = - b.createFunctionCall(cube_function_, std::vector({sources[1]})); - } break; - - case AluVectorOpcode::kDst: { - auto src0_y = b.createCompositeExtract(sources[0], float_type_, 1); - auto src1_y = b.createCompositeExtract(sources[1], float_type_, 1); - auto dst_y = b.createBinOp(spv::Op::OpFMul, float_type_, src0_y, src1_y); - - auto src0_z = b.createCompositeExtract(sources[0], float_type_, 2); - auto src1_w = b.createCompositeExtract(sources[1], float_type_, 3); - dest = b.createCompositeConstruct( - vec4_float_type_, - std::vector({b.makeFloatConstant(1.f), dst_y, src0_z, src1_w})); - } break; - - case AluVectorOpcode::kDp2Add: { - auto src0_xy = b.createOp(spv::Op::OpVectorShuffle, vec2_float_type_, - {sources[0], sources[0], 0, 1}); - auto src1_xy = b.createOp(spv::Op::OpVectorShuffle, vec2_float_type_, - {sources[1], sources[1], 0, 1}); - auto src2_x = b.createCompositeExtract(sources[2], float_type_, 0); - dest = b.createBinOp(spv::Op::OpDot, float_type_, src0_xy, src1_xy); - dest = b.createBinOp(spv::Op::OpFAdd, float_type_, dest, src2_x); - dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_); - } break; - - case AluVectorOpcode::kDp3: { - auto src0_xyz = b.createOp(spv::Op::OpVectorShuffle, vec3_float_type_, - {sources[0], sources[0], 0, 1, 2}); - auto src1_xyz = b.createOp(spv::Op::OpVectorShuffle, vec3_float_type_, - {sources[1], sources[1], 0, 1, 2}); - dest = b.createBinOp(spv::Op::OpDot, float_type_, src0_xyz, src1_xyz); - dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_); - } break; - - case AluVectorOpcode::kDp4: { - dest = b.createBinOp(spv::Op::OpDot, float_type_, sources[0], sources[1]); - dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_); - } break; - 
- case AluVectorOpcode::kFloor: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, - spv::GLSLstd450::kFloor, - {sources[0]}); - } break; - - case AluVectorOpcode::kFrc: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, - spv::GLSLstd450::kFract, - {sources[0]}); - } break; - - case AluVectorOpcode::kKillEq: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, - sources[0], sources[1]); - cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); - b.createConditionalBranch(cond, kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = vec4_float_zero_; - } break; - - case AluVectorOpcode::kKillGe: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, - vec4_bool_type_, sources[0], sources[1]); - cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); - b.createConditionalBranch(cond, kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = vec4_float_zero_; - } break; - - case AluVectorOpcode::kKillGt: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_, - sources[0], sources[1]); - cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); - b.createConditionalBranch(cond, kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = vec4_float_zero_; - } break; - - case AluVectorOpcode::kKillNe: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_, - 
sources[0], sources[1]); - cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond); - b.createConditionalBranch(cond, kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = vec4_float_zero_; - } break; - - case AluVectorOpcode::kMad: { - dest = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, sources[0], - sources[1]); - dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, dest, sources[2]); - } break; - - case AluVectorOpcode::kMax4: { - auto src0_x = b.createCompositeExtract(sources[0], float_type_, 0); - auto src0_y = b.createCompositeExtract(sources[0], float_type_, 1); - auto src0_z = b.createCompositeExtract(sources[0], float_type_, 2); - auto src0_w = b.createCompositeExtract(sources[0], float_type_, 3); - - auto max_xy = CreateGlslStd450InstructionCall( - spv::NoPrecision, float_type_, spv::GLSLstd450::kFMax, - {src0_x, src0_y}); - auto max_zw = CreateGlslStd450InstructionCall( - spv::NoPrecision, float_type_, spv::GLSLstd450::kFMax, - {src0_z, src0_w}); - auto max_xyzw = CreateGlslStd450InstructionCall( - spv::NoPrecision, float_type_, spv::GLSLstd450::kFMax, - {max_xy, max_zw}); - - // FIXME: Docs say this only updates pv.x? - dest = b.smearScalar(spv::NoPrecision, max_xyzw, vec4_float_type_); - } break; - - case AluVectorOpcode::kMaxA: { - // a0 = clamp(floor(src0.w + 0.5), -256, 255) - auto addr = b.createCompositeExtract(sources[0], float_type_, 3); - addr = b.createBinOp(spv::Op::OpFAdd, float_type_, addr, - b.makeFloatConstant(0.5f)); - addr = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, addr); - addr = CreateGlslStd450InstructionCall( - spv::NoPrecision, int_type_, spv::GLSLstd450::kSClamp, - {addr, b.makeIntConstant(-256), b.makeIntConstant(255)}); - b.createStore(addr, a0_); - - // dest = src0 >= src1 ? 
src0 : src1 - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, - spv::GLSLstd450::kFMax, - {sources[0], sources[1]}); - } break; - - case AluVectorOpcode::kMax: { - if (sources[0] == sources[1]) { - // mov dst, src - dest = sources[0]; - break; - } - - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, - spv::GLSLstd450::kFMax, - {sources[0], sources[1]}); - } break; - - case AluVectorOpcode::kMin: { - if (sources[0] == sources[1]) { - // mov dst, src - dest = sources[0]; - break; - } - - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, - spv::GLSLstd450::kFMin, - {sources[0], sources[1]}); - } break; - - case AluVectorOpcode::kMul: { - dest = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, sources[0], - sources[1]); - } break; - - case AluVectorOpcode::kSetpEqPush: { - auto c0 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], - vec4_float_zero_); - auto c1 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[1], - vec4_float_zero_); - auto c_and = - b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); - auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); - c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_); - auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); - - // p0 - b.createStore(c_and_w, p0_); - close_predicated_block = true; - - // dest - auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); - s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x, - b.makeFloatConstant(1.f)); - auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_); - - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x, - vec4_float_zero_, s0); - } break; - - case AluVectorOpcode::kSetpGePush: { - auto c0 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], - vec4_float_zero_); - auto c1 = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_bool_type_, - sources[1], vec4_float_zero_); 
- auto c_and = - b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); - auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); - c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_); - auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); - - // p0 - b.createStore(c_and_w, p0_); - close_predicated_block = true; - - // dest - auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); - s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x, - b.makeFloatConstant(1.f)); - auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_); - - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x, - vec4_float_zero_, s0); - } break; - - case AluVectorOpcode::kSetpGtPush: { - auto c0 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], - vec4_float_zero_); - auto c1 = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_, - sources[1], vec4_float_zero_); - auto c_and = - b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); - auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); - c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_); - auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); - - // p0 - b.createStore(c_and_w, p0_); - close_predicated_block = true; - - // dest - auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); - s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x, - b.makeFloatConstant(1.f)); - auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_); - - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x, - vec4_float_zero_, s0); - } break; - - case AluVectorOpcode::kSetpNePush: { - auto c0 = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_, - sources[0], vec4_float_zero_); - auto c1 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[1], - vec4_float_zero_); - auto c_and = - b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); - auto c_and_x = 
b.createCompositeExtract(c_and, bool_type_, 0); - c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_); - auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); - - // p0 - b.createStore(c_and_w, p0_); - close_predicated_block = true; - - // dest - auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0); - s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x, - b.makeFloatConstant(1.f)); - auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_); - - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x, - vec4_float_zero_, s0); - } break; - - case AluVectorOpcode::kSeq: { - // foreach(el) src0 == src1 ? 1.0 : 0.0 - auto c = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], - sources[1]); - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, - vec4_float_one_, vec4_float_zero_); - } break; - - case AluVectorOpcode::kSge: { - // foreach(el) src0 >= src1 ? 1.0 : 0.0 - auto c = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_bool_type_, - sources[0], sources[1]); - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, - vec4_float_one_, vec4_float_zero_); - } break; - - case AluVectorOpcode::kSgt: { - // foreach(el) src0 > src1 ? 1.0 : 0.0 - auto c = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_, - sources[0], sources[1]); - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, - vec4_float_one_, vec4_float_zero_); - } break; - - case AluVectorOpcode::kSne: { - // foreach(el) src0 != src1 ? 
1.0 : 0.0 - auto c = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_, - sources[0], sources[1]); - dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, - vec4_float_one_, vec4_float_zero_); - } break; - - case AluVectorOpcode::kTrunc: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_, - GLSLstd450::kTrunc, {sources[0]}); - } break; - - default: - assert_unhandled_case(instr.vector_opcode); - break; - } - - assert_not_zero(dest); - assert_true(b.getTypeId(dest) == vec4_float_type_); - if (dest) { - b.createStore(dest, pv_); - return true; - } - return false; -} - -bool SpirvShaderTranslator::ProcessScalarAluOperation( - const ParsedAluInstruction& instr, bool& close_predicated_block) { - close_predicated_block = false; - - if (instr.scalar_opcode == ucode::AluScalarOpcode::kRetainPrev && - !instr.scalar_result.GetUsedWriteMask()) { - return false; - } - - auto& b = *builder_; - - // TODO: If we have identical operands, reuse previous one. - Id sources[3] = {0}; - Id dest = b.makeFloatConstant(0); - for (uint32_t i = 0, x = 0; i < instr.scalar_operand_count; i++) { - auto src = LoadFromOperand(instr.scalar_operands[i]); - - // Pull components out of the vector operands and use them as sources. 
- if (instr.scalar_operands[i].component_count > 1) { - for (uint32_t j = 0; j < instr.scalar_operands[i].component_count; j++) { - sources[x++] = b.createCompositeExtract(src, float_type_, j); - } - } else { - sources[x++] = src; - } - } - - switch (instr.scalar_opcode) { - case AluScalarOpcode::kAdds: - case AluScalarOpcode::kAddsc0: - case AluScalarOpcode::kAddsc1: { - // dest = src0 + src1 - dest = - b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0], sources[1]); - } break; - - case AluScalarOpcode::kAddsPrev: { - // dest = src0 + ps - dest = b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0], - b.createLoad(ps_)); - } break; - - case AluScalarOpcode::kCos: { - // dest = cos(src0) - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kCos, {sources[0]}); - } break; - - case AluScalarOpcode::kExp: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kExp2, {sources[0]}); - } break; - - case AluScalarOpcode::kFloors: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kFloor, {sources[0]}); - } break; - - case AluScalarOpcode::kFrcs: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kFract, {sources[0]}); - } break; - - case AluScalarOpcode::kKillsEq: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], - b.makeFloatConstant(0.f)); - b.createConditionalBranch(cond, kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = b.makeFloatConstant(0.f); - } break; - - case AluScalarOpcode::kKillsGe: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_, - sources[0], b.makeFloatConstant(0.f)); - b.createConditionalBranch(cond, 
kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = b.makeFloatConstant(0.f); - } break; - - case AluScalarOpcode::kKillsGt: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, - sources[0], b.makeFloatConstant(0.f)); - b.createConditionalBranch(cond, kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = b.makeFloatConstant(0.f); - } break; - - case AluScalarOpcode::kKillsNe: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, bool_type_, sources[0], - b.makeFloatConstant(0.f)); - b.createConditionalBranch(cond, kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = b.makeFloatConstant(0.f); - } break; - - case AluScalarOpcode::kKillsOne: { - auto continue_block = &b.makeNewBlock(); - auto kill_block = &b.makeNewBlock(); - auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], - b.makeFloatConstant(1.f)); - b.createConditionalBranch(cond, kill_block, continue_block); - - b.setBuildPoint(kill_block); - b.createNoResultOp(spv::Op::OpKill); - - b.setBuildPoint(continue_block); - dest = b.makeFloatConstant(0.f); - } break; - - case AluScalarOpcode::kLogc: { - auto t = CreateGlslStd450InstructionCall( - spv::NoPrecision, float_type_, spv::GLSLstd450::kLog2, {sources[0]}); - - // FIXME: We don't check to see if t == -INF, we just check for INF - auto c = b.createUnaryOp(spv::Op::OpIsInf, bool_type_, t); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, c, - b.makeFloatConstant(-FLT_MAX), t); - } break; - - case AluScalarOpcode::kLog: { - dest = CreateGlslStd450InstructionCall( - 
spv::NoPrecision, float_type_, spv::GLSLstd450::kLog2, {sources[0]}); - } break; - - case AluScalarOpcode::kMaxAsf: { - auto addr = - b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, sources[0]); - addr = CreateGlslStd450InstructionCall( - spv::NoPrecision, int_type_, spv::GLSLstd450::kSClamp, - {addr, b.makeIntConstant(-256), b.makeIntConstant(255)}); - b.createStore(addr, a0_); - - // dest = src0 >= src1 ? src0 : src1 - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - spv::GLSLstd450::kFMax, - {sources[0], sources[1]}); - } break; - - case AluScalarOpcode::kMaxAs: { - // a0 = clamp(floor(src0 + 0.5), -256, 255) - auto addr = b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0], - b.makeFloatConstant(0.5f)); - addr = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, addr); - addr = CreateGlslStd450InstructionCall( - spv::NoPrecision, int_type_, spv::GLSLstd450::kSClamp, - {addr, b.makeIntConstant(-256), b.makeIntConstant(255)}); - b.createStore(addr, a0_); - - // dest = src0 >= src1 ? 
src0 : src1 - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - spv::GLSLstd450::kFMax, - {sources[0], sources[1]}); - } break; - - case AluScalarOpcode::kMaxs: { - if (sources[0] == sources[1]) { - // mov dst, src - dest = sources[0]; - } - - // dest = max(src0, src1) - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kFMax, - {sources[0], sources[1]}); - } break; - - case AluScalarOpcode::kMins: { - if (sources[0] == sources[1]) { - // mov dst, src - dest = sources[0]; - } - - // dest = min(src0, src1) - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kFMin, - {sources[0], sources[1]}); - } break; - - case AluScalarOpcode::kMuls: - case AluScalarOpcode::kMulsc0: - case AluScalarOpcode::kMulsc1: { - // dest = src0 * src1 - dest = - b.createBinOp(spv::Op::OpFMul, float_type_, sources[0], sources[1]); - } break; - - case AluScalarOpcode::kMulsPrev: { - // dest = src0 * ps - dest = b.createBinOp(spv::Op::OpFMul, float_type_, sources[0], - b.createLoad(ps_)); - } break; - - case AluScalarOpcode::kMulsPrev2: { - // TODO: Uh... see GLSL translator for impl. - } break; - - case AluScalarOpcode::kRcpc: { - dest = b.createBinOp(spv::Op::OpFDiv, float_type_, - b.makeFloatConstant(1.f), sources[0]); - dest = CreateGlslStd450InstructionCall( - spv::NoPrecision, float_type_, spv::GLSLstd450::kFClamp, - {dest, b.makeFloatConstant(-FLT_MAX), b.makeFloatConstant(FLT_MAX)}); - } break; - - case AluScalarOpcode::kRcpf: { - dest = b.createBinOp(spv::Op::OpFDiv, float_type_, - b.makeFloatConstant(1.f), sources[0]); - auto c = b.createUnaryOp(spv::Op::OpIsInf, bool_type_, dest); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, c, - b.makeFloatConstant(0.f), dest); - } break; - - case AluScalarOpcode::kRcp: { - // dest = src0 != 0.0 ? 
1.0 / src0 : 0.0; - auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], - b.makeFloatConstant(0.f)); - auto d = b.createBinOp(spv::Op::OpFDiv, float_type_, - b.makeFloatConstant(1.f), sources[0]); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, c, - b.makeFloatConstant(0.f), d); - } break; - - case AluScalarOpcode::kRsqc: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - spv::GLSLstd450::kInverseSqrt, - {sources[0]}); - dest = CreateGlslStd450InstructionCall( - spv::NoPrecision, float_type_, spv::GLSLstd450::kFClamp, - {dest, b.makeFloatConstant(-FLT_MAX), b.makeFloatConstant(FLT_MAX)}); - } break; - - case AluScalarOpcode::kRsqf: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - spv::GLSLstd450::kInverseSqrt, - {sources[0]}); - auto c1 = b.createUnaryOp(spv::Op::OpIsInf, bool_type_, dest); - auto c2 = b.createUnaryOp(spv::Op::OpIsNan, bool_type_, dest); - auto c = b.createBinOp(spv::Op::OpLogicalOr, bool_type_, c1, c2); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, c, - b.makeFloatConstant(0.f), dest); - } break; - - case AluScalarOpcode::kRsq: { - // dest = src0 > 0.0 ? inversesqrt(src0) : 0.0; - auto c = b.createBinOp(spv::Op::OpFOrdLessThanEqual, bool_type_, - sources[0], b.makeFloatConstant(0.f)); - auto d = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - spv::GLSLstd450::kInverseSqrt, - {sources[0]}); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, c, - b.makeFloatConstant(0.f), d); - } break; - - case AluScalarOpcode::kSeqs: { - // dest = src0 == 0.0 ? 1.0 : 0.0; - auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], - b.makeFloatConstant(0.f)); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(1.f), b.makeFloatConstant(0.f)); - } break; - - case AluScalarOpcode::kSges: { - // dest = src0 >= 0.0 ? 
1.0 : 0.0; - auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_, - sources[0], b.makeFloatConstant(0.f)); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(1.f), b.makeFloatConstant(0.f)); - } break; - - case AluScalarOpcode::kSgts: { - // dest = src0 > 0.0 ? 1.0 : 0.0; - auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, - sources[0], b.makeFloatConstant(0.f)); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(1.f), b.makeFloatConstant(0.f)); - } break; - - case AluScalarOpcode::kSnes: { - // dest = src0 != 0.0 ? 1.0 : 0.0; - auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, bool_type_, sources[0], - b.makeFloatConstant(0.f)); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(1.f), b.makeFloatConstant(0.f)); - } break; - - case AluScalarOpcode::kSetpClr: { - b.createStore(b.makeBoolConstant(false), p0_); - close_predicated_block = true; - dest = b.makeFloatConstant(FLT_MAX); - } break; - - case AluScalarOpcode::kSetpEq: { - auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], - b.makeFloatConstant(0.f)); - // p0 = cond - b.createStore(cond, p0_); - close_predicated_block = true; - - // dest = cond ? 0.f : 1.f; - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)); - } break; - - case AluScalarOpcode::kSetpGe: { - auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_, - sources[0], b.makeFloatConstant(0.f)); - // p0 = cond - b.createStore(cond, p0_); - close_predicated_block = true; - - // dest = cond ? 
0.f : 1.f; - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)); - } break; - - case AluScalarOpcode::kSetpGt: { - auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, - sources[0], b.makeFloatConstant(0.f)); - // p0 = cond - b.createStore(cond, p0_); - close_predicated_block = true; - - // dest = cond ? 0.f : 1.f; - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)); - } break; - - case AluScalarOpcode::kSetpInv: { - // p0 = src0 == 1.0 - auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], - b.makeFloatConstant(1.f)); - b.createStore(cond, p0_); - close_predicated_block = true; - - // if (!cond) dest = src0 == 0.0 ? 1.0 : src0; - auto dst_cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, - sources[0], b.makeFloatConstant(0.f)); - auto dst_false = b.createTriOp(spv::Op::OpSelect, float_type_, dst_cond, - b.makeFloatConstant(1.f), sources[0]); - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(0.f), dst_false); - } break; - - case AluScalarOpcode::kSetpNe: { - auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, bool_type_, sources[0], - b.makeFloatConstant(0.f)); - - // p0 = cond - b.createStore(cond, p0_); - close_predicated_block = true; - - // dest = cond ? 
0.f : 1.f; - dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, - b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)); - } break; - - case AluScalarOpcode::kSetpPop: { - auto src = b.createBinOp(spv::Op::OpFSub, float_type_, sources[0], - b.makeFloatConstant(1.f)); - auto c = b.createBinOp(spv::Op::OpFOrdLessThanEqual, bool_type_, src, - b.makeFloatConstant(0.f)); - b.createStore(c, p0_); - close_predicated_block = true; - - dest = CreateGlslStd450InstructionCall( - spv::NoPrecision, float_type_, GLSLstd450::kFMax, - {sources[0], b.makeFloatConstant(0.f)}); - } break; - - case AluScalarOpcode::kSetpRstr: { - auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], - b.makeFloatConstant(0.f)); - b.createStore(c, p0_); - close_predicated_block = true; - dest = sources[0]; - } break; - - case AluScalarOpcode::kSin: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kSin, {sources[0]}); - } break; - - case AluScalarOpcode::kSqrt: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kSqrt, {sources[0]}); - } break; - - case AluScalarOpcode::kSubs: - case AluScalarOpcode::kSubsc0: - case AluScalarOpcode::kSubsc1: { - dest = - b.createBinOp(spv::Op::OpFSub, float_type_, sources[0], sources[1]); - } break; - - case AluScalarOpcode::kSubsPrev: { - dest = b.createBinOp(spv::Op::OpFSub, float_type_, sources[0], - b.createLoad(ps_)); - } break; - - case AluScalarOpcode::kTruncs: { - dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_, - GLSLstd450::kTrunc, {sources[0]}); - } break; - - default: - assert_unhandled_case(instr.scalar_opcode); - break; - } - - assert_not_zero(dest); - assert_true(b.getTypeId(dest) == float_type_); - if (dest) { - b.createStore(dest, ps_); - return true; - } - return false; -} - -Id SpirvShaderTranslator::CreateGlslStd450InstructionCall( - spv::Decoration precision, Id result_type, GLSLstd450 instruction_ordinal, - std::vector args) { - 
return builder_->createBuiltinCall(result_type, glsl_std_450_instruction_set_, - static_cast(instruction_ordinal), - args); -} - -Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { - auto& b = *builder_; - - Id storage_pointer = 0; - Id storage_type = vec4_float_type_; - spv::StorageClass storage_class; - Id storage_index = 0; // Storage index at lowest level - std::vector storage_offsets; // Offsets in nested arrays -> storage - - // Out of the 512 constant registers pixel shaders get the last 256. - uint32_t storage_base = 0; - if (op.storage_source == InstructionStorageSource::kConstantFloat) { - storage_base = is_pixel_shader() ? 256 : 0; - } - - switch (op.storage_addressing_mode) { - case InstructionStorageAddressingMode::kStatic: { - storage_index = b.makeUintConstant(storage_base + op.storage_index); - } break; - case InstructionStorageAddressingMode::kAddressAbsolute: { - // storage_index + a0 - storage_index = - b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_), - b.makeUintConstant(storage_base + op.storage_index)); - } break; - case InstructionStorageAddressingMode::kAddressRelative: { - // storage_index + aL.x - auto idx = b.createCompositeExtract(b.createLoad(aL_), uint_type_, 0); - storage_index = - b.createBinOp(spv::Op::OpIAdd, uint_type_, idx, - b.makeUintConstant(storage_base + op.storage_index)); - } break; - default: - assert_always(); - break; - } - - switch (op.storage_source) { - case InstructionStorageSource::kRegister: - storage_pointer = registers_ptr_; - storage_class = spv::StorageClass::StorageClassFunction; - storage_type = vec4_float_type_; - storage_offsets.push_back(storage_index); - assert_true(uint32_t(op.storage_index) < register_count()); - break; - case InstructionStorageSource::kConstantFloat: - storage_pointer = consts_; - storage_class = spv::StorageClass::StorageClassUniform; - storage_type = vec4_float_type_; - storage_offsets.push_back(b.makeUintConstant(0)); - 
storage_offsets.push_back(storage_index); - break; - case InstructionStorageSource::kVertexFetchConstant: - case InstructionStorageSource::kTextureFetchConstant: - // Should not reach this. - assert_always(); - break; - default: - assert_always(); - break; - } - - if (!storage_pointer) { - return b.createUndefined(vec4_float_type_); - } - - storage_pointer = - b.createAccessChain(storage_class, storage_pointer, storage_offsets); - auto storage_value = b.createLoad(storage_pointer); - assert_true(b.getTypeId(storage_value) == vec4_float_type_); - - if (op.component_count == 1) { - // Don't bother handling constant 0/1 fetches, as they're invalid in scalar - // opcodes. - uint32_t index = 0; - switch (op.components[0]) { - case SwizzleSource::kX: - index = 0; - break; - case SwizzleSource::kY: - index = 1; - break; - case SwizzleSource::kZ: - index = 2; - break; - case SwizzleSource::kW: - index = 3; - break; - case SwizzleSource::k0: - assert_always(); - break; - case SwizzleSource::k1: - assert_always(); - break; - } - - storage_value = b.createCompositeExtract(storage_value, float_type_, index); - storage_type = float_type_; - } - - if (op.is_absolute_value) { - storage_value = CreateGlslStd450InstructionCall( - spv::NoPrecision, storage_type, GLSLstd450::kFAbs, {storage_value}); - } - if (op.is_negated) { - storage_value = - b.createUnaryOp(spv::Op::OpFNegate, storage_type, storage_value); - } - - // swizzle - if (op.component_count > 1 && !op.IsStandardSwizzle()) { - std::vector operands; - operands.push_back(storage_value); - operands.push_back(b.makeCompositeConstant( - vec2_float_type_, - std::vector({b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)}))); - - // Components start from left and are duplicated rightwards - // e.g. count = 1, xxxx / count = 2, xyyy ... 
- for (uint32_t i = 0; i < 4; i++) { - auto swiz = op.components[i]; - if (i > op.component_count - 1) { - swiz = op.components[op.component_count - 1]; - } - - switch (swiz) { - case SwizzleSource::kX: - operands.push_back(0); - break; - case SwizzleSource::kY: - operands.push_back(1); - break; - case SwizzleSource::kZ: - operands.push_back(2); - break; - case SwizzleSource::kW: - operands.push_back(3); - break; - case SwizzleSource::k0: - operands.push_back(4); - break; - case SwizzleSource::k1: - operands.push_back(5); - break; - } - } - - storage_value = - b.createOp(spv::Op::OpVectorShuffle, storage_type, operands); - } - - return storage_value; -} - -void SpirvShaderTranslator::StoreToResult(Id source_value_id, - const InstructionResult& result) { - auto& b = *builder_; - - if (result.storage_target == InstructionStorageTarget::kNone) { - // No-op? - return; - } - - uint32_t used_write_mask = result.GetUsedWriteMask(); - if (!used_write_mask) { - return; - } - - Id storage_pointer = 0; - Id storage_type = vec4_float_type_; - spv::StorageClass storage_class; - Id storage_index = 0; // Storage index at lowest level - std::vector storage_offsets; // Offsets in nested arrays -> storage - - switch (result.storage_addressing_mode) { - case InstructionStorageAddressingMode::kStatic: { - storage_index = b.makeUintConstant(result.storage_index); - } break; - case InstructionStorageAddressingMode::kAddressAbsolute: { - // storage_index + a0 - storage_index = - b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_), - b.makeUintConstant(result.storage_index)); - } break; - case InstructionStorageAddressingMode::kAddressRelative: { - // storage_index + aL.x - auto idx = b.createCompositeExtract(b.createLoad(aL_), uint_type_, 0); - storage_index = b.createBinOp(spv::Op::OpIAdd, uint_type_, idx, - b.makeUintConstant(result.storage_index)); - } break; - default: - assert_always(); - return; - } - - bool storage_array; - switch (result.storage_target) { - case 
InstructionStorageTarget::kRegister: - storage_pointer = registers_ptr_; - storage_class = spv::StorageClass::StorageClassFunction; - storage_type = vec4_float_type_; - storage_offsets.push_back(storage_index); - storage_array = true; - assert_true(uint32_t(result.storage_index) < register_count()); - break; - case InstructionStorageTarget::kInterpolator: - assert_true(is_vertex_shader()); - storage_pointer = interpolators_; - storage_class = spv::StorageClass::StorageClassOutput; - storage_type = vec4_float_type_; - storage_offsets.push_back(storage_index); - storage_array = true; - break; - case InstructionStorageTarget::kPosition: - assert_true(is_vertex_shader()); - assert_not_zero(pos_); - storage_pointer = pos_; - storage_class = spv::StorageClass::StorageClassOutput; - storage_type = vec4_float_type_; - storage_offsets.push_back(0); - storage_array = false; - break; - case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex: - assert_true(is_vertex_shader()); - storage_pointer = point_size_; - storage_class = spv::StorageClass::StorageClassOutput; - storage_type = float_type_; - storage_offsets.push_back(0); - storage_array = false; - break; - case InstructionStorageTarget::kColor: - assert_true(is_pixel_shader()); - assert_not_zero(frag_outputs_); - storage_pointer = frag_outputs_; - storage_class = spv::StorageClass::StorageClassOutput; - storage_type = vec4_float_type_; - storage_offsets.push_back(storage_index); - storage_array = true; - break; - case InstructionStorageTarget::kDepth: - assert_true(is_pixel_shader()); - storage_pointer = frag_depth_; - storage_class = spv::StorageClass::StorageClassOutput; - storage_type = float_type_; - storage_offsets.push_back(0); - storage_array = false; - break; - case InstructionStorageTarget::kNone: - assert_always(); - break; - default: - assert_unhandled_case(result.storage_target); - break; - } - - if (!storage_pointer) { - assert_always(); - return; - } - - if (storage_array) { - storage_pointer = - 
b.createAccessChain(storage_class, storage_pointer, storage_offsets); - } - - bool source_is_scalar = b.isScalar(source_value_id); - bool storage_is_scalar = b.isScalarType(b.getDerefTypeId(storage_pointer)); - spv::Id source_type = b.getTypeId(source_value_id); - - // Only load from storage if we need it later. - Id storage_value = 0; - if ((source_is_scalar && !storage_is_scalar) || used_write_mask != 0b1111) { - storage_value = b.createLoad(storage_pointer); - } - - // Clamp the input value. - if (result.is_clamped) { - source_value_id = CreateGlslStd450InstructionCall( - spv::NoPrecision, source_type, spv::GLSLstd450::kFClamp, - {source_value_id, - b.smearScalar(spv::NoPrecision, b.makeFloatConstant(0.f), source_type), - b.smearScalar(spv::NoPrecision, b.makeFloatConstant(1.f), - source_type)}); - } - - // destination swizzle - if (!result.IsStandardSwizzle() && !source_is_scalar) { - std::vector operands; - operands.push_back(source_value_id); - operands.push_back(b.makeCompositeConstant( - vec2_float_type_, - std::vector({b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)}))); - - // Components start from left and are duplicated rightwards - // e.g. count = 1, xxxx / count = 2, xyyy ... - uint32_t source_components = b.getNumComponents(source_value_id); - for (int i = 0; i < 4; i++) { - if (!(used_write_mask & (1 << i))) { - // Undefined / don't care. 
- operands.push_back(0); - continue; - } - - auto swiz = result.components[i]; - switch (swiz) { - case SwizzleSource::kX: - operands.push_back(0); - break; - case SwizzleSource::kY: - operands.push_back(1); - break; - case SwizzleSource::kZ: - operands.push_back(2); - break; - case SwizzleSource::kW: - operands.push_back(3); - break; - case SwizzleSource::k0: - operands.push_back(source_components + 0); - break; - case SwizzleSource::k1: - operands.push_back(source_components + 1); - break; - } - } - - source_value_id = - b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_, operands); - } - - // write mask - if (used_write_mask != 0b1111 && !source_is_scalar && !storage_is_scalar) { - std::vector operands; - operands.push_back(source_value_id); - operands.push_back(storage_value); - - for (int i = 0; i < b.getNumTypeComponents(storage_type); i++) { - operands.push_back((used_write_mask & (1 << i)) - ? i - : b.getNumComponents(source_value_id) + i); - } - - source_value_id = - b.createOp(spv::Op::OpVectorShuffle, storage_type, operands); - } else if (source_is_scalar && !storage_is_scalar) { - assert_not_zero(used_write_mask); - - if (used_write_mask == 0b1111) { - source_value_id = - b.smearScalar(spv::NoPrecision, source_value_id, storage_type); - } else { - // Find first enabled component - uint32_t index = 0; - for (uint32_t i = 0; i < 4; i++) { - if (used_write_mask & (1 << i)) { - index = i; - break; - } - } - source_value_id = b.createCompositeInsert(source_value_id, storage_value, - storage_type, index); - } - } else if (!source_is_scalar && storage_is_scalar) { - // Num writes /needs/ to be 1, and let's assume it's the first element. - assert_true(xe::bit_count(used_write_mask) == 1); - - for (uint32_t i = 0; i < 4; i++) { - if (used_write_mask & (1 << i)) { - source_value_id = - b.createCompositeExtract(source_value_id, storage_type, 0); - break; - } - } - } - - // Perform store into the pointer. 
- assert_true(b.getNumComponents(source_value_id) == - b.getNumTypeComponents(storage_type)); - - assert_true(b.getTypeId(source_value_id) == - b.getDerefTypeId(storage_pointer)); - b.createStore(source_value_id, storage_pointer); -} - -} // namespace gpu -} // namespace xe diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h deleted file mode 100644 index 044dea019..000000000 --- a/src/xenia/gpu/spirv_shader_translator.h +++ /dev/null @@ -1,187 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_ -#define XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_ - -#include -#include -#include -#include - -#include "third_party/glslang-spirv/SpvBuilder.h" -#include "third_party/spirv/GLSL.std.450.hpp11" -#include "xenia/gpu/shader_translator.h" -#include "xenia/ui/spirv/spirv_disassembler.h" -#include "xenia/ui/spirv/spirv_validator.h" - -namespace xe { -namespace gpu { - -// Push constants embedded within the command buffer. -// The total size of this struct must be <= 128b (as that's the commonly -// supported size). 
-struct SpirvPushConstants { - // Accessible to vertex shader only: - float window_scale[4]; // scale x/y, offset x/y (pixels) - float vtx_fmt[4]; - - // Accessible to geometry shader only: - float point_size[4]; // psx, psy, unused, unused - - // Accessible to fragment shader only: - float alpha_test[4]; // alpha test enable, func, ref - float color_exp_bias[4]; - uint32_t ps_param_gen; -}; -static_assert(sizeof(SpirvPushConstants) <= 128, - "Push constants must fit <= 128b"); -constexpr uint32_t kSpirvPushConstantVertexRangeOffset = 0; -constexpr uint32_t kSpirvPushConstantVertexRangeSize = (sizeof(float) * 4) * 2; -constexpr uint32_t kSpirvPushConstantGeometryRangeOffset = - kSpirvPushConstantVertexRangeOffset + kSpirvPushConstantVertexRangeSize; -constexpr uint32_t kSpirvPushConstantGeometryRangeSize = (sizeof(float) * 4); -constexpr uint32_t kSpirvPushConstantFragmentRangeOffset = - kSpirvPushConstantGeometryRangeOffset + kSpirvPushConstantGeometryRangeSize; -constexpr uint32_t kSpirvPushConstantFragmentRangeSize = - (sizeof(float) * 4) + sizeof(uint32_t); -constexpr uint32_t kSpirvPushConstantsSize = sizeof(SpirvPushConstants); - -class SpirvShaderTranslator : public ShaderTranslator { - public: - SpirvShaderTranslator(); - ~SpirvShaderTranslator() override; - - protected: - void StartTranslation() override; - std::vector CompleteTranslation() override; - void PostTranslation(Shader* shader) override; - - void PreProcessControlFlowInstructions( - std::vector instrs) override; - void ProcessLabel(uint32_t cf_index) override; - void ProcessControlFlowInstructionBegin(uint32_t cf_index) override; - void ProcessControlFlowInstructionEnd(uint32_t cf_index) override; - void ProcessControlFlowNopInstruction(uint32_t cf_index) override; - void ProcessExecInstructionBegin(const ParsedExecInstruction& instr) override; - void ProcessExecInstructionEnd(const ParsedExecInstruction& instr) override; - void ProcessLoopStartInstruction( - const ParsedLoopStartInstruction& 
instr) override; - void ProcessLoopEndInstruction( - const ParsedLoopEndInstruction& instr) override; - void ProcessCallInstruction(const ParsedCallInstruction& instr) override; - void ProcessReturnInstruction(const ParsedReturnInstruction& instr) override; - void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override; - void ProcessAllocInstruction(const ParsedAllocInstruction& instr) override; - void ProcessVertexFetchInstruction( - const ParsedVertexFetchInstruction& instr) override; - void ProcessTextureFetchInstruction( - const ParsedTextureFetchInstruction& instr) override; - void ProcessAluInstruction(const ParsedAluInstruction& instr) override; - - private: - spv::Function* CreateCubeFunction(); - - bool ProcessVectorAluOperation(const ParsedAluInstruction& instr, - bool& close_predicate_block); - bool ProcessScalarAluOperation(const ParsedAluInstruction& instr, - bool& close_predicate_block); - - spv::Id BitfieldExtract(spv::Id result_type, spv::Id base, bool is_signed, - uint32_t offset, uint32_t count); - spv::Id ConvertNormVar(spv::Id var, spv::Id result_type, uint32_t bits, - bool is_signed); - - // Creates a call to the given GLSL intrinsic. - spv::Id CreateGlslStd450InstructionCall(spv::Decoration precision, - spv::Id result_type, - spv::GLSLstd450 instruction_ordinal, - std::vector args); - - // Loads an operand into a value. - // The value returned will be in the form described in the operand (number of - // components, etc). - spv::Id LoadFromOperand(const InstructionOperand& op); - // Stores a value based on the specified result information. - // The value will be transformed into the appropriate form for the result and - // the proper components will be selected. 
- void StoreToResult(spv::Id source_value_id, const InstructionResult& result); - - xe::ui::spirv::SpirvDisassembler disassembler_; - xe::ui::spirv::SpirvValidator validator_; - - // True if there's an open predicated block - bool open_predicated_block_ = false; - bool predicated_block_cond_ = false; - spv::Block* predicated_block_end_ = nullptr; - - // Exec block conditional? - bool exec_cond_ = false; - spv::Block* exec_skip_block_ = nullptr; - - // TODO(benvanik): replace with something better, make reusable, etc. - std::unique_ptr builder_; - spv::Id glsl_std_450_instruction_set_ = 0; - - // Generated function - spv::Function* translated_main_ = nullptr; - spv::Function* cube_function_ = nullptr; - - // Types. - spv::Id float_type_ = 0, bool_type_ = 0, int_type_ = 0, uint_type_ = 0; - spv::Id vec2_int_type_ = 0, vec2_uint_type_ = 0, vec3_int_type_ = 0; - spv::Id vec2_float_type_ = 0, vec3_float_type_ = 0, vec4_float_type_ = 0; - spv::Id vec4_int_type_ = 0, vec4_uint_type_ = 0; - spv::Id vec2_bool_type_ = 0, vec3_bool_type_ = 0, vec4_bool_type_ = 0; - spv::Id image_2d_type_ = 0, image_3d_type_ = 0, image_cube_type_ = 0; - - // Constants. - spv::Id vec4_float_zero_ = 0, vec4_float_one_ = 0; - - // Array of AMD registers. - // These values are all pointers. 
- spv::Id registers_ptr_ = 0, registers_type_ = 0; - spv::Id consts_ = 0, a0_ = 0, p0_ = 0; - spv::Id aL_ = 0; // Loop index stack - .x is active loop - spv::Id loop_count_ = 0; // Loop counter stack - spv::Id ps_ = 0, pv_ = 0; // IDs of previous results - spv::Id pc_ = 0; // Program counter - spv::Id lod_ = 0; // LOD register - spv::Id pos_ = 0; - spv::Id push_consts_ = 0; - spv::Id interpolators_ = 0; - spv::Id point_size_ = 0; - spv::Id point_coord_ = 0; - spv::Id vertex_idx_ = 0; - spv::Id frag_outputs_ = 0, frag_depth_ = 0; - spv::Id samplers_ = 0; - spv::Id tex_[3] = {0}; // Images {2D, 3D, Cube} - std::unordered_map tex_binding_map_; - spv::Id vtx_ = 0; // Vertex buffer array (32 runtime arrays) - std::unordered_map vtx_binding_map_; - - // SPIR-V IDs that are part of the in/out interface. - std::vector interface_ids_; - - struct CFBlock { - spv::Block* block = nullptr; - bool labelled = false; - }; - std::vector cf_blocks_; - spv::Block* switch_break_block_ = nullptr; - spv::Block* loop_head_block_ = nullptr; - spv::Block* loop_body_block_ = nullptr; - spv::Block* loop_cont_block_ = nullptr; - spv::Block* loop_exit_block_ = nullptr; -}; - -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_ diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc deleted file mode 100644 index e953e72a9..000000000 --- a/src/xenia/gpu/vulkan/buffer_cache.cc +++ /dev/null @@ -1,809 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#include "xenia/gpu/vulkan/buffer_cache.h" - -#include "xenia/base/logging.h" -#include "xenia/base/math.h" -#include "xenia/base/memory.h" -#include "xenia/base/profiling.h" -#include "xenia/gpu/gpu_flags.h" -#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" -#include "xenia/ui/vulkan/vulkan_mem_alloc.h" - -using namespace xe::gpu::xenos; - -namespace xe { -namespace gpu { -namespace vulkan { - -#if XE_ARCH_AMD64 -void copy_cmp_swap_16_unaligned(void* dest_ptr, const void* src_ptr, - uint16_t cmp_value, size_t count) { - auto dest = reinterpret_cast(dest_ptr); - auto src = reinterpret_cast(src_ptr); - __m128i shufmask = - _mm_set_epi8(0x0E, 0x0F, 0x0C, 0x0D, 0x0A, 0x0B, 0x08, 0x09, 0x06, 0x07, - 0x04, 0x05, 0x02, 0x03, 0x00, 0x01); - __m128i cmpval = _mm_set1_epi16(cmp_value); - - size_t i; - for (i = 0; i + 8 <= count; i += 8) { - __m128i input = _mm_loadu_si128(reinterpret_cast(&src[i])); - __m128i output = _mm_shuffle_epi8(input, shufmask); - - __m128i mask = _mm_cmpeq_epi16(output, cmpval); - output = _mm_or_si128(output, mask); - _mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output); - } - for (; i < count; ++i) { // handle residual elements - dest[i] = byte_swap(src[i]); - } -} - -void copy_cmp_swap_32_unaligned(void* dest_ptr, const void* src_ptr, - uint32_t cmp_value, size_t count) { - auto dest = reinterpret_cast(dest_ptr); - auto src = reinterpret_cast(src_ptr); - __m128i shufmask = - _mm_set_epi8(0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B, 0x04, 0x05, - 0x06, 0x07, 0x00, 0x01, 0x02, 0x03); - __m128i cmpval = _mm_set1_epi32(cmp_value); - - size_t i; - for (i = 0; i + 4 <= count; i += 4) { - __m128i input = _mm_loadu_si128(reinterpret_cast(&src[i])); - __m128i output = _mm_shuffle_epi8(input, shufmask); - - __m128i mask = _mm_cmpeq_epi32(output, cmpval); - output = _mm_or_si128(output, mask); - _mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), 
output); - } - for (; i < count; ++i) { // handle residual elements - dest[i] = byte_swap(src[i]); - } -} -#else -void copy_and_swap_16_unaligned(void* dest_ptr, const void* src_ptr, - uint16_t cmp_value, size_t count) { - auto dest = reinterpret_cast(dest_ptr); - auto src = reinterpret_cast(src_ptr); - for (size_t i = 0; i < count; ++i) { - uint16_t value = byte_swap(src[i]); - dest[i] = value == cmp_value ? 0xFFFF : value; - } -} - -void copy_and_swap_32_unaligned(void* dest_ptr, const void* src_ptr, - uint32_t cmp_value, size_t count) { - auto dest = reinterpret_cast(dest_ptr); - auto src = reinterpret_cast(src_ptr); - for (size_t i = 0; i < count; ++i) { - uint32_t value = byte_swap(src[i]); - dest[i] = value == cmp_value ? 0xFFFFFFFF : value; - } -} -#endif - -using xe::ui::vulkan::CheckResult; - -constexpr VkDeviceSize kConstantRegisterUniformRange = - 512 * 4 * 4 + 8 * 4 + 32 * 4; - -BufferCache::BufferCache(RegisterFile* register_file, Memory* memory, - ui::vulkan::VulkanDevice* device, size_t capacity) - : register_file_(register_file), memory_(memory), device_(device) { - transient_buffer_ = std::make_unique( - device_, - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - capacity, 256); -} - -BufferCache::~BufferCache() { Shutdown(); } - -VkResult BufferCache::Initialize() { - VkMemoryRequirements pool_reqs; - transient_buffer_->GetBufferMemoryRequirements(&pool_reqs); - gpu_memory_pool_ = device_->AllocateMemory(pool_reqs); - - VkResult status = transient_buffer_->Initialize(gpu_memory_pool_, 0); - if (status != VK_SUCCESS) { - return status; - } - - // Create a memory allocator for textures. 
- VmaVulkanFunctions vulkan_funcs = {}; - ui::vulkan::FillVMAVulkanFunctions(&vulkan_funcs); - - VmaAllocatorCreateInfo alloc_info = { - 0, *device_, *device_, 0, 0, nullptr, nullptr, 0, nullptr, &vulkan_funcs, - }; - - status = vmaCreateAllocator(&alloc_info, &mem_allocator_); - if (status != VK_SUCCESS) { - return status; - } - - status = CreateConstantDescriptorSet(); - if (status != VK_SUCCESS) { - return status; - } - - status = CreateVertexDescriptorPool(); - if (status != VK_SUCCESS) { - return status; - } - - return VK_SUCCESS; -} - -VkResult xe::gpu::vulkan::BufferCache::CreateVertexDescriptorPool() { - VkResult status; - - std::vector pool_sizes; - pool_sizes.push_back({ - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - 32 * 16384, - }); - vertex_descriptor_pool_ = std::make_unique( - *device_, 32 * 16384, pool_sizes); - - // 32 storage buffers available to vertex shader. - // TODO(DrChat): In the future, this could hold memexport staging data. - VkDescriptorSetLayoutBinding binding = { - 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - 32, VK_SHADER_STAGE_VERTEX_BIT, - nullptr, - }; - - VkDescriptorSetLayoutCreateInfo layout_info = { - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - nullptr, - 0, - 1, - &binding, - }; - status = vkCreateDescriptorSetLayout(*device_, &layout_info, nullptr, - &vertex_descriptor_set_layout_); - if (status != VK_SUCCESS) { - return status; - } - - return VK_SUCCESS; -} - -void xe::gpu::vulkan::BufferCache::FreeVertexDescriptorPool() { - vertex_descriptor_pool_.reset(); - - VK_SAFE_DESTROY(vkDestroyDescriptorSetLayout, *device_, - vertex_descriptor_set_layout_, nullptr); -} - -VkResult BufferCache::CreateConstantDescriptorSet() { - VkResult status = VK_SUCCESS; - - // Descriptor pool used for all of our cached descriptors. - // In the steady state we don't allocate anything, so these are all manually - // managed. 
- VkDescriptorPoolCreateInfo transient_descriptor_pool_info; - transient_descriptor_pool_info.sType = - VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - transient_descriptor_pool_info.pNext = nullptr; - transient_descriptor_pool_info.flags = - VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - transient_descriptor_pool_info.maxSets = 1; - VkDescriptorPoolSize pool_sizes[1]; - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - pool_sizes[0].descriptorCount = 2; - transient_descriptor_pool_info.poolSizeCount = 1; - transient_descriptor_pool_info.pPoolSizes = pool_sizes; - status = vkCreateDescriptorPool(*device_, &transient_descriptor_pool_info, - nullptr, &constant_descriptor_pool_); - if (status != VK_SUCCESS) { - return status; - } - - // Create the descriptor set layout used for our uniform buffer. - // As it is a static binding that uses dynamic offsets during draws we can - // create this once and reuse it forever. - VkDescriptorSetLayoutBinding bindings[2] = {}; - - // Vertex constants - bindings[0].binding = 0; - bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - bindings[0].descriptorCount = 1; - bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[0].pImmutableSamplers = nullptr; - - // Fragment constants - bindings[1].binding = 1; - bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - bindings[1].descriptorCount = 1; - bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[1].pImmutableSamplers = nullptr; - - VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {}; - descriptor_set_layout_info.sType = - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - descriptor_set_layout_info.pNext = nullptr; - descriptor_set_layout_info.flags = 0; - descriptor_set_layout_info.bindingCount = - static_cast(xe::countof(bindings)); - descriptor_set_layout_info.pBindings = bindings; - status = - vkCreateDescriptorSetLayout(*device_, &descriptor_set_layout_info, - nullptr, 
&constant_descriptor_set_layout_); - if (status != VK_SUCCESS) { - return status; - } - - // Create the descriptor we'll use for the uniform buffer. - // This is what we hand out to everyone (who then also needs to use our - // offsets). - VkDescriptorSetAllocateInfo set_alloc_info; - set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - set_alloc_info.pNext = nullptr; - set_alloc_info.descriptorPool = constant_descriptor_pool_; - set_alloc_info.descriptorSetCount = 1; - set_alloc_info.pSetLayouts = &constant_descriptor_set_layout_; - status = vkAllocateDescriptorSets(*device_, &set_alloc_info, - &constant_descriptor_set_); - if (status != VK_SUCCESS) { - return status; - } - - // Initialize descriptor set with our buffers. - VkDescriptorBufferInfo buffer_info; - buffer_info.buffer = transient_buffer_->gpu_buffer(); - buffer_info.offset = 0; - buffer_info.range = kConstantRegisterUniformRange; - - VkWriteDescriptorSet descriptor_writes[2]; - auto& vertex_uniform_binding_write = descriptor_writes[0]; - vertex_uniform_binding_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vertex_uniform_binding_write.pNext = nullptr; - vertex_uniform_binding_write.dstSet = constant_descriptor_set_; - vertex_uniform_binding_write.dstBinding = 0; - vertex_uniform_binding_write.dstArrayElement = 0; - vertex_uniform_binding_write.descriptorCount = 1; - vertex_uniform_binding_write.descriptorType = - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - vertex_uniform_binding_write.pBufferInfo = &buffer_info; - auto& fragment_uniform_binding_write = descriptor_writes[1]; - fragment_uniform_binding_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - fragment_uniform_binding_write.pNext = nullptr; - fragment_uniform_binding_write.dstSet = constant_descriptor_set_; - fragment_uniform_binding_write.dstBinding = 1; - fragment_uniform_binding_write.dstArrayElement = 0; - fragment_uniform_binding_write.descriptorCount = 1; - fragment_uniform_binding_write.descriptorType = 
- VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - fragment_uniform_binding_write.pBufferInfo = &buffer_info; - vkUpdateDescriptorSets(*device_, 2, descriptor_writes, 0, nullptr); - - return VK_SUCCESS; -} - -void BufferCache::FreeConstantDescriptorSet() { - if (constant_descriptor_set_) { - vkFreeDescriptorSets(*device_, constant_descriptor_pool_, 1, - &constant_descriptor_set_); - constant_descriptor_set_ = nullptr; - } - - VK_SAFE_DESTROY(vkDestroyDescriptorSetLayout, *device_, - constant_descriptor_set_layout_, nullptr); - VK_SAFE_DESTROY(vkDestroyDescriptorPool, *device_, constant_descriptor_pool_, - nullptr); -} - -void BufferCache::Shutdown() { - if (mem_allocator_) { - vmaDestroyAllocator(mem_allocator_); - mem_allocator_ = nullptr; - } - - FreeConstantDescriptorSet(); - FreeVertexDescriptorPool(); - - transient_buffer_->Shutdown(); - VK_SAFE_DESTROY(vkFreeMemory, *device_, gpu_memory_pool_, nullptr); -} - -std::pair BufferCache::UploadConstantRegisters( - VkCommandBuffer command_buffer, - const Shader::ConstantRegisterMap& vertex_constant_register_map, - const Shader::ConstantRegisterMap& pixel_constant_register_map, - VkFence fence) { - // Fat struct, including all registers: - // struct { - // vec4 float[512]; - // uint bool[8]; - // uint loop[32]; - // }; - auto offset = AllocateTransientData(kConstantRegisterUniformRange, fence); - if (offset == VK_WHOLE_SIZE) { - // OOM. - return {VK_WHOLE_SIZE, VK_WHOLE_SIZE}; - } - - // Copy over all the registers. 
- const auto& values = register_file_->values; - uint8_t* dest_ptr = transient_buffer_->host_base() + offset; - std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_000_X].f32, - (512 * 4 * 4)); - dest_ptr += 512 * 4 * 4; - std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, - 8 * 4); - dest_ptr += 8 * 4; - std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32, - 32 * 4); - dest_ptr += 32 * 4; - - transient_buffer_->Flush(offset, kConstantRegisterUniformRange); - - // Append a barrier to the command buffer. - VkBufferMemoryBarrier barrier = { - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - nullptr, - VK_ACCESS_HOST_WRITE_BIT, - VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_SHADER_READ_BIT, - VK_QUEUE_FAMILY_IGNORED, - VK_QUEUE_FAMILY_IGNORED, - transient_buffer_->gpu_buffer(), - offset, - kConstantRegisterUniformRange, - }; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT, - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 1, - &barrier, 0, nullptr); - - return {offset, offset}; - -// Packed upload code. -// This is not currently supported by the shaders, but would be awesome. -// We should be able to use this for any shader that does not do dynamic -// constant indexing. -#if 0 - // Allocate space in the buffer for our data. - auto offset = - AllocateTransientData(constant_register_map.packed_byte_length, fence); - if (offset == VK_WHOLE_SIZE) { - // OOM. - return VK_WHOLE_SIZE; - } - - // Run through registers and copy them into the buffer. - // TODO(benvanik): optimize this - it's hit twice every call. 
- const auto& values = register_file_->values; - uint8_t* dest_ptr = - reinterpret_cast(transient_buffer_data_) + offset; - for (int i = 0; i < 4; ++i) { - auto piece = constant_register_map.float_bitmap[i]; - if (!piece) { - continue; - } - for (int j = 0, sh = 0; j < 64; ++j, sh << 1) { - if (piece & sh) { - xe::copy_128_aligned( - dest_ptr, - &values[XE_GPU_REG_SHADER_CONSTANT_000_X + i * 64 + j].f32, 1); - dest_ptr += 16; - } - } - } - for (int i = 0; i < 32; ++i) { - if (constant_register_map.loop_bitmap & (1 << i)) { - xe::store(dest_ptr, - values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + i].u32); - dest_ptr += 4; - } - } - for (int i = 0; i < 8; ++i) { - if (constant_register_map.bool_bitmap[i]) { - xe::store( - dest_ptr, values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 + i].u32); - dest_ptr += 4; - } - } - - return offset; -#endif // 0 -} - -std::pair BufferCache::UploadIndexBuffer( - VkCommandBuffer command_buffer, uint32_t source_addr, - uint32_t source_length, xenos::IndexFormat format, VkFence fence) { - // Allocate space in the buffer for our data. - auto offset = AllocateTransientData(source_length, fence); - if (offset == VK_WHOLE_SIZE) { - // OOM. - return {nullptr, VK_WHOLE_SIZE}; - } - - const void* source_ptr = memory_->TranslatePhysical(source_addr); - - uint32_t prim_reset_index = - register_file_->values[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32; - bool prim_reset_enabled = - !!(register_file_->values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21)); - - // Copy data into the buffer. If primitive reset is enabled, translate any - // primitive reset indices to something Vulkan understands. - // TODO(benvanik): memcpy then use compute shaders to swap? - if (prim_reset_enabled) { - if (format == xenos::IndexFormat::kInt16) { - // Endian::k8in16, swap half-words. 
- copy_cmp_swap_16_unaligned( - transient_buffer_->host_base() + offset, source_ptr, - static_cast(prim_reset_index), source_length / 2); - } else if (format == xenos::IndexFormat::kInt32) { - // Endian::k8in32, swap words. - copy_cmp_swap_32_unaligned(transient_buffer_->host_base() + offset, - source_ptr, prim_reset_index, - source_length / 4); - } - } else { - if (format == xenos::IndexFormat::kInt16) { - // Endian::k8in16, swap half-words. - xe::copy_and_swap_16_unaligned(transient_buffer_->host_base() + offset, - source_ptr, source_length / 2); - } else if (format == xenos::IndexFormat::kInt32) { - // Endian::k8in32, swap words. - xe::copy_and_swap_32_unaligned(transient_buffer_->host_base() + offset, - source_ptr, source_length / 4); - } - } - - transient_buffer_->Flush(offset, source_length); - - // Append a barrier to the command buffer. - VkBufferMemoryBarrier barrier = { - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - nullptr, - VK_ACCESS_HOST_WRITE_BIT, - VK_ACCESS_INDEX_READ_BIT, - VK_QUEUE_FAMILY_IGNORED, - VK_QUEUE_FAMILY_IGNORED, - transient_buffer_->gpu_buffer(), - offset, - source_length, - }; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 1, - &barrier, 0, nullptr); - - return {transient_buffer_->gpu_buffer(), offset}; -} - -std::pair BufferCache::UploadVertexBuffer( - VkCommandBuffer command_buffer, uint32_t source_addr, - uint32_t source_length, xenos::Endian endian, VkFence fence) { - auto offset = FindCachedTransientData(source_addr, source_length); - if (offset != VK_WHOLE_SIZE) { - return {transient_buffer_->gpu_buffer(), offset}; - } - - // Slow path :) - // Expand the region up to the allocation boundary - auto physical_heap = memory_->GetPhysicalHeap(); - uint32_t upload_base = source_addr; - uint32_t upload_size = source_length; - - // Ping the memory subsystem for allocation size. - // TODO(DrChat): Artifacting occurring in GripShift with this enabled. 
- // physical_heap->QueryBaseAndSize(&upload_base, &upload_size); - assert(upload_base <= source_addr); - uint32_t source_offset = source_addr - upload_base; - - // Allocate space in the buffer for our data. - offset = AllocateTransientData(upload_size, fence); - if (offset == VK_WHOLE_SIZE) { - // OOM. - XELOGW( - "Failed to allocate transient data for vertex buffer! Wanted to " - "allocate {} bytes.", - upload_size); - return {nullptr, VK_WHOLE_SIZE}; - } - - const void* upload_ptr = memory_->TranslatePhysical(upload_base); - - // Copy data into the buffer. - // TODO(benvanik): memcpy then use compute shaders to swap? - if (endian == xenos::Endian::k8in32) { - // Endian::k8in32, swap words. - xe::copy_and_swap_32_unaligned(transient_buffer_->host_base() + offset, - upload_ptr, source_length / 4); - } else if (endian == xenos::Endian::k16in32) { - xe::copy_and_swap_16_in_32_unaligned( - transient_buffer_->host_base() + offset, upload_ptr, source_length / 4); - } else { - assert_always(); - } - - transient_buffer_->Flush(offset, upload_size); - - // Append a barrier to the command buffer. 
- VkBufferMemoryBarrier barrier = { - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - nullptr, - VK_ACCESS_HOST_WRITE_BIT, - VK_ACCESS_SHADER_READ_BIT, - VK_QUEUE_FAMILY_IGNORED, - VK_QUEUE_FAMILY_IGNORED, - transient_buffer_->gpu_buffer(), - offset, - upload_size, - }; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT, - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, 0, 0, nullptr, 1, - &barrier, 0, nullptr); - - CacheTransientData(upload_base, upload_size, offset); - return {transient_buffer_->gpu_buffer(), offset + source_offset}; -} - -void BufferCache::HashVertexBindings( - XXH64_state_t* hash_state, - const std::vector& vertex_bindings) { - auto& regs = *register_file_; - for (const auto& vertex_binding : vertex_bindings) { -#if 0 - XXH64_update(hash_state, &vertex_binding.binding_index, sizeof(vertex_binding.binding_index)); - XXH64_update(hash_state, &vertex_binding.fetch_constant, sizeof(vertex_binding.fetch_constant)); - XXH64_update(hash_state, &vertex_binding.stride_words, sizeof(vertex_binding.stride_words)); -#endif - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + - (vertex_binding.fetch_constant / 3) * 6; - const auto group = reinterpret_cast(®s.values[r]); - switch (vertex_binding.fetch_constant % 3) { - case 0: { - auto& fetch = group->vertex_fetch_0; - XXH64_update(hash_state, &fetch, sizeof(fetch)); - } break; - case 1: { - auto& fetch = group->vertex_fetch_1; - XXH64_update(hash_state, &fetch, sizeof(fetch)); - } break; - case 2: { - auto& fetch = group->vertex_fetch_2; - XXH64_update(hash_state, &fetch, sizeof(fetch)); - } break; - } - } -} - -VkDescriptorSet BufferCache::PrepareVertexSet( - VkCommandBuffer command_buffer, VkFence fence, - const std::vector& vertex_bindings) { - // (quickly) Generate a hash. - XXH64_state_t hash_state; - XXH64_reset(&hash_state, 0); - - // (quickly) Generate a hash. 
- HashVertexBindings(&hash_state, vertex_bindings); - uint64_t hash = XXH64_digest(&hash_state); - for (auto it = vertex_sets_.find(hash); it != vertex_sets_.end(); ++it) { - // TODO(DrChat): We need to compare the bindings and ensure they're equal. - return it->second; - } - - if (!vertex_descriptor_pool_->has_open_batch()) { - vertex_descriptor_pool_->BeginBatch(fence); - } - - VkDescriptorSet set = - vertex_descriptor_pool_->AcquireEntry(vertex_descriptor_set_layout_); - if (!set) { - return nullptr; - } - - // TODO(DrChat): Define magic number 32 as a constant somewhere. - VkDescriptorBufferInfo buffer_infos[32] = {}; - VkWriteDescriptorSet descriptor_write = { - VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - 0, - 0, - 0, - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - nullptr, - buffer_infos, - nullptr, - }; - - auto& regs = *register_file_; - for (const auto& vertex_binding : vertex_bindings) { - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + - (vertex_binding.fetch_constant / 3) * 6; - const auto group = reinterpret_cast(®s.values[r]); - const xe_gpu_vertex_fetch_t* fetch = nullptr; - switch (vertex_binding.fetch_constant % 3) { - case 0: - fetch = &group->vertex_fetch_0; - break; - case 1: - fetch = &group->vertex_fetch_1; - break; - case 2: - fetch = &group->vertex_fetch_2; - break; - } - - // TODO(DrChat): Some games use type kInvalidTexture (with no data). - switch (fetch->type) { - case xenos::FetchConstantType::kVertex: - break; - case xenos::FetchConstantType::kInvalidVertex: - if (cvars::gpu_allow_invalid_fetch_constants) { - break; - } - XELOGW( - "Vertex fetch constant {} ({:08X} {:08X}) has \"invalid\" type! 
" - "This " - "is incorrect behavior, but you can try bypassing this by " - "launching Xenia with --gpu_allow_invalid_fetch_constants=true.", - vertex_binding.fetch_constant, fetch->dword_0, fetch->dword_1); - return nullptr; - default: - XELOGW( - "Vertex fetch constant {} ({:08X} {:08X}) is completely invalid!", - vertex_binding.fetch_constant, fetch->dword_0, fetch->dword_1); - return nullptr; - } - - // TODO(benvanik): compute based on indices or vertex count. - // THIS CAN BE MASSIVELY INCORRECT (too large). - // This may not be possible (with indexed vfetch). - uint32_t source_length = fetch->size * 4; - uint32_t physical_address = fetch->address << 2; - - // TODO(DrChat): This needs to be put in gpu::CommandProcessor - // trace_writer_.WriteMemoryRead(physical_address, source_length); - - // Upload (or get a cached copy of) the buffer. - auto buffer_ref = UploadVertexBuffer(command_buffer, physical_address, - source_length, fetch->endian, fence); - if (buffer_ref.second == VK_WHOLE_SIZE) { - // Failed to upload buffer. - XELOGW("Failed to upload vertex buffer!"); - return nullptr; - } - - // Stash the buffer reference for our bulk bind at the end. - buffer_infos[descriptor_write.descriptorCount++] = { - buffer_ref.first, - buffer_ref.second, - source_length, - }; - } - - vkUpdateDescriptorSets(*device_, 1, &descriptor_write, 0, nullptr); - vertex_sets_[hash] = set; - return set; -} - -VkDeviceSize BufferCache::AllocateTransientData(VkDeviceSize length, - VkFence fence) { - // Try fast path (if we have space). - VkDeviceSize offset = TryAllocateTransientData(length, fence); - if (offset != VK_WHOLE_SIZE) { - return offset; - } - - // Ran out of easy allocations. - // Try consuming fences before we panic. - transient_buffer_->Scavenge(); - - // Try again. It may still fail if we didn't get enough space back. 
- offset = TryAllocateTransientData(length, fence); - return offset; -} - -VkDeviceSize BufferCache::TryAllocateTransientData(VkDeviceSize length, - VkFence fence) { - auto alloc = transient_buffer_->Acquire(length, fence); - if (alloc) { - return alloc->offset; - } - - // No more space. - return VK_WHOLE_SIZE; -} - -VkDeviceSize BufferCache::FindCachedTransientData(uint32_t guest_address, - uint32_t guest_length) { - if (transient_cache_.empty()) { - // Short-circuit exit. - return VK_WHOLE_SIZE; - } - - // Find the first element > guest_address - auto it = transient_cache_.upper_bound(guest_address); - if (it != transient_cache_.begin()) { - // it = first element <= guest_address - --it; - - if ((it->first + it->second.first) >= (guest_address + guest_length)) { - // This data is contained within some existing transient data. - auto source_offset = static_cast(guest_address - it->first); - return it->second.second + source_offset; - } - } - - return VK_WHOLE_SIZE; -} - -void BufferCache::CacheTransientData(uint32_t guest_address, - uint32_t guest_length, - VkDeviceSize offset) { - transient_cache_[guest_address] = {guest_length, offset}; - - // Erase any entries contained within - auto it = transient_cache_.upper_bound(guest_address); - while (it != transient_cache_.end()) { - if ((guest_address + guest_length) >= (it->first + it->second.first)) { - it = transient_cache_.erase(it); - } else { - break; - } - } -} - -void BufferCache::Flush(VkCommandBuffer command_buffer) { - // If we are flushing a big enough chunk queue up an event. - // We don't want to do this for everything but often enough so that we won't - // run out of space. - if (true) { - // VkEvent finish_event; - // vkCmdSetEvent(cmd_buffer, finish_event, - // VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); - } - - // Flush memory. - // TODO(benvanik): subrange. 
- VkMappedMemoryRange dirty_range; - dirty_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - dirty_range.pNext = nullptr; - dirty_range.memory = transient_buffer_->gpu_memory(); - dirty_range.offset = 0; - dirty_range.size = transient_buffer_->capacity(); - vkFlushMappedMemoryRanges(*device_, 1, &dirty_range); -} - -void BufferCache::InvalidateCache() { - // Called by VulkanCommandProcessor::MakeCoherent() - // Discard everything? - transient_cache_.clear(); -} - -void BufferCache::ClearCache() { transient_cache_.clear(); } - -void BufferCache::Scavenge() { - SCOPE_profile_cpu_f("gpu"); - - transient_cache_.clear(); - transient_buffer_->Scavenge(); - - // TODO(DrChat): These could persist across frames, we just need a smart way - // to delete unused ones. - vertex_sets_.clear(); - if (vertex_descriptor_pool_->has_open_batch()) { - vertex_descriptor_pool_->EndBatch(); - } - - vertex_descriptor_pool_->Scavenge(); -} - -} // namespace vulkan -} // namespace gpu -} // namespace xe diff --git a/src/xenia/gpu/vulkan/buffer_cache.h b/src/xenia/gpu/vulkan/buffer_cache.h deleted file mode 100644 index f53359cd3..000000000 --- a/src/xenia/gpu/vulkan/buffer_cache.h +++ /dev/null @@ -1,177 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#ifndef XENIA_GPU_VULKAN_BUFFER_CACHE_H_ -#define XENIA_GPU_VULKAN_BUFFER_CACHE_H_ - -#include "xenia/gpu/register_file.h" -#include "xenia/gpu/shader.h" -#include "xenia/gpu/xenos.h" -#include "xenia/memory.h" -#include "xenia/ui/vulkan/circular_buffer.h" -#include "xenia/ui/vulkan/fenced_pools.h" -#include "xenia/ui/vulkan/vulkan.h" -#include "xenia/ui/vulkan/vulkan_device.h" - -#include "third_party/vulkan/vk_mem_alloc.h" -#include "third_party/xxhash/xxhash.h" - -#include -#include - -namespace xe { -namespace gpu { -namespace vulkan { - -// Efficiently manages buffers of various kinds. -// Used primarily for uploading index and vertex data from guest memory and -// transient data like shader constants. -class BufferCache { - public: - BufferCache(RegisterFile* register_file, Memory* memory, - ui::vulkan::VulkanDevice* device, size_t capacity); - ~BufferCache(); - - VkResult Initialize(); - void Shutdown(); - - // Descriptor set containing the dynamic uniform buffer used for constant - // uploads. Used in conjunction with a dynamic offset returned by - // UploadConstantRegisters. - // The set contains two bindings: - // binding = 0: for use in vertex shaders - // binding = 1: for use in fragment shaders - VkDescriptorSet constant_descriptor_set() const { - return constant_descriptor_set_; - } - VkDescriptorSetLayout constant_descriptor_set_layout() const { - return constant_descriptor_set_layout_; - } - - // Descriptor set containing vertex buffers stored in storage buffers. - // This set contains one binding with an array of 32 storage buffers. - VkDescriptorSetLayout vertex_descriptor_set_layout() const { - return vertex_descriptor_set_layout_; - } - - // Uploads the constants specified in the register maps to the transient - // uniform storage buffer. - // The registers are tightly packed in order as [floats, ints, bools]. 
- // Returns an offset that can be used with the transient_descriptor_set or - // VK_WHOLE_SIZE if the constants could not be uploaded (OOM). - // The returned offsets may alias. - std::pair UploadConstantRegisters( - VkCommandBuffer command_buffer, - const Shader::ConstantRegisterMap& vertex_constant_register_map, - const Shader::ConstantRegisterMap& pixel_constant_register_map, - VkFence fence); - - // Uploads index buffer data from guest memory, possibly eliding with - // recently uploaded data or cached copies. - // Returns a buffer and offset that can be used with vkCmdBindIndexBuffer. - // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM). - std::pair UploadIndexBuffer( - VkCommandBuffer command_buffer, uint32_t source_addr, - uint32_t source_length, xenos::IndexFormat format, VkFence fence); - - // Uploads vertex buffer data from guest memory, possibly eliding with - // recently uploaded data or cached copies. - // Returns a buffer and offset that can be used with vkCmdBindVertexBuffers. - // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM). - std::pair UploadVertexBuffer( - VkCommandBuffer command_buffer, uint32_t source_addr, - uint32_t source_length, xenos::Endian endian, VkFence fence); - - // Prepares and returns a vertex descriptor set. - VkDescriptorSet PrepareVertexSet( - VkCommandBuffer setup_buffer, VkFence fence, - const std::vector& vertex_bindings); - - // Flushes all pending data to the GPU. - // Until this is called the GPU is not guaranteed to see any data. - // The given command buffer will be used to queue up events so that the - // cache can determine when data has been consumed. - void Flush(VkCommandBuffer command_buffer); - - // Marks the cache as potentially invalid. - // This is not as strong as ClearCache and is a hint that any and all data - // should be verified before being reused. - void InvalidateCache(); - - // Clears all cached content and prevents future elision with pending data. 
- void ClearCache(); - - // Wipes all data no longer needed. - void Scavenge(); - - private: - // This represents an uploaded vertex buffer. - struct VertexBuffer { - uint32_t guest_address; - uint32_t size; - - VmaAllocation alloc; - VmaAllocationInfo alloc_info; - }; - - VkResult CreateVertexDescriptorPool(); - void FreeVertexDescriptorPool(); - - VkResult CreateConstantDescriptorSet(); - void FreeConstantDescriptorSet(); - - void HashVertexBindings( - XXH64_state_t* hash_state, - const std::vector& vertex_bindings); - - // Allocates a block of memory in the transient buffer. - // When memory is not available fences are checked and space is reclaimed. - // Returns VK_WHOLE_SIZE if requested amount of memory is not available. - VkDeviceSize AllocateTransientData(VkDeviceSize length, VkFence fence); - // Tries to allocate a block of memory in the transient buffer. - // Returns VK_WHOLE_SIZE if requested amount of memory is not available. - VkDeviceSize TryAllocateTransientData(VkDeviceSize length, VkFence fence); - // Finds a block of data in the transient buffer sourced from the specified - // guest address and length. - VkDeviceSize FindCachedTransientData(uint32_t guest_address, - uint32_t guest_length); - // Adds a block of data to the frame cache. - void CacheTransientData(uint32_t guest_address, uint32_t guest_length, - VkDeviceSize offset); - - RegisterFile* register_file_ = nullptr; - Memory* memory_ = nullptr; - ui::vulkan::VulkanDevice* device_ = nullptr; - - VkDeviceMemory gpu_memory_pool_ = nullptr; - VmaAllocator mem_allocator_ = nullptr; - - // Staging ringbuffer we cycle through fast. Used for data we don't - // plan on keeping past the current frame. - std::unique_ptr transient_buffer_ = nullptr; - std::map> transient_cache_; - - // Vertex buffer descriptors - std::unique_ptr vertex_descriptor_pool_ = nullptr; - VkDescriptorSetLayout vertex_descriptor_set_layout_ = nullptr; - - // Current frame vertex sets. 
- std::unordered_map vertex_sets_; - - // Descriptor set used to hold vertex/pixel shader float constants - VkDescriptorPool constant_descriptor_pool_ = nullptr; - VkDescriptorSetLayout constant_descriptor_set_layout_ = nullptr; - VkDescriptorSet constant_descriptor_set_ = nullptr; -}; - -} // namespace vulkan -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_VULKAN_BUFFER_CACHE_H_ diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc deleted file mode 100644 index 8db418de9..000000000 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ /dev/null @@ -1,1597 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/gpu/vulkan/pipeline_cache.h" - -#include "third_party/xxhash/xxhash.h" -#include "xenia/base/logging.h" -#include "xenia/base/math.h" -#include "xenia/base/memory.h" -#include "xenia/base/profiling.h" -#include "xenia/gpu/gpu_flags.h" -#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" - -#include -#include - -namespace xe { -namespace gpu { -namespace vulkan { - -using xe::ui::vulkan::CheckResult; - -// Generated with `xenia-build genspirv`. 
-#include "xenia/gpu/vulkan/shaders/bin/dummy_frag.h" -#include "xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h" -#include "xenia/gpu/vulkan/shaders/bin/point_list_geom.h" -#include "xenia/gpu/vulkan/shaders/bin/quad_list_geom.h" -#include "xenia/gpu/vulkan/shaders/bin/rect_list_geom.h" - -PipelineCache::PipelineCache(RegisterFile* register_file, - ui::vulkan::VulkanDevice* device) - : register_file_(register_file), device_(device) { - shader_translator_.reset(new SpirvShaderTranslator()); -} - -PipelineCache::~PipelineCache() { Shutdown(); } - -VkResult PipelineCache::Initialize( - VkDescriptorSetLayout uniform_descriptor_set_layout, - VkDescriptorSetLayout texture_descriptor_set_layout, - VkDescriptorSetLayout vertex_descriptor_set_layout) { - VkResult status; - - // Initialize the shared driver pipeline cache. - // We'll likely want to serialize this and reuse it, if that proves to be - // useful. If the shaders are expensive and this helps we could do it per - // game, otherwise a single shared cache for render state/etc. - VkPipelineCacheCreateInfo pipeline_cache_info; - pipeline_cache_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; - pipeline_cache_info.pNext = nullptr; - pipeline_cache_info.flags = 0; - pipeline_cache_info.initialDataSize = 0; - pipeline_cache_info.pInitialData = nullptr; - status = vkCreatePipelineCache(*device_, &pipeline_cache_info, nullptr, - &pipeline_cache_); - if (status != VK_SUCCESS) { - return status; - } - - // Descriptors used by the pipelines. - // These are the only ones we can ever bind. - VkDescriptorSetLayout set_layouts[] = { - // Per-draw constant register uniforms. - uniform_descriptor_set_layout, - // All texture bindings. - texture_descriptor_set_layout, - // Vertex bindings. - vertex_descriptor_set_layout, - }; - - // Push constants used for draw parameters. - // We need to keep these under 128b across all stages. - // TODO(benvanik): split between the stages? 
- VkPushConstantRange push_constant_ranges[1]; - push_constant_ranges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | - VK_SHADER_STAGE_GEOMETRY_BIT | - VK_SHADER_STAGE_FRAGMENT_BIT; - push_constant_ranges[0].offset = 0; - push_constant_ranges[0].size = kSpirvPushConstantsSize; - - // Shared pipeline layout. - VkPipelineLayoutCreateInfo pipeline_layout_info; - pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - pipeline_layout_info.pNext = nullptr; - pipeline_layout_info.flags = 0; - pipeline_layout_info.setLayoutCount = - static_cast(xe::countof(set_layouts)); - pipeline_layout_info.pSetLayouts = set_layouts; - pipeline_layout_info.pushConstantRangeCount = - static_cast(xe::countof(push_constant_ranges)); - pipeline_layout_info.pPushConstantRanges = push_constant_ranges; - status = vkCreatePipelineLayout(*device_, &pipeline_layout_info, nullptr, - &pipeline_layout_); - if (status != VK_SUCCESS) { - return status; - } - - // Initialize our shared geometry shaders. - // These will be used as needed to emulate primitive types Vulkan doesn't - // support. 
- VkShaderModuleCreateInfo shader_module_info; - shader_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - shader_module_info.pNext = nullptr; - shader_module_info.flags = 0; - shader_module_info.codeSize = - static_cast(sizeof(line_quad_list_geom)); - shader_module_info.pCode = - reinterpret_cast(line_quad_list_geom); - status = vkCreateShaderModule(*device_, &shader_module_info, nullptr, - &geometry_shaders_.line_quad_list); - if (status != VK_SUCCESS) { - return status; - } - device_->DbgSetObjectName(uint64_t(geometry_shaders_.line_quad_list), - VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT, - "S(g): Line Quad List"); - - shader_module_info.codeSize = static_cast(sizeof(point_list_geom)); - shader_module_info.pCode = reinterpret_cast(point_list_geom); - status = vkCreateShaderModule(*device_, &shader_module_info, nullptr, - &geometry_shaders_.point_list); - if (status != VK_SUCCESS) { - return status; - } - device_->DbgSetObjectName(uint64_t(geometry_shaders_.point_list), - VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT, - "S(g): Point List"); - - shader_module_info.codeSize = static_cast(sizeof(quad_list_geom)); - shader_module_info.pCode = reinterpret_cast(quad_list_geom); - status = vkCreateShaderModule(*device_, &shader_module_info, nullptr, - &geometry_shaders_.quad_list); - if (status != VK_SUCCESS) { - return status; - } - device_->DbgSetObjectName(uint64_t(geometry_shaders_.quad_list), - VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT, - "S(g): Quad List"); - - shader_module_info.codeSize = static_cast(sizeof(rect_list_geom)); - shader_module_info.pCode = reinterpret_cast(rect_list_geom); - status = vkCreateShaderModule(*device_, &shader_module_info, nullptr, - &geometry_shaders_.rect_list); - if (status != VK_SUCCESS) { - return status; - } - device_->DbgSetObjectName(uint64_t(geometry_shaders_.rect_list), - VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT, - "S(g): Rect List"); - - shader_module_info.codeSize = 
static_cast(sizeof(dummy_frag)); - shader_module_info.pCode = reinterpret_cast(dummy_frag); - status = vkCreateShaderModule(*device_, &shader_module_info, nullptr, - &dummy_pixel_shader_); - if (status != VK_SUCCESS) { - return status; - } - device_->DbgSetObjectName(uint64_t(dummy_pixel_shader_), - VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT, - "S(p): Dummy"); - - return VK_SUCCESS; -} - -void PipelineCache::Shutdown() { - ClearCache(); - - // Destroy geometry shaders. - if (geometry_shaders_.line_quad_list) { - vkDestroyShaderModule(*device_, geometry_shaders_.line_quad_list, nullptr); - geometry_shaders_.line_quad_list = nullptr; - } - if (geometry_shaders_.point_list) { - vkDestroyShaderModule(*device_, geometry_shaders_.point_list, nullptr); - geometry_shaders_.point_list = nullptr; - } - if (geometry_shaders_.quad_list) { - vkDestroyShaderModule(*device_, geometry_shaders_.quad_list, nullptr); - geometry_shaders_.quad_list = nullptr; - } - if (geometry_shaders_.rect_list) { - vkDestroyShaderModule(*device_, geometry_shaders_.rect_list, nullptr); - geometry_shaders_.rect_list = nullptr; - } - if (dummy_pixel_shader_) { - vkDestroyShaderModule(*device_, dummy_pixel_shader_, nullptr); - dummy_pixel_shader_ = nullptr; - } - - if (pipeline_layout_) { - vkDestroyPipelineLayout(*device_, pipeline_layout_, nullptr); - pipeline_layout_ = nullptr; - } - if (pipeline_cache_) { - vkDestroyPipelineCache(*device_, pipeline_cache_, nullptr); - pipeline_cache_ = nullptr; - } -} - -VulkanShader* PipelineCache::LoadShader(xenos::ShaderType shader_type, - uint32_t guest_address, - const uint32_t* host_address, - uint32_t dword_count) { - // Hash the input memory and lookup the shader. - uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0); - auto it = shader_map_.find(data_hash); - if (it != shader_map_.end()) { - // Shader has been previously loaded. - return it->second; - } - - // Always create the shader and stash it away. 
- // We need to track it even if it fails translation so we know not to try - // again. - VulkanShader* shader = new VulkanShader(device_, shader_type, data_hash, - host_address, dword_count); - shader_map_.insert({data_hash, shader}); - - return shader; -} - -PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline( - VkCommandBuffer command_buffer, const RenderState* render_state, - VulkanShader* vertex_shader, VulkanShader* pixel_shader, - xenos::PrimitiveType primitive_type, VkPipeline* pipeline_out) { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - assert_not_null(pipeline_out); - - // Perform a pass over all registers and state updating our cached structures. - // This will tell us if anything has changed that requires us to either build - // a new pipeline or use an existing one. - VkPipeline pipeline = nullptr; - auto update_status = UpdateState(vertex_shader, pixel_shader, primitive_type); - switch (update_status) { - case UpdateStatus::kCompatible: - // Requested pipeline is compatible with our previous one, so use that. - // Note that there still may be dynamic state that needs updating. - pipeline = current_pipeline_; - break; - case UpdateStatus::kMismatch: - // Pipeline state has changed. We need to either create a new one or find - // an old one that matches. - current_pipeline_ = nullptr; - break; - case UpdateStatus::kError: - // Error updating state - bail out. - // We are in an indeterminate state, so reset things for the next attempt. - current_pipeline_ = nullptr; - return update_status; - } - if (!pipeline) { - // Should have a hash key produced by the UpdateState pass. - uint64_t hash_key = XXH64_digest(&hash_state_); - pipeline = GetPipeline(render_state, hash_key); - current_pipeline_ = pipeline; - if (!pipeline) { - // Unable to create pipeline. 
- return UpdateStatus::kError; - } - } - - *pipeline_out = pipeline; - return update_status; -} - -void PipelineCache::ClearCache() { - // Destroy all pipelines. - for (auto it : cached_pipelines_) { - vkDestroyPipeline(*device_, it.second, nullptr); - } - cached_pipelines_.clear(); - COUNT_profile_set("gpu/pipeline_cache/pipelines", 0); - - // Destroy all shaders. - for (auto it : shader_map_) { - delete it.second; - } - shader_map_.clear(); -} - -VkPipeline PipelineCache::GetPipeline(const RenderState* render_state, - uint64_t hash_key) { - // Lookup the pipeline in the cache. - auto it = cached_pipelines_.find(hash_key); - if (it != cached_pipelines_.end()) { - // Found existing pipeline. - return it->second; - } - - VkPipelineDynamicStateCreateInfo dynamic_state_info; - dynamic_state_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; - dynamic_state_info.pNext = nullptr; - dynamic_state_info.flags = 0; - VkDynamicState dynamic_states[] = { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - VK_DYNAMIC_STATE_LINE_WIDTH, - VK_DYNAMIC_STATE_DEPTH_BIAS, - VK_DYNAMIC_STATE_BLEND_CONSTANTS, - VK_DYNAMIC_STATE_DEPTH_BOUNDS, - VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, - VK_DYNAMIC_STATE_STENCIL_REFERENCE, - }; - dynamic_state_info.dynamicStateCount = - static_cast(xe::countof(dynamic_states)); - dynamic_state_info.pDynamicStates = dynamic_states; - - VkGraphicsPipelineCreateInfo pipeline_info; - pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; - pipeline_info.pNext = nullptr; - pipeline_info.flags = VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT; - pipeline_info.stageCount = update_shader_stages_stage_count_; - pipeline_info.pStages = update_shader_stages_info_; - pipeline_info.pVertexInputState = &update_vertex_input_state_info_; - pipeline_info.pInputAssemblyState = &update_input_assembly_state_info_; - pipeline_info.pTessellationState = nullptr; - pipeline_info.pViewportState = 
&update_viewport_state_info_; - pipeline_info.pRasterizationState = &update_rasterization_state_info_; - pipeline_info.pMultisampleState = &update_multisample_state_info_; - pipeline_info.pDepthStencilState = &update_depth_stencil_state_info_; - pipeline_info.pColorBlendState = &update_color_blend_state_info_; - pipeline_info.pDynamicState = &dynamic_state_info; - pipeline_info.layout = pipeline_layout_; - pipeline_info.renderPass = render_state->render_pass_handle; - pipeline_info.subpass = 0; - pipeline_info.basePipelineHandle = nullptr; - pipeline_info.basePipelineIndex = -1; - VkPipeline pipeline = nullptr; - auto result = vkCreateGraphicsPipelines(*device_, pipeline_cache_, 1, - &pipeline_info, nullptr, &pipeline); - if (result != VK_SUCCESS) { - XELOGE("vkCreateGraphicsPipelines failed with code {}", result); - assert_always(); - return nullptr; - } - - // Dump shader disassembly. - if (cvars::vulkan_dump_disasm) { - if (device_->HasEnabledExtension(VK_AMD_SHADER_INFO_EXTENSION_NAME)) { - DumpShaderDisasmAMD(pipeline); - } else if (device_->device_info().properties.vendorID == 0x10DE) { - // NVIDIA cards - DumpShaderDisasmNV(pipeline_info); - } - } - - // Add to cache with the hash key for reuse. - cached_pipelines_.insert({hash_key, pipeline}); - COUNT_profile_set("gpu/pipeline_cache/pipelines", cached_pipelines_.size()); - - return pipeline; -} - -bool PipelineCache::TranslateShader(VulkanShader* shader, - reg::SQ_PROGRAM_CNTL cntl) { - // Perform translation. - // If this fails the shader will be marked as invalid and ignored later. - if (!shader_translator_->Translate(shader, cntl)) { - XELOGE("Shader translation failed; marking shader as ignored"); - return false; - } - - // Prepare the shader for use (creates our VkShaderModule). - // It could still fail at this point. 
- if (!shader->Prepare()) { - XELOGE("Shader preparation failed; marking shader as ignored"); - return false; - } - - if (shader->is_valid()) { - XELOGGPU("Generated {} shader ({}b) - hash {:016X}:\n{}\n", - shader->type() == xenos::ShaderType::kVertex ? "vertex" : "pixel", - shader->ucode_dword_count() * 4, shader->ucode_data_hash(), - shader->ucode_disassembly()); - } - - // Dump shader files if desired. - if (!cvars::dump_shaders.empty()) { - shader->Dump(cvars::dump_shaders, "vk"); - } - - return shader->is_valid(); -} - -static void DumpShaderStatisticsAMD(const VkShaderStatisticsInfoAMD& stats) { - XELOGI(" - resource usage:"); - XELOGI(" numUsedVgprs: {}", stats.resourceUsage.numUsedVgprs); - XELOGI(" numUsedSgprs: {}", stats.resourceUsage.numUsedSgprs); - XELOGI(" ldsSizePerLocalWorkGroup: {}", - stats.resourceUsage.ldsSizePerLocalWorkGroup); - XELOGI(" ldsUsageSizeInBytes : {}", - stats.resourceUsage.ldsUsageSizeInBytes); - XELOGI(" scratchMemUsageInBytes : {}", - stats.resourceUsage.scratchMemUsageInBytes); - XELOGI("numPhysicalVgprs : {}", stats.numPhysicalVgprs); - XELOGI("numPhysicalSgprs : {}", stats.numPhysicalSgprs); - XELOGI("numAvailableVgprs: {}", stats.numAvailableVgprs); - XELOGI("numAvailableSgprs: {}", stats.numAvailableSgprs); -} - -void PipelineCache::DumpShaderDisasmAMD(VkPipeline pipeline) { - auto fn_GetShaderInfoAMD = (PFN_vkGetShaderInfoAMD)vkGetDeviceProcAddr( - *device_, "vkGetShaderInfoAMD"); - - VkResult status = VK_SUCCESS; - size_t data_size = 0; - - VkShaderStatisticsInfoAMD stats; - data_size = sizeof(stats); - - // Vertex shader - status = fn_GetShaderInfoAMD(*device_, pipeline, VK_SHADER_STAGE_VERTEX_BIT, - VK_SHADER_INFO_TYPE_STATISTICS_AMD, &data_size, - &stats); - if (status == VK_SUCCESS) { - XELOGI("AMD Vertex Shader Statistics:"); - DumpShaderStatisticsAMD(stats); - } - - // Fragment shader - status = fn_GetShaderInfoAMD(*device_, pipeline, VK_SHADER_STAGE_FRAGMENT_BIT, - VK_SHADER_INFO_TYPE_STATISTICS_AMD, &data_size, 
- &stats); - if (status == VK_SUCCESS) { - XELOGI("AMD Fragment Shader Statistics:"); - DumpShaderStatisticsAMD(stats); - } - - // TODO(DrChat): Eventually dump the disasm... -} - -void PipelineCache::DumpShaderDisasmNV( - const VkGraphicsPipelineCreateInfo& pipeline_info) { - // !! HACK !!: This only works on NVidia drivers. Dumps shader disasm. - // This code is super ugly. Update this when NVidia includes an official - // way to dump shader disassembly. - - VkPipelineCacheCreateInfo pipeline_cache_info; - VkPipelineCache dummy_pipeline_cache; - pipeline_cache_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; - pipeline_cache_info.pNext = nullptr; - pipeline_cache_info.flags = 0; - pipeline_cache_info.initialDataSize = 0; - pipeline_cache_info.pInitialData = nullptr; - auto status = vkCreatePipelineCache(*device_, &pipeline_cache_info, nullptr, - &dummy_pipeline_cache); - CheckResult(status, "vkCreatePipelineCache"); - - // Create a pipeline on the dummy cache and dump it. - VkPipeline dummy_pipeline; - status = vkCreateGraphicsPipelines(*device_, dummy_pipeline_cache, 1, - &pipeline_info, nullptr, &dummy_pipeline); - - std::vector pipeline_data; - size_t data_size = 0; - status = vkGetPipelineCacheData(*device_, dummy_pipeline_cache, &data_size, - nullptr); - if (status == VK_SUCCESS) { - pipeline_data.resize(data_size); - vkGetPipelineCacheData(*device_, dummy_pipeline_cache, &data_size, - pipeline_data.data()); - - // Scan the data for the disassembly. 
- std::string disasm_vp, disasm_fp; - - const char* disasm_start_vp = nullptr; - const char* disasm_start_fp = nullptr; - size_t search_offset = 0; - const char* search_start = - reinterpret_cast(pipeline_data.data()); - while (true) { - auto p = reinterpret_cast( - memchr(pipeline_data.data() + search_offset, '!', - pipeline_data.size() - search_offset)); - if (!p) { - break; - } - if (!strncmp(p, "!!NV", 4)) { - if (!strncmp(p + 4, "vp", 2)) { - disasm_start_vp = p; - } else if (!strncmp(p + 4, "fp", 2)) { - disasm_start_fp = p; - } - - if (disasm_start_fp && disasm_start_vp) { - // Found all we needed. - break; - } - } - search_offset = p - search_start; - ++search_offset; - } - if (disasm_start_vp) { - disasm_vp = std::string(disasm_start_vp); - - // For some reason there's question marks all over the code. - disasm_vp.erase(std::remove(disasm_vp.begin(), disasm_vp.end(), '?'), - disasm_vp.end()); - } else { - disasm_vp = std::string("Shader disassembly not available."); - } - - if (disasm_start_fp) { - disasm_fp = std::string(disasm_start_fp); - - // For some reason there's question marks all over the code. - disasm_fp.erase(std::remove(disasm_fp.begin(), disasm_fp.end(), '?'), - disasm_fp.end()); - } else { - disasm_fp = std::string("Shader disassembly not available."); - } - - XELOGI("{}\n=====================================\n{}\n", disasm_vp, - disasm_fp); - } - - vkDestroyPipeline(*device_, dummy_pipeline, nullptr); - vkDestroyPipelineCache(*device_, dummy_pipeline_cache, nullptr); -} - -VkShaderModule PipelineCache::GetGeometryShader( - xenos::PrimitiveType primitive_type, bool is_line_mode) { - switch (primitive_type) { - case xenos::PrimitiveType::kLineList: - case xenos::PrimitiveType::kLineLoop: - case xenos::PrimitiveType::kLineStrip: - case xenos::PrimitiveType::kTriangleList: - case xenos::PrimitiveType::kTriangleFan: - case xenos::PrimitiveType::kTriangleStrip: - // Supported directly - no need to emulate. 
- return nullptr; - case xenos::PrimitiveType::kPointList: - return geometry_shaders_.point_list; - case xenos::PrimitiveType::kTriangleWithWFlags: - assert_always("Unknown geometry type"); - return nullptr; - case xenos::PrimitiveType::kRectangleList: - return geometry_shaders_.rect_list; - case xenos::PrimitiveType::kQuadList: - return is_line_mode ? geometry_shaders_.line_quad_list - : geometry_shaders_.quad_list; - case xenos::PrimitiveType::kQuadStrip: - // TODO(benvanik): quad strip geometry shader. - assert_always("Quad strips not implemented"); - return nullptr; - case xenos::PrimitiveType::kTrianglePatch: - case xenos::PrimitiveType::kQuadPatch: - assert_always("Tessellation is not implemented"); - return nullptr; - default: - assert_unhandled_case(primitive_type); - return nullptr; - } -} - -bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, - bool full_update) { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - auto& regs = set_dynamic_state_registers_; - - bool window_offset_dirty = SetShadowRegister(®s.pa_sc_window_offset, - XE_GPU_REG_PA_SC_WINDOW_OFFSET); - window_offset_dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, - XE_GPU_REG_PA_SU_SC_MODE_CNTL); - - // Window parameters. 
- // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h - // See r200UpdateWindow: - // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c - int16_t window_offset_x = regs.pa_sc_window_offset & 0x7FFF; - int16_t window_offset_y = (regs.pa_sc_window_offset >> 16) & 0x7FFF; - if (window_offset_x & 0x4000) { - window_offset_x |= 0x8000; - } - if (window_offset_y & 0x4000) { - window_offset_y |= 0x8000; - } - - // VK_DYNAMIC_STATE_SCISSOR - bool scissor_state_dirty = full_update || window_offset_dirty; - scissor_state_dirty |= SetShadowRegister(®s.pa_sc_window_scissor_tl, - XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); - scissor_state_dirty |= SetShadowRegister(®s.pa_sc_window_scissor_br, - XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); - if (scissor_state_dirty) { - int32_t ws_x = regs.pa_sc_window_scissor_tl & 0x7FFF; - int32_t ws_y = (regs.pa_sc_window_scissor_tl >> 16) & 0x7FFF; - int32_t ws_w = (regs.pa_sc_window_scissor_br & 0x7FFF) - ws_x; - int32_t ws_h = ((regs.pa_sc_window_scissor_br >> 16) & 0x7FFF) - ws_y; - if (!(regs.pa_sc_window_scissor_tl & 0x80000000)) { - // ! 
WINDOW_OFFSET_DISABLE - ws_x += window_offset_x; - ws_y += window_offset_y; - } - - int32_t adj_x = ws_x - std::max(ws_x, 0); - int32_t adj_y = ws_y - std::max(ws_y, 0); - - VkRect2D scissor_rect; - scissor_rect.offset.x = ws_x - adj_x; - scissor_rect.offset.y = ws_y - adj_y; - scissor_rect.extent.width = std::max(ws_w + adj_x, 0); - scissor_rect.extent.height = std::max(ws_h + adj_y, 0); - vkCmdSetScissor(command_buffer, 0, 1, &scissor_rect); - } - - // VK_DYNAMIC_STATE_VIEWPORT - bool viewport_state_dirty = full_update || window_offset_dirty; - viewport_state_dirty |= - SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); - viewport_state_dirty |= - SetShadowRegister(®s.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL); - viewport_state_dirty |= - SetShadowRegister(®s.pa_su_sc_vtx_cntl, XE_GPU_REG_PA_SU_VTX_CNTL); - viewport_state_dirty |= SetShadowRegister(®s.pa_cl_vport_xoffset, - XE_GPU_REG_PA_CL_VPORT_XOFFSET); - viewport_state_dirty |= SetShadowRegister(®s.pa_cl_vport_yoffset, - XE_GPU_REG_PA_CL_VPORT_YOFFSET); - viewport_state_dirty |= SetShadowRegister(®s.pa_cl_vport_zoffset, - XE_GPU_REG_PA_CL_VPORT_ZOFFSET); - viewport_state_dirty |= SetShadowRegister(®s.pa_cl_vport_xscale, - XE_GPU_REG_PA_CL_VPORT_XSCALE); - viewport_state_dirty |= SetShadowRegister(®s.pa_cl_vport_yscale, - XE_GPU_REG_PA_CL_VPORT_YSCALE); - viewport_state_dirty |= SetShadowRegister(®s.pa_cl_vport_zscale, - XE_GPU_REG_PA_CL_VPORT_ZSCALE); - // RB_SURFACE_INFO - auto surface_msaa = - static_cast((regs.rb_surface_info >> 16) & 0x3); - - // Apply a multiplier to emulate MSAA. - float window_width_scalar = 1; - float window_height_scalar = 1; - switch (surface_msaa) { - case xenos::MsaaSamples::k1X: - break; - case xenos::MsaaSamples::k2X: - window_height_scalar = 2; - break; - case xenos::MsaaSamples::k4X: - window_width_scalar = window_height_scalar = 2; - break; - } - - // Whether each of the viewport settings are enabled. 
- // https://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf - bool vport_xscale_enable = (regs.pa_cl_vte_cntl & (1 << 0)) > 0; - bool vport_xoffset_enable = (regs.pa_cl_vte_cntl & (1 << 1)) > 0; - bool vport_yscale_enable = (regs.pa_cl_vte_cntl & (1 << 2)) > 0; - bool vport_yoffset_enable = (regs.pa_cl_vte_cntl & (1 << 3)) > 0; - bool vport_zscale_enable = (regs.pa_cl_vte_cntl & (1 << 4)) > 0; - bool vport_zoffset_enable = (regs.pa_cl_vte_cntl & (1 << 5)) > 0; - assert_true(vport_xscale_enable == vport_yscale_enable == - vport_zscale_enable == vport_xoffset_enable == - vport_yoffset_enable == vport_zoffset_enable); - - int16_t vtx_window_offset_x = - (regs.pa_su_sc_mode_cntl >> 16) & 1 ? window_offset_x : 0; - int16_t vtx_window_offset_y = - (regs.pa_su_sc_mode_cntl >> 16) & 1 ? window_offset_y : 0; - - float vpw, vph, vpx, vpy; - if (vport_xscale_enable) { - float vox = vport_xoffset_enable ? regs.pa_cl_vport_xoffset : 0; - float voy = vport_yoffset_enable ? regs.pa_cl_vport_yoffset : 0; - float vsx = vport_xscale_enable ? regs.pa_cl_vport_xscale : 1; - float vsy = vport_yscale_enable ? regs.pa_cl_vport_yscale : 1; - - window_width_scalar = window_height_scalar = 1; - vpw = 2 * window_width_scalar * vsx; - vph = -2 * window_height_scalar * vsy; - vpx = window_width_scalar * vox - vpw / 2 + vtx_window_offset_x; - vpy = window_height_scalar * voy - vph / 2 + vtx_window_offset_y; - } else { - // TODO(DrChat): This should be the width/height of the target picture - vpw = 2560.0f; - vph = 2560.0f; - vpx = vtx_window_offset_x; - vpy = vtx_window_offset_y; - } - - if (viewport_state_dirty) { - VkViewport viewport_rect; - std::memset(&viewport_rect, 0, sizeof(VkViewport)); - viewport_rect.x = vpx; - viewport_rect.y = vpy; - viewport_rect.width = vpw; - viewport_rect.height = vph; - - float voz = vport_zoffset_enable ? regs.pa_cl_vport_zoffset : 0; - float vsz = vport_zscale_enable ? 
regs.pa_cl_vport_zscale : 1; - viewport_rect.minDepth = voz; - viewport_rect.maxDepth = voz + vsz; - assert_true(viewport_rect.minDepth >= 0 && viewport_rect.minDepth <= 1); - assert_true(viewport_rect.maxDepth >= -1 && viewport_rect.maxDepth <= 1); - - vkCmdSetViewport(command_buffer, 0, 1, &viewport_rect); - } - - // VK_DYNAMIC_STATE_DEPTH_BIAS - // No separate front/back bias in Vulkan - using what's more expected to work. - // No need to reset to 0 if not enabled in the pipeline - recheck conditions. - float depth_bias_scales[2] = {0}, depth_bias_offsets[2] = {0}; - auto cull_mode = regs.pa_su_sc_mode_cntl & 3; - if (cull_mode != 1) { - // Front faces are not culled. - depth_bias_scales[0] = - register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; - depth_bias_offsets[0] = - register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; - } - if (cull_mode != 2) { - // Back faces are not culled. - depth_bias_scales[1] = - register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; - depth_bias_offsets[1] = - register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; - } - if (depth_bias_scales[0] != 0.0f || depth_bias_scales[1] != 0.0f || - depth_bias_offsets[0] != 0.0f || depth_bias_offsets[1] != 0.0f) { - float depth_bias_scale, depth_bias_offset; - // Prefer front if not culled and offset for both is enabled. - // However, if none are culled, and there's no front offset, use back offset - // (since there was an intention to enable depth offset at all). - // As SetRenderState sets for both sides, this should be very rare anyway. - // TODO(Triang3l): Verify the intentions if this happens in real games. 
- if (depth_bias_scales[0] != 0.0f || depth_bias_offsets[0] != 0.0f) { - depth_bias_scale = depth_bias_scales[0]; - depth_bias_offset = depth_bias_offsets[0]; - } else { - depth_bias_scale = depth_bias_scales[1]; - depth_bias_offset = depth_bias_offsets[1]; - } - // Convert to Vulkan units based on the values in Call of Duty 4: - // r_polygonOffsetScale is -1 there, but 32 in the register. - // r_polygonOffsetBias is -1 also, but passing 2/65536. - // 1/65536 and 2 scales are applied separately, however, and for shadow maps - // 0.5/65536 is passed (while sm_polygonOffsetBias is 0.5), and with 32768 - // it would be 0.25, which seems too small. So using 65536, assuming it's a - // common scale value (which also looks less arbitrary than 32768). - // TODO(Triang3l): Investigate, also considering the depth format (kD24FS8). - // Possibly refer to: - // https://www.winehq.org/pipermail/wine-patches/2015-July/141200.html - float depth_bias_scale_vulkan = depth_bias_scale * (1.0f / 32.0f); - float depth_bias_offset_vulkan = depth_bias_offset * 65536.0f; - if (full_update || - regs.pa_su_poly_offset_scale != depth_bias_scale_vulkan || - regs.pa_su_poly_offset_offset != depth_bias_offset_vulkan) { - regs.pa_su_poly_offset_scale = depth_bias_scale_vulkan; - regs.pa_su_poly_offset_offset = depth_bias_offset_vulkan; - vkCmdSetDepthBias(command_buffer, depth_bias_offset_vulkan, 0.0f, - depth_bias_scale_vulkan); - } - } else if (full_update) { - regs.pa_su_poly_offset_scale = 0.0f; - regs.pa_su_poly_offset_offset = 0.0f; - vkCmdSetDepthBias(command_buffer, 0.0f, 0.0f, 0.0f); - } - - // VK_DYNAMIC_STATE_BLEND_CONSTANTS - bool blend_constant_state_dirty = full_update; - blend_constant_state_dirty |= - SetShadowRegister(®s.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED); - blend_constant_state_dirty |= - SetShadowRegister(®s.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN); - blend_constant_state_dirty |= - SetShadowRegister(®s.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE); - 
blend_constant_state_dirty |= - SetShadowRegister(®s.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA); - if (blend_constant_state_dirty) { - vkCmdSetBlendConstants(command_buffer, regs.rb_blend_rgba); - } - - bool stencil_state_dirty = full_update; - stencil_state_dirty |= - SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); - if (stencil_state_dirty) { - uint32_t stencil_ref = (regs.rb_stencilrefmask & 0xFF); - uint32_t stencil_read_mask = (regs.rb_stencilrefmask >> 8) & 0xFF; - uint32_t stencil_write_mask = (regs.rb_stencilrefmask >> 16) & 0xFF; - - // VK_DYNAMIC_STATE_STENCIL_REFERENCE - vkCmdSetStencilReference(command_buffer, VK_STENCIL_FRONT_AND_BACK, - stencil_ref); - - // VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK - vkCmdSetStencilCompareMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, - stencil_read_mask); - - // VK_DYNAMIC_STATE_STENCIL_WRITE_MASK - vkCmdSetStencilWriteMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, - stencil_write_mask); - } - - bool push_constants_dirty = full_update || viewport_state_dirty; - push_constants_dirty |= SetShadowRegister(®s.sq_program_cntl.value, - XE_GPU_REG_SQ_PROGRAM_CNTL); - push_constants_dirty |= - SetShadowRegister(®s.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC); - push_constants_dirty |= - SetShadowRegister(®s.rb_colorcontrol, XE_GPU_REG_RB_COLORCONTROL); - push_constants_dirty |= - SetShadowRegister(®s.rb_color_info.value, XE_GPU_REG_RB_COLOR_INFO); - push_constants_dirty |= - SetShadowRegister(®s.rb_color1_info.value, XE_GPU_REG_RB_COLOR1_INFO); - push_constants_dirty |= - SetShadowRegister(®s.rb_color2_info.value, XE_GPU_REG_RB_COLOR2_INFO); - push_constants_dirty |= - SetShadowRegister(®s.rb_color3_info.value, XE_GPU_REG_RB_COLOR3_INFO); - push_constants_dirty |= - SetShadowRegister(®s.rb_alpha_ref, XE_GPU_REG_RB_ALPHA_REF); - push_constants_dirty |= - SetShadowRegister(®s.pa_su_point_size, XE_GPU_REG_PA_SU_POINT_SIZE); - if (push_constants_dirty) { - // Normal vertex shaders only, for now. 
- assert_true(regs.sq_program_cntl.vs_export_mode == - xenos::VertexShaderExportMode::kPosition1Vector || - regs.sq_program_cntl.vs_export_mode == - xenos::VertexShaderExportMode::kPosition2VectorsSprite || - regs.sq_program_cntl.vs_export_mode == - xenos::VertexShaderExportMode::kMultipass); - assert_false(regs.sq_program_cntl.gen_index_vtx); - - SpirvPushConstants push_constants = {}; - - // Done in VS, no need to flush state. - if (vport_xscale_enable) { - push_constants.window_scale[0] = 1.0f; - push_constants.window_scale[1] = -1.0f; - push_constants.window_scale[2] = 0.f; - push_constants.window_scale[3] = 0.f; - } else { - // 1 / unscaled viewport w/h - push_constants.window_scale[0] = window_width_scalar / 1280.f; - push_constants.window_scale[1] = window_height_scalar / 1280.f; - push_constants.window_scale[2] = (-1280.f / window_width_scalar) + 0.5f; - push_constants.window_scale[3] = (-1280.f / window_height_scalar) + 0.5f; - } - - // https://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf - // VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0. - // = false: multiply the X, Y coordinates by 1/W0. - // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0. - // = false: multiply the Z coordinate by 1/W0. - // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to - // get 1/W0. - float vtx_xy_fmt = (regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f; - float vtx_z_fmt = (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f; - float vtx_w0_fmt = (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 
1.0f : 0.0f; - push_constants.vtx_fmt[0] = vtx_xy_fmt; - push_constants.vtx_fmt[1] = vtx_xy_fmt; - push_constants.vtx_fmt[2] = vtx_z_fmt; - push_constants.vtx_fmt[3] = vtx_w0_fmt; - - // Point size - push_constants.point_size[0] = - static_cast((regs.pa_su_point_size & 0xffff0000) >> 16) / 8.0f; - push_constants.point_size[1] = - static_cast((regs.pa_su_point_size & 0x0000ffff)) / 8.0f; - - reg::RB_COLOR_INFO color_info[4] = { - regs.rb_color_info, - regs.rb_color1_info, - regs.rb_color2_info, - regs.rb_color3_info, - }; - for (int i = 0; i < 4; i++) { - push_constants.color_exp_bias[i] = - static_cast(1 << color_info[i].color_exp_bias); - } - - // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE - // Emulated in shader. - // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard; - // ALPHATESTENABLE - push_constants.alpha_test[0] = - (regs.rb_colorcontrol & 0x8) != 0 ? 1.0f : 0.0f; - // ALPHAFUNC - push_constants.alpha_test[1] = - static_cast(regs.rb_colorcontrol & 0x7); - // ALPHAREF - push_constants.alpha_test[2] = regs.rb_alpha_ref; - - // Whether to populate a register in the pixel shader with frag coord. - int ps_param_gen = (regs.sq_context_misc >> 8) & 0xFF; - push_constants.ps_param_gen = - regs.sq_program_cntl.param_gen ? 
ps_param_gen : -1; - - vkCmdPushConstants(command_buffer, pipeline_layout_, - VK_SHADER_STAGE_VERTEX_BIT | - VK_SHADER_STAGE_GEOMETRY_BIT | - VK_SHADER_STAGE_FRAGMENT_BIT, - 0, kSpirvPushConstantsSize, &push_constants); - } - - if (full_update) { - // VK_DYNAMIC_STATE_LINE_WIDTH - vkCmdSetLineWidth(command_buffer, 1.0f); - - // VK_DYNAMIC_STATE_DEPTH_BOUNDS - vkCmdSetDepthBounds(command_buffer, 0.0f, 1.0f); - } - - return true; -} - -bool PipelineCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { - uint32_t value = register_file_->values[register_name].u32; - if (*dest == value) { - return false; - } - *dest = value; - return true; -} - -bool PipelineCache::SetShadowRegister(float* dest, uint32_t register_name) { - float value = register_file_->values[register_name].f32; - if (*dest == value) { - return false; - } - *dest = value; - return true; -} - -bool PipelineCache::SetShadowRegisterArray(uint32_t* dest, uint32_t num, - uint32_t register_name) { - bool dirty = false; - for (uint32_t i = 0; i < num; i++) { - uint32_t value = register_file_->values[register_name + i].u32; - if (dest[i] == value) { - continue; - } - - dest[i] = value; - dirty |= true; - } - - return dirty; -} - -PipelineCache::UpdateStatus PipelineCache::UpdateState( - VulkanShader* vertex_shader, VulkanShader* pixel_shader, - xenos::PrimitiveType primitive_type) { - bool mismatch = false; - - // Reset hash so we can build it up. 
- XXH64_reset(&hash_state_, 0); - -#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \ - { \ - if (status == UpdateStatus::kError) { \ - XELOGE(error_message); \ - return status; \ - } else if (status == UpdateStatus::kMismatch) { \ - mismatch = true; \ - } \ - } - - UpdateStatus status; - status = UpdateRenderTargetState(); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update render target state"); - status = UpdateShaderStages(vertex_shader, pixel_shader, primitive_type); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update shader stages"); - status = UpdateVertexInputState(vertex_shader); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update vertex input state"); - status = UpdateInputAssemblyState(primitive_type); - CHECK_UPDATE_STATUS(status, mismatch, - "Unable to update input assembly state"); - status = UpdateViewportState(); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state"); - status = UpdateRasterizationState(primitive_type); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterization state"); - status = UpdateMultisampleState(); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update multisample state"); - status = UpdateDepthStencilState(); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state"); - status = UpdateColorBlendState(); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update color blend state"); - - return mismatch ? 
UpdateStatus::kMismatch : UpdateStatus::kCompatible; -} - -PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargetState() { - auto& regs = update_render_targets_regs_; - bool dirty = false; - - // Check the render target formats - struct { - reg::RB_COLOR_INFO rb_color_info; - reg::RB_DEPTH_INFO rb_depth_info; - reg::RB_COLOR_INFO rb_color1_info; - reg::RB_COLOR_INFO rb_color2_info; - reg::RB_COLOR_INFO rb_color3_info; - }* cur_regs = reinterpret_cast( - ®ister_file_->values[XE_GPU_REG_RB_COLOR_INFO].u32); - - dirty |= - regs.rb_color_info.color_format != cur_regs->rb_color_info.color_format; - dirty |= - regs.rb_depth_info.depth_format != cur_regs->rb_depth_info.depth_format; - dirty |= - regs.rb_color1_info.color_format != cur_regs->rb_color1_info.color_format; - dirty |= - regs.rb_color2_info.color_format != cur_regs->rb_color2_info.color_format; - dirty |= - regs.rb_color3_info.color_format != cur_regs->rb_color3_info.color_format; - - // And copy the regs over. - regs.rb_color_info.color_format = cur_regs->rb_color_info.color_format; - regs.rb_depth_info.depth_format = cur_regs->rb_depth_info.depth_format; - regs.rb_color1_info.color_format = cur_regs->rb_color1_info.color_format; - regs.rb_color2_info.color_format = cur_regs->rb_color2_info.color_format; - regs.rb_color3_info.color_format = cur_regs->rb_color3_info.color_format; - XXH64_update(&hash_state_, ®s, sizeof(regs)); - if (!dirty) { - return UpdateStatus::kCompatible; - } - - return UpdateStatus::kMismatch; -} - -PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( - VulkanShader* vertex_shader, VulkanShader* pixel_shader, - xenos::PrimitiveType primitive_type) { - auto& regs = update_shader_stages_regs_; - - // These are the constant base addresses/ranges for shaders. - // We have these hardcoded right now cause nothing seems to differ. 
- assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == - 0x000FF000 || - register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); - assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == - 0x000FF100 || - register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); - - bool dirty = false; - dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, - XE_GPU_REG_PA_SU_SC_MODE_CNTL); - dirty |= SetShadowRegister(®s.sq_program_cntl.value, - XE_GPU_REG_SQ_PROGRAM_CNTL); - dirty |= regs.vertex_shader != vertex_shader; - dirty |= regs.pixel_shader != pixel_shader; - dirty |= regs.primitive_type != primitive_type; - regs.vertex_shader = vertex_shader; - regs.pixel_shader = pixel_shader; - regs.primitive_type = primitive_type; - XXH64_update(&hash_state_, ®s, sizeof(regs)); - if (!dirty) { - return UpdateStatus::kCompatible; - } - - if (!vertex_shader->is_translated() && - !TranslateShader(vertex_shader, regs.sq_program_cntl)) { - XELOGE("Failed to translate the vertex shader!"); - return UpdateStatus::kError; - } - - if (pixel_shader && !pixel_shader->is_translated() && - !TranslateShader(pixel_shader, regs.sq_program_cntl)) { - XELOGE("Failed to translate the pixel shader!"); - return UpdateStatus::kError; - } - - update_shader_stages_stage_count_ = 0; - - auto& vertex_pipeline_stage = - update_shader_stages_info_[update_shader_stages_stage_count_++]; - vertex_pipeline_stage.sType = - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - vertex_pipeline_stage.pNext = nullptr; - vertex_pipeline_stage.flags = 0; - vertex_pipeline_stage.stage = VK_SHADER_STAGE_VERTEX_BIT; - vertex_pipeline_stage.module = vertex_shader->shader_module(); - vertex_pipeline_stage.pName = "main"; - vertex_pipeline_stage.pSpecializationInfo = nullptr; - - bool is_line_mode = false; - if (((regs.pa_su_sc_mode_cntl >> 3) & 0x3) != 0) { - uint32_t front_poly_mode = (regs.pa_su_sc_mode_cntl >> 5) & 0x7; - if (front_poly_mode == 1) { - is_line_mode = true; - } - } - 
auto geometry_shader = GetGeometryShader(primitive_type, is_line_mode); - if (geometry_shader) { - auto& geometry_pipeline_stage = - update_shader_stages_info_[update_shader_stages_stage_count_++]; - geometry_pipeline_stage.sType = - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - geometry_pipeline_stage.pNext = nullptr; - geometry_pipeline_stage.flags = 0; - geometry_pipeline_stage.stage = VK_SHADER_STAGE_GEOMETRY_BIT; - geometry_pipeline_stage.module = geometry_shader; - geometry_pipeline_stage.pName = "main"; - geometry_pipeline_stage.pSpecializationInfo = nullptr; - } - - auto& pixel_pipeline_stage = - update_shader_stages_info_[update_shader_stages_stage_count_++]; - pixel_pipeline_stage.sType = - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - pixel_pipeline_stage.pNext = nullptr; - pixel_pipeline_stage.flags = 0; - pixel_pipeline_stage.stage = VK_SHADER_STAGE_FRAGMENT_BIT; - pixel_pipeline_stage.module = - pixel_shader ? pixel_shader->shader_module() : dummy_pixel_shader_; - pixel_pipeline_stage.pName = "main"; - pixel_pipeline_stage.pSpecializationInfo = nullptr; - - return UpdateStatus::kMismatch; -} - -PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState( - VulkanShader* vertex_shader) { - auto& regs = update_vertex_input_state_regs_; - auto& state_info = update_vertex_input_state_info_; - - bool dirty = false; - dirty |= vertex_shader != regs.vertex_shader; - regs.vertex_shader = vertex_shader; - XXH64_update(&hash_state_, ®s, sizeof(regs)); - if (!dirty) { - return UpdateStatus::kCompatible; - } - - // We don't use vertex inputs. 
- state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - state_info.pNext = nullptr; - state_info.flags = 0; - state_info.vertexBindingDescriptionCount = 0; - state_info.vertexAttributeDescriptionCount = 0; - state_info.pVertexBindingDescriptions = nullptr; - state_info.pVertexAttributeDescriptions = nullptr; - - return UpdateStatus::kCompatible; -} - -PipelineCache::UpdateStatus PipelineCache::UpdateInputAssemblyState( - xenos::PrimitiveType primitive_type) { - auto& regs = update_input_assembly_state_regs_; - auto& state_info = update_input_assembly_state_info_; - - bool dirty = false; - dirty |= primitive_type != regs.primitive_type; - dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, - XE_GPU_REG_PA_SU_SC_MODE_CNTL); - dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, - XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); - regs.primitive_type = primitive_type; - XXH64_update(&hash_state_, ®s, sizeof(regs)); - if (!dirty) { - return UpdateStatus::kCompatible; - } - - state_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - state_info.pNext = nullptr; - state_info.flags = 0; - - switch (primitive_type) { - case xenos::PrimitiveType::kPointList: - state_info.topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; - break; - case xenos::PrimitiveType::kLineList: - state_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; - break; - case xenos::PrimitiveType::kLineStrip: - state_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; - break; - case xenos::PrimitiveType::kLineLoop: - state_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; - break; - case xenos::PrimitiveType::kTriangleList: - state_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; - break; - case xenos::PrimitiveType::kTriangleStrip: - state_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; - break; - case xenos::PrimitiveType::kTriangleFan: - state_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; - break; - case xenos::PrimitiveType::kRectangleList: - 
state_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; - break; - case xenos::PrimitiveType::kQuadList: - state_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY; - break; - default: - case xenos::PrimitiveType::kTriangleWithWFlags: - XELOGE("unsupported primitive type {}", primitive_type); - assert_unhandled_case(primitive_type); - return UpdateStatus::kError; - } - - // TODO(benvanik): anything we can do about this? Vulkan seems to only support - // first. - assert_zero(regs.pa_su_sc_mode_cntl & (1 << 19)); - // if (regs.pa_su_sc_mode_cntl & (1 << 19)) { - // glProvokingVertex(GL_LAST_VERTEX_CONVENTION); - // } else { - // glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); - // } - - // Primitive restart index is handled in the buffer cache. - if (regs.pa_su_sc_mode_cntl & (1 << 21)) { - state_info.primitiveRestartEnable = VK_TRUE; - } else { - state_info.primitiveRestartEnable = VK_FALSE; - } - - return UpdateStatus::kMismatch; -} - -PipelineCache::UpdateStatus PipelineCache::UpdateViewportState() { - auto& state_info = update_viewport_state_info_; - - state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - state_info.pNext = nullptr; - state_info.flags = 0; - - state_info.viewportCount = 1; - state_info.scissorCount = 1; - - // Ignored; set dynamically. 
- state_info.pViewports = nullptr; - state_info.pScissors = nullptr; - - return UpdateStatus::kCompatible; -} - -PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( - xenos::PrimitiveType primitive_type) { - auto& regs = update_rasterization_state_regs_; - auto& state_info = update_rasterization_state_info_; - - bool dirty = false; - dirty |= regs.primitive_type != primitive_type; - dirty |= SetShadowRegister(®s.pa_cl_clip_cntl, XE_GPU_REG_PA_CL_CLIP_CNTL); - dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, - XE_GPU_REG_PA_SU_SC_MODE_CNTL); - dirty |= SetShadowRegister(®s.pa_sc_screen_scissor_tl, - XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL); - dirty |= SetShadowRegister(®s.pa_sc_screen_scissor_br, - XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR); - dirty |= SetShadowRegister(®s.pa_sc_viz_query, XE_GPU_REG_PA_SC_VIZ_QUERY); - dirty |= SetShadowRegister(®s.multi_prim_ib_reset_index, - XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); - regs.primitive_type = primitive_type; - - // Vulkan doesn't support separate depth biases for different sides. - // SetRenderState also accepts only one argument, so they should be rare. - // The culling mode must match the one in SetDynamicState, so not applying - // the primitive type exceptions to this (very unlikely to happen anyway). 
- bool depth_bias_enable = false; - uint32_t cull_mode = regs.pa_su_sc_mode_cntl & 0x3; - if (cull_mode != 1) { - float depth_bias_scale = - register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; - float depth_bias_offset = - register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; - depth_bias_enable = (depth_bias_scale != 0.0f && depth_bias_offset != 0.0f); - } - if (!depth_bias_enable && cull_mode != 2) { - float depth_bias_scale = - register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; - float depth_bias_offset = - register_file_->values[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; - depth_bias_enable = (depth_bias_scale != 0.0f && depth_bias_offset != 0.0f); - } - if (regs.pa_su_poly_offset_enable != - static_cast(depth_bias_enable)) { - regs.pa_su_poly_offset_enable = static_cast(depth_bias_enable); - dirty = true; - } - - XXH64_update(&hash_state_, ®s, sizeof(regs)); - if (!dirty) { - return UpdateStatus::kCompatible; - } - - state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - state_info.pNext = nullptr; - state_info.flags = 0; - - // ZCLIP_NEAR_DISABLE - // state_info.depthClampEnable = !(regs.pa_cl_clip_cntl & (1 << 26)); - // RASTERIZER_DISABLE - // state_info.rasterizerDiscardEnable = !!(regs.pa_cl_clip_cntl & (1 << 22)); - - // CLIP_DISABLE - state_info.depthClampEnable = !!(regs.pa_cl_clip_cntl & (1 << 16)); - state_info.rasterizerDiscardEnable = VK_FALSE; - - bool poly_mode = ((regs.pa_su_sc_mode_cntl >> 3) & 0x3) != 0; - if (poly_mode) { - uint32_t front_poly_mode = (regs.pa_su_sc_mode_cntl >> 5) & 0x7; - uint32_t back_poly_mode = (regs.pa_su_sc_mode_cntl >> 8) & 0x7; - // Vulkan only supports both matching. 
- assert_true(front_poly_mode == back_poly_mode); - static const VkPolygonMode kFillModes[3] = { - VK_POLYGON_MODE_POINT, - VK_POLYGON_MODE_LINE, - VK_POLYGON_MODE_FILL, - }; - state_info.polygonMode = kFillModes[front_poly_mode]; - } else { - state_info.polygonMode = VK_POLYGON_MODE_FILL; - } - - switch (cull_mode) { - case 0: - state_info.cullMode = VK_CULL_MODE_NONE; - break; - case 1: - state_info.cullMode = VK_CULL_MODE_FRONT_BIT; - break; - case 2: - state_info.cullMode = VK_CULL_MODE_BACK_BIT; - break; - case 3: - // Cull both sides? - assert_always(); - break; - } - if (regs.pa_su_sc_mode_cntl & 0x4) { - state_info.frontFace = VK_FRONT_FACE_CLOCKWISE; - } else { - state_info.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; - } - if (primitive_type == xenos::PrimitiveType::kRectangleList) { - // Rectangle lists aren't culled. There may be other things they skip too. - state_info.cullMode = VK_CULL_MODE_NONE; - } else if (primitive_type == xenos::PrimitiveType::kPointList) { - // Face culling doesn't apply to point primitives. - state_info.cullMode = VK_CULL_MODE_NONE; - } - - state_info.depthBiasEnable = depth_bias_enable ? 
VK_TRUE : VK_FALSE; - - // Ignored; set dynamically: - state_info.depthBiasConstantFactor = 0; - state_info.depthBiasClamp = 0; - state_info.depthBiasSlopeFactor = 0; - state_info.lineWidth = 1.0f; - - return UpdateStatus::kMismatch; -} - -PipelineCache::UpdateStatus PipelineCache::UpdateMultisampleState() { - auto& regs = update_multisample_state_regs_; - auto& state_info = update_multisample_state_info_; - - bool dirty = false; - dirty |= SetShadowRegister(®s.pa_sc_aa_config, XE_GPU_REG_PA_SC_AA_CONFIG); - dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, - XE_GPU_REG_PA_SU_SC_MODE_CNTL); - dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); - XXH64_update(&hash_state_, ®s, sizeof(regs)); - if (!dirty) { - return UpdateStatus::kCompatible; - } - - state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - state_info.pNext = nullptr; - state_info.flags = 0; - - // PA_SC_AA_CONFIG MSAA_NUM_SAMPLES (0x7) - // PA_SC_AA_MASK (0xFFFF) - // PA_SU_SC_MODE_CNTL MSAA_ENABLE (0x10000) - // If set, all samples will be sampled at set locations. Otherwise, they're - // all sampled from the pixel center. 
- if (cvars::vulkan_native_msaa) { - auto msaa_num_samples = - static_cast((regs.rb_surface_info >> 16) & 0x3); - switch (msaa_num_samples) { - case xenos::MsaaSamples::k1X: - state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - break; - case xenos::MsaaSamples::k2X: - state_info.rasterizationSamples = VK_SAMPLE_COUNT_2_BIT; - break; - case xenos::MsaaSamples::k4X: - state_info.rasterizationSamples = VK_SAMPLE_COUNT_4_BIT; - break; - default: - assert_unhandled_case(msaa_num_samples); - break; - } - } else { - state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - } - - state_info.sampleShadingEnable = VK_FALSE; - state_info.minSampleShading = 0; - state_info.pSampleMask = nullptr; - state_info.alphaToCoverageEnable = VK_FALSE; - state_info.alphaToOneEnable = VK_FALSE; - - return UpdateStatus::kMismatch; -} - -PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() { - auto& regs = update_depth_stencil_state_regs_; - auto& state_info = update_depth_stencil_state_info_; - - bool dirty = false; - dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); - dirty |= - SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); - XXH64_update(&hash_state_, ®s, sizeof(regs)); - if (!dirty) { - return UpdateStatus::kCompatible; - } - - state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; - state_info.pNext = nullptr; - state_info.flags = 0; - - static const VkCompareOp compare_func_map[] = { - /* 0 */ VK_COMPARE_OP_NEVER, - /* 1 */ VK_COMPARE_OP_LESS, - /* 2 */ VK_COMPARE_OP_EQUAL, - /* 3 */ VK_COMPARE_OP_LESS_OR_EQUAL, - /* 4 */ VK_COMPARE_OP_GREATER, - /* 5 */ VK_COMPARE_OP_NOT_EQUAL, - /* 6 */ VK_COMPARE_OP_GREATER_OR_EQUAL, - /* 7 */ VK_COMPARE_OP_ALWAYS, - }; - static const VkStencilOp stencil_op_map[] = { - /* 0 */ VK_STENCIL_OP_KEEP, - /* 1 */ VK_STENCIL_OP_ZERO, - /* 2 */ VK_STENCIL_OP_REPLACE, - /* 3 */ VK_STENCIL_OP_INCREMENT_AND_CLAMP, - /* 4 */ VK_STENCIL_OP_DECREMENT_AND_CLAMP, - /* 
5 */ VK_STENCIL_OP_INVERT, - /* 6 */ VK_STENCIL_OP_INCREMENT_AND_WRAP, - /* 7 */ VK_STENCIL_OP_DECREMENT_AND_WRAP, - }; - - // Depth state - // TODO: EARLY_Z_ENABLE (needs to be enabled in shaders) - state_info.depthWriteEnable = !!(regs.rb_depthcontrol & 0x4); - state_info.depthTestEnable = !!(regs.rb_depthcontrol & 0x2); - state_info.stencilTestEnable = !!(regs.rb_depthcontrol & 0x1); - - state_info.depthCompareOp = - compare_func_map[(regs.rb_depthcontrol >> 4) & 0x7]; - state_info.depthBoundsTestEnable = VK_FALSE; - - // Stencil state - state_info.front.compareOp = - compare_func_map[(regs.rb_depthcontrol >> 8) & 0x7]; - state_info.front.failOp = stencil_op_map[(regs.rb_depthcontrol >> 11) & 0x7]; - state_info.front.passOp = stencil_op_map[(regs.rb_depthcontrol >> 14) & 0x7]; - state_info.front.depthFailOp = - stencil_op_map[(regs.rb_depthcontrol >> 17) & 0x7]; - - // BACKFACE_ENABLE - if (!!(regs.rb_depthcontrol & 0x80)) { - state_info.back.compareOp = - compare_func_map[(regs.rb_depthcontrol >> 20) & 0x7]; - state_info.back.failOp = stencil_op_map[(regs.rb_depthcontrol >> 23) & 0x7]; - state_info.back.passOp = stencil_op_map[(regs.rb_depthcontrol >> 26) & 0x7]; - state_info.back.depthFailOp = - stencil_op_map[(regs.rb_depthcontrol >> 29) & 0x7]; - } else { - // Back state is identical to front state. - std::memcpy(&state_info.back, &state_info.front, sizeof(VkStencilOpState)); - } - - // Ignored; set dynamically. 
- state_info.minDepthBounds = 0; - state_info.maxDepthBounds = 0; - state_info.front.compareMask = 0; - state_info.front.writeMask = 0; - state_info.front.reference = 0; - state_info.back.compareMask = 0; - state_info.back.writeMask = 0; - state_info.back.reference = 0; - - return UpdateStatus::kMismatch; -} - -PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() { - auto& regs = update_color_blend_state_regs_; - auto& state_info = update_color_blend_state_info_; - - bool dirty = false; - dirty |= SetShadowRegister(®s.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); - dirty |= - SetShadowRegister(®s.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL0); - dirty |= - SetShadowRegister(®s.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL1); - dirty |= - SetShadowRegister(®s.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL2); - dirty |= - SetShadowRegister(®s.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL3); - dirty |= SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); - XXH64_update(&hash_state_, ®s, sizeof(regs)); - if (!dirty) { - return UpdateStatus::kCompatible; - } - - state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - state_info.pNext = nullptr; - state_info.flags = 0; - - state_info.logicOpEnable = VK_FALSE; - state_info.logicOp = VK_LOGIC_OP_NO_OP; - - auto enable_mode = static_cast(regs.rb_modecontrol & 0x7); - - static const VkBlendFactor kBlendFactorMap[] = { - /* 0 */ VK_BLEND_FACTOR_ZERO, - /* 1 */ VK_BLEND_FACTOR_ONE, - /* 2 */ VK_BLEND_FACTOR_ZERO, // ? - /* 3 */ VK_BLEND_FACTOR_ZERO, // ? 
- /* 4 */ VK_BLEND_FACTOR_SRC_COLOR, - /* 5 */ VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, - /* 6 */ VK_BLEND_FACTOR_SRC_ALPHA, - /* 7 */ VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, - /* 8 */ VK_BLEND_FACTOR_DST_COLOR, - /* 9 */ VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, - /* 10 */ VK_BLEND_FACTOR_DST_ALPHA, - /* 11 */ VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, - /* 12 */ VK_BLEND_FACTOR_CONSTANT_COLOR, - /* 13 */ VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, - /* 14 */ VK_BLEND_FACTOR_CONSTANT_ALPHA, - /* 15 */ VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, - /* 16 */ VK_BLEND_FACTOR_SRC_ALPHA_SATURATE, - }; - static const VkBlendOp kBlendOpMap[] = { - /* 0 */ VK_BLEND_OP_ADD, - /* 1 */ VK_BLEND_OP_SUBTRACT, - /* 2 */ VK_BLEND_OP_MIN, - /* 3 */ VK_BLEND_OP_MAX, - /* 4 */ VK_BLEND_OP_REVERSE_SUBTRACT, - }; - auto& attachment_states = update_color_blend_attachment_states_; - for (int i = 0; i < 4; ++i) { - uint32_t blend_control = regs.rb_blendcontrol[i]; - auto& attachment_state = attachment_states[i]; - attachment_state.blendEnable = (blend_control & 0x1FFF1FFF) != 0x00010001; - // A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND - attachment_state.srcColorBlendFactor = - kBlendFactorMap[(blend_control & 0x0000001F) >> 0]; - // A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND - attachment_state.dstColorBlendFactor = - kBlendFactorMap[(blend_control & 0x00001F00) >> 8]; - // A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN - attachment_state.colorBlendOp = - kBlendOpMap[(blend_control & 0x000000E0) >> 5]; - // A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND - attachment_state.srcAlphaBlendFactor = - kBlendFactorMap[(blend_control & 0x001F0000) >> 16]; - // A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND - attachment_state.dstAlphaBlendFactor = - kBlendFactorMap[(blend_control & 0x1F000000) >> 24]; - // A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN - attachment_state.alphaBlendOp = - kBlendOpMap[(blend_control & 0x00E00000) >> 21]; - // A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE - // Lines up with VkColorComponentFlagBits, where R=bit 1, G=bit 2, etc.. 
- uint32_t write_mask = (regs.rb_color_mask >> (i * 4)) & 0xF; - attachment_state.colorWriteMask = - enable_mode == xenos::ModeControl::kColorDepth ? write_mask : 0; - } - - state_info.attachmentCount = 4; - state_info.pAttachments = attachment_states; - - // Ignored; set dynamically. - state_info.blendConstants[0] = 0.0f; - state_info.blendConstants[1] = 0.0f; - state_info.blendConstants[2] = 0.0f; - state_info.blendConstants[3] = 0.0f; - - return UpdateStatus::kMismatch; -} - -} // namespace vulkan -} // namespace gpu -} // namespace xe diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h deleted file mode 100644 index 3e03dce1e..000000000 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ /dev/null @@ -1,311 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_VULKAN_PIPELINE_CACHE_H_ -#define XENIA_GPU_VULKAN_PIPELINE_CACHE_H_ - -#include - -#include "third_party/xxhash/xxhash.h" - -#include "xenia/gpu/register_file.h" -#include "xenia/gpu/spirv_shader_translator.h" -#include "xenia/gpu/vulkan/render_cache.h" -#include "xenia/gpu/vulkan/vulkan_shader.h" -#include "xenia/gpu/xenos.h" -#include "xenia/ui/spirv/spirv_disassembler.h" -#include "xenia/ui/vulkan/vulkan.h" -#include "xenia/ui/vulkan/vulkan_device.h" - -namespace xe { -namespace gpu { -namespace vulkan { - -// Configures and caches pipelines based on render state. -// This is responsible for properly setting all state required for a draw -// including shaders, various blend/etc options, and input configuration. 
-class PipelineCache { - public: - enum class UpdateStatus { - kCompatible, - kMismatch, - kError, - }; - - PipelineCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device); - ~PipelineCache(); - - VkResult Initialize(VkDescriptorSetLayout uniform_descriptor_set_layout, - VkDescriptorSetLayout texture_descriptor_set_layout, - VkDescriptorSetLayout vertex_descriptor_set_layout); - void Shutdown(); - - // Loads a shader from the cache, possibly translating it. - VulkanShader* LoadShader(xenos::ShaderType shader_type, - uint32_t guest_address, const uint32_t* host_address, - uint32_t dword_count); - - // Configures a pipeline using the current render state and the given render - // pass. If a previously available pipeline is available it will be used, - // otherwise a new one may be created. Any state that can be set dynamically - // in the command buffer is issued at this time. - // Returns whether the pipeline could be successfully created. - UpdateStatus ConfigurePipeline(VkCommandBuffer command_buffer, - const RenderState* render_state, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader, - xenos::PrimitiveType primitive_type, - VkPipeline* pipeline_out); - - // Sets required dynamic state on the command buffer. - // Only state that has changed since the last call will be set unless - // full_update is true. - bool SetDynamicState(VkCommandBuffer command_buffer, bool full_update); - - // Pipeline layout shared by all pipelines. - VkPipelineLayout pipeline_layout() const { return pipeline_layout_; } - - // Clears all cached content. - void ClearCache(); - - private: - // Creates or retrieves an existing pipeline for the currently configured - // state. 
- VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key); - - bool TranslateShader(VulkanShader* shader, reg::SQ_PROGRAM_CNTL cntl); - - void DumpShaderDisasmAMD(VkPipeline pipeline); - void DumpShaderDisasmNV(const VkGraphicsPipelineCreateInfo& info); - - // Gets a geometry shader used to emulate the given primitive type. - // Returns nullptr if the primitive doesn't need to be emulated. - VkShaderModule GetGeometryShader(xenos::PrimitiveType primitive_type, - bool is_line_mode); - - RegisterFile* register_file_ = nullptr; - ui::vulkan::VulkanDevice* device_ = nullptr; - - // Reusable shader translator. - std::unique_ptr shader_translator_ = nullptr; - // Disassembler used to get the SPIRV disasm. Only used in debug. - xe::ui::spirv::SpirvDisassembler disassembler_; - // All loaded shaders mapped by their guest hash key. - std::unordered_map shader_map_; - - // Vulkan pipeline cache, which in theory helps us out. - // This can be serialized to disk and reused, if we want. - VkPipelineCache pipeline_cache_ = nullptr; - // Layout used for all pipelines describing our uniforms, textures, and push - // constants. - VkPipelineLayout pipeline_layout_ = nullptr; - - // Shared geometry shaders. - struct { - VkShaderModule line_quad_list; - VkShaderModule point_list; - VkShaderModule quad_list; - VkShaderModule rect_list; - } geometry_shaders_; - - // Shared dummy pixel shader. - VkShaderModule dummy_pixel_shader_; - - // Hash state used to incrementally produce pipeline hashes during update. - // By the time the full update pass has run the hash will represent the - // current state in a way that can uniquely identify the produced VkPipeline. - XXH64_state_t hash_state_; - // All previously generated pipelines mapped by hash. - std::unordered_map cached_pipelines_; - - // Previously used pipeline. This matches our current state settings - // and allows us to quickly(ish) reuse the pipeline if no registers have - // changed. 
- VkPipeline current_pipeline_ = nullptr; - - private: - UpdateStatus UpdateState(VulkanShader* vertex_shader, - VulkanShader* pixel_shader, - xenos::PrimitiveType primitive_type); - - UpdateStatus UpdateRenderTargetState(); - UpdateStatus UpdateShaderStages(VulkanShader* vertex_shader, - VulkanShader* pixel_shader, - xenos::PrimitiveType primitive_type); - UpdateStatus UpdateVertexInputState(VulkanShader* vertex_shader); - UpdateStatus UpdateInputAssemblyState(xenos::PrimitiveType primitive_type); - UpdateStatus UpdateViewportState(); - UpdateStatus UpdateRasterizationState(xenos::PrimitiveType primitive_type); - UpdateStatus UpdateMultisampleState(); - UpdateStatus UpdateDepthStencilState(); - UpdateStatus UpdateColorBlendState(); - - bool SetShadowRegister(uint32_t* dest, uint32_t register_name); - bool SetShadowRegister(float* dest, uint32_t register_name); - bool SetShadowRegisterArray(uint32_t* dest, uint32_t num, - uint32_t register_name); - - struct UpdateRenderTargetsRegisters { - uint32_t rb_modecontrol; - reg::RB_SURFACE_INFO rb_surface_info; - reg::RB_COLOR_INFO rb_color_info; - reg::RB_DEPTH_INFO rb_depth_info; - reg::RB_COLOR_INFO rb_color1_info; - reg::RB_COLOR_INFO rb_color2_info; - reg::RB_COLOR_INFO rb_color3_info; - uint32_t rb_color_mask; - uint32_t rb_depthcontrol; - uint32_t rb_stencilrefmask; - - UpdateRenderTargetsRegisters() { Reset(); } - void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_render_targets_regs_; - - struct UpdateShaderStagesRegisters { - xenos::PrimitiveType primitive_type; - uint32_t pa_su_sc_mode_cntl; - reg::SQ_PROGRAM_CNTL sq_program_cntl; - VulkanShader* vertex_shader; - VulkanShader* pixel_shader; - - UpdateShaderStagesRegisters() { Reset(); } - void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_shader_stages_regs_; - VkPipelineShaderStageCreateInfo update_shader_stages_info_[3]; - uint32_t update_shader_stages_stage_count_ = 0; - - struct UpdateVertexInputStateRegisters { - VulkanShader* 
vertex_shader; - - UpdateVertexInputStateRegisters() { Reset(); } - void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_vertex_input_state_regs_; - VkPipelineVertexInputStateCreateInfo update_vertex_input_state_info_; - VkVertexInputBindingDescription update_vertex_input_state_binding_descrs_[32]; - VkVertexInputAttributeDescription - update_vertex_input_state_attrib_descrs_[96]; - - struct UpdateInputAssemblyStateRegisters { - xenos::PrimitiveType primitive_type; - uint32_t pa_su_sc_mode_cntl; - uint32_t multi_prim_ib_reset_index; - - UpdateInputAssemblyStateRegisters() { Reset(); } - void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_input_assembly_state_regs_; - VkPipelineInputAssemblyStateCreateInfo update_input_assembly_state_info_; - - struct UpdateViewportStateRegisters { - // uint32_t pa_cl_clip_cntl; - uint32_t rb_surface_info; - uint32_t pa_cl_vte_cntl; - uint32_t pa_su_sc_mode_cntl; - uint32_t pa_sc_window_offset; - uint32_t pa_sc_window_scissor_tl; - uint32_t pa_sc_window_scissor_br; - float pa_cl_vport_xoffset; - float pa_cl_vport_yoffset; - float pa_cl_vport_zoffset; - float pa_cl_vport_xscale; - float pa_cl_vport_yscale; - float pa_cl_vport_zscale; - - UpdateViewportStateRegisters() { Reset(); } - void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_viewport_state_regs_; - VkPipelineViewportStateCreateInfo update_viewport_state_info_; - - struct UpdateRasterizationStateRegisters { - xenos::PrimitiveType primitive_type; - uint32_t pa_cl_clip_cntl; - uint32_t pa_su_sc_mode_cntl; - uint32_t pa_sc_screen_scissor_tl; - uint32_t pa_sc_screen_scissor_br; - uint32_t pa_sc_viz_query; - uint32_t pa_su_poly_offset_enable; - uint32_t multi_prim_ib_reset_index; - - UpdateRasterizationStateRegisters() { Reset(); } - void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_rasterization_state_regs_; - VkPipelineRasterizationStateCreateInfo update_rasterization_state_info_; - - struct UpdateMultisampleStateeRegisters { 
- uint32_t pa_sc_aa_config; - uint32_t pa_su_sc_mode_cntl; - uint32_t rb_surface_info; - - UpdateMultisampleStateeRegisters() { Reset(); } - void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_multisample_state_regs_; - VkPipelineMultisampleStateCreateInfo update_multisample_state_info_; - - struct UpdateDepthStencilStateRegisters { - uint32_t rb_depthcontrol; - uint32_t rb_stencilrefmask; - - UpdateDepthStencilStateRegisters() { Reset(); } - void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_depth_stencil_state_regs_; - VkPipelineDepthStencilStateCreateInfo update_depth_stencil_state_info_; - - struct UpdateColorBlendStateRegisters { - uint32_t rb_color_mask; - uint32_t rb_blendcontrol[4]; - uint32_t rb_modecontrol; - - UpdateColorBlendStateRegisters() { Reset(); } - void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_color_blend_state_regs_; - VkPipelineColorBlendStateCreateInfo update_color_blend_state_info_; - VkPipelineColorBlendAttachmentState update_color_blend_attachment_states_[4]; - - struct SetDynamicStateRegisters { - uint32_t pa_sc_window_offset; - - uint32_t pa_su_sc_mode_cntl; - uint32_t pa_sc_window_scissor_tl; - uint32_t pa_sc_window_scissor_br; - - uint32_t rb_surface_info; - uint32_t pa_su_sc_vtx_cntl; - // Bias is in Vulkan units because depth format may potentially effect it. 
- float pa_su_poly_offset_scale; - float pa_su_poly_offset_offset; - uint32_t pa_cl_vte_cntl; - float pa_cl_vport_xoffset; - float pa_cl_vport_yoffset; - float pa_cl_vport_zoffset; - float pa_cl_vport_xscale; - float pa_cl_vport_yscale; - float pa_cl_vport_zscale; - - float rb_blend_rgba[4]; - uint32_t rb_stencilrefmask; - - reg::SQ_PROGRAM_CNTL sq_program_cntl; - uint32_t sq_context_misc; - uint32_t rb_colorcontrol; - reg::RB_COLOR_INFO rb_color_info; - reg::RB_COLOR_INFO rb_color1_info; - reg::RB_COLOR_INFO rb_color2_info; - reg::RB_COLOR_INFO rb_color3_info; - float rb_alpha_ref; - uint32_t pa_su_point_size; - - SetDynamicStateRegisters() { Reset(); } - void Reset() { std::memset(this, 0, sizeof(*this)); } - } set_dynamic_state_registers_; -}; - -} // namespace vulkan -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_VULKAN_PIPELINE_CACHE_H_ diff --git a/src/xenia/gpu/vulkan/premake5.lua b/src/xenia/gpu/vulkan/premake5.lua index fada8e143..15d3ff668 100644 --- a/src/xenia/gpu/vulkan/premake5.lua +++ b/src/xenia/gpu/vulkan/premake5.lua @@ -8,146 +8,13 @@ project("xenia-gpu-vulkan") language("C++") links({ "fmt", - "volk", "xenia-base", "xenia-gpu", "xenia-ui", - "xenia-ui-spirv", "xenia-ui-vulkan", "xxhash", }) - defines({ - }) local_platform_files() files({ - "shaders/bin/*.h", + "../shaders/bytecode/vulkan_spirv/*.h", }) - --- TODO(benvanik): kill this and move to the debugger UI. 
-group("src") -project("xenia-gpu-vulkan-trace-viewer") - uuid("86a1dddc-a26a-4885-8c55-cf745225d93e") - kind("WindowedApp") - language("C++") - links({ - "aes_128", - "capstone", - "fmt", - "glslang-spirv", - "imgui", - "libavcodec", - "libavutil", - "mspack", - "snappy", - "spirv-tools", - "volk", - "xenia-apu", - "xenia-apu-nop", - "xenia-base", - "xenia-core", - "xenia-cpu", - "xenia-cpu-backend-x64", - "xenia-gpu", - "xenia-gpu-vulkan", - "xenia-hid", - "xenia-hid-nop", - "xenia-kernel", - "xenia-ui", - "xenia-ui-spirv", - "xenia-ui-vulkan", - "xenia-vfs", - "xxhash", - }) - defines({ - }) - files({ - "vulkan_trace_viewer_main.cc", - "../../base/main_"..platform_suffix..".cc", - }) - - filter("platforms:Linux") - links({ - "X11", - "xcb", - "X11-xcb", - "GL", - "vulkan", - }) - - filter("platforms:Windows") - links({ - "xenia-apu-xaudio2", - "xenia-hid-winkey", - "xenia-hid-xinput", - }) - - -- Only create the .user file if it doesn't already exist. - local user_file = project_root.."/build/xenia-gpu-vulkan-trace-viewer.vcxproj.user" - if not os.isfile(user_file) then - debugdir(project_root) - debugargs({ - "2>&1", - "1>scratch/stdout-trace-viewer.txt", - }) - end - -group("src") -project("xenia-gpu-vulkan-trace-dump") - uuid("0dd0dd1c-b321-494d-ab9a-6c062f0c65cc") - kind("ConsoleApp") - language("C++") - links({ - "aes_128", - "capstone", - "fmt", - "glslang-spirv", - "imgui", - "libavcodec", - "libavutil", - "mspack", - "snappy", - "spirv-tools", - "volk", - "xenia-apu", - "xenia-apu-nop", - "xenia-base", - "xenia-core", - "xenia-cpu", - "xenia-cpu-backend-x64", - "xenia-gpu", - "xenia-gpu-vulkan", - "xenia-hid", - "xenia-hid-nop", - "xenia-kernel", - "xenia-ui", - "xenia-ui-spirv", - "xenia-ui-vulkan", - "xenia-vfs", - "xxhash", - }) - defines({ - }) - files({ - "vulkan_trace_dump_main.cc", - "../../base/main_"..platform_suffix..".cc", - }) - - filter("platforms:Linux") - links({ - "X11", - "xcb", - "X11-xcb", - "GL", - "vulkan", - }) - - 
filter("platforms:Windows") - -- Only create the .user file if it doesn't already exist. - local user_file = project_root.."/build/xenia-gpu-vulkan-trace-dump.vcxproj.user" - if not os.isfile(user_file) then - debugdir(project_root) - debugargs({ - "2>&1", - "1>scratch/stdout-trace-dump.txt", - }) - end diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc deleted file mode 100644 index 1882c0082..000000000 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ /dev/null @@ -1,1404 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/gpu/vulkan/render_cache.h" - -#include - -#include "third_party/fmt/include/fmt/format.h" -#include "xenia/base/logging.h" -#include "xenia/base/math.h" -#include "xenia/base/memory.h" -#include "xenia/base/profiling.h" -#include "xenia/gpu/gpu_flags.h" -#include "xenia/gpu/registers.h" -#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" - -namespace xe { -namespace gpu { -namespace vulkan { - -using namespace xe::gpu::xenos; -using xe::ui::vulkan::CheckResult; - -constexpr uint32_t kEdramBufferCapacity = 10 * 1024 * 1024; - -xenos::ColorRenderTargetFormat GetBaseRTFormat( - xenos::ColorRenderTargetFormat format) { - switch (format) { - case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: - return xenos::ColorRenderTargetFormat::k_8_8_8_8; - case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: - return xenos::ColorRenderTargetFormat::k_2_10_10_10; - case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16: - return xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT; - default: - 
return format; - } -} - -VkFormat ColorRenderTargetFormatToVkFormat( - xenos::ColorRenderTargetFormat format) { - switch (format) { - case xenos::ColorRenderTargetFormat::k_8_8_8_8: - case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: - return VK_FORMAT_R8G8B8A8_UNORM; - case xenos::ColorRenderTargetFormat::k_2_10_10_10: - case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: - return VK_FORMAT_A2R10G10B10_UNORM_PACK32; - case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT: - case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16: - return VK_FORMAT_R16G16B16A16_SFLOAT; - case xenos::ColorRenderTargetFormat::k_16_16: - return VK_FORMAT_R16G16_UNORM; - case xenos::ColorRenderTargetFormat::k_16_16_16_16: - return VK_FORMAT_R16G16B16A16_UNORM; - case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: - return VK_FORMAT_R16G16_SFLOAT; - case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: - return VK_FORMAT_R16G16B16A16_SFLOAT; - case xenos::ColorRenderTargetFormat::k_32_FLOAT: - return VK_FORMAT_R32_SFLOAT; - case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: - return VK_FORMAT_R32G32_SFLOAT; - default: - assert_unhandled_case(key.edram_format); - return VK_FORMAT_UNDEFINED; - } -} - -VkFormat DepthRenderTargetFormatToVkFormat( - xenos::DepthRenderTargetFormat format) { - switch (format) { - case xenos::DepthRenderTargetFormat::kD24S8: - return VK_FORMAT_D24_UNORM_S8_UINT; - case xenos::DepthRenderTargetFormat::kD24FS8: - // Vulkan doesn't support 24-bit floats, so just promote it to 32-bit - return VK_FORMAT_D32_SFLOAT_S8_UINT; - default: - return VK_FORMAT_UNDEFINED; - } -} - -// Cached framebuffer referencing tile attachments. -// Each framebuffer is specific to a render pass. Ugh. -class CachedFramebuffer { - public: - // TODO(benvanik): optimized key? tile base + format for each? - - // Framebuffer with the attachments ready for use in the parent render pass. 
- VkFramebuffer handle = nullptr; - // Width of the framebuffer in pixels. - uint32_t width = 0; - // Height of the framebuffer in pixels. - uint32_t height = 0; - // References to color attachments, if used. - CachedTileView* color_attachments[4] = {nullptr}; - // Reference to depth/stencil attachment, if used. - CachedTileView* depth_stencil_attachment = nullptr; - // Associated render pass - VkRenderPass render_pass = nullptr; - - CachedFramebuffer(VkDevice device, VkRenderPass render_pass, - uint32_t surface_width, uint32_t surface_height, - CachedTileView* target_color_attachments[4], - CachedTileView* target_depth_stencil_attachment); - ~CachedFramebuffer(); - - VkResult Initialize(); - - bool IsCompatible(const RenderConfiguration& desired_config) const; - - private: - VkDevice device_ = nullptr; -}; - -// Cached render passes based on register states. -// Each render pass is dependent on the format, dimensions, and use of -// all attachments. The same render pass can be reused for multiple -// framebuffers pointing at various tile views, though those cached -// framebuffers are specific to the render pass. -class CachedRenderPass { - public: - // Configuration this pass was created with. - RenderConfiguration config; - // Initialized render pass for the register state. - VkRenderPass handle = nullptr; - // Cache of framebuffers for the various tile attachments. 
- std::vector cached_framebuffers; - - CachedRenderPass(VkDevice device, const RenderConfiguration& desired_config); - ~CachedRenderPass(); - - VkResult Initialize(); - - bool IsCompatible(const RenderConfiguration& desired_config) const; - - private: - VkDevice device_ = nullptr; -}; - -CachedTileView::CachedTileView(ui::vulkan::VulkanDevice* device, - VkDeviceMemory edram_memory, - TileViewKey view_key) - : device_(device), key(std::move(view_key)) {} - -CachedTileView::~CachedTileView() { - VK_SAFE_DESTROY(vkDestroyImageView, *device_, image_view, nullptr); - VK_SAFE_DESTROY(vkDestroyImageView, *device_, image_view_depth, nullptr); - VK_SAFE_DESTROY(vkDestroyImageView, *device_, image_view_stencil, nullptr); - VK_SAFE_DESTROY(vkDestroyImage, *device_, image, nullptr); - VK_SAFE_DESTROY(vkFreeMemory, *device_, memory, nullptr); -} - -VkResult CachedTileView::Initialize(VkCommandBuffer command_buffer) { - VkResult status = VK_SUCCESS; - - // Map format to Vulkan. - VkFormat vulkan_format = VK_FORMAT_UNDEFINED; - uint32_t bpp = 4; - if (key.color_or_depth) { - auto edram_format = - static_cast(key.edram_format); - vulkan_format = ColorRenderTargetFormatToVkFormat(edram_format); - switch (edram_format) { - case xenos::ColorRenderTargetFormat::k_16_16_16_16: - case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: - case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: - bpp = 8; - break; - default: - bpp = 4; - break; - } - } else { - auto edram_format = - static_cast(key.edram_format); - vulkan_format = DepthRenderTargetFormatToVkFormat(edram_format); - } - assert_true(vulkan_format != VK_FORMAT_UNDEFINED); - // FIXME(DrChat): Was this check necessary? - // assert_true(bpp == 4); - - // Create the image with the desired properties. - VkImageCreateInfo image_info; - image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - image_info.pNext = nullptr; - // TODO(benvanik): exploit VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT so we can have - // multiple views. 
- image_info.flags = 0; - image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.format = vulkan_format; - image_info.extent.width = key.tile_width * 80; - image_info.extent.height = key.tile_height * 16; - image_info.extent.depth = 1; - image_info.mipLevels = 1; - image_info.arrayLayers = 1; - if (cvars::vulkan_native_msaa) { - auto msaa_samples = static_cast(key.msaa_samples); - switch (msaa_samples) { - case xenos::MsaaSamples::k1X: - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - break; - case xenos::MsaaSamples::k2X: - image_info.samples = VK_SAMPLE_COUNT_2_BIT; - break; - case xenos::MsaaSamples::k4X: - image_info.samples = VK_SAMPLE_COUNT_4_BIT; - break; - default: - assert_unhandled_case(msaa_samples); - } - } else { - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - } - sample_count = image_info.samples; - image_info.tiling = VK_IMAGE_TILING_OPTIMAL; - image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT; - image_info.usage |= key.color_or_depth - ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT - : VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; - image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - image_info.queueFamilyIndexCount = 0; - image_info.pQueueFamilyIndices = nullptr; - image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - status = vkCreateImage(*device_, &image_info, nullptr, &image); - if (status != VK_SUCCESS) { - return status; - } - - device_->DbgSetObjectName( - reinterpret_cast(image), VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT, - fmt::format("RT(d): 0x{:08X} 0x{:08X}({}) 0x{:08X}({}) {} {} {}", - uint32_t(key.tile_offset), uint32_t(key.tile_width), - uint32_t(key.tile_width), uint32_t(key.tile_height), - uint32_t(key.tile_height), uint32_t(key.color_or_depth), - uint32_t(key.msaa_samples), uint32_t(key.edram_format))); - - VkMemoryRequirements memory_requirements; - vkGetImageMemoryRequirements(*device_, image, &memory_requirements); - - // Bind to a newly allocated chunk. 
- // TODO: Alias from a really big buffer? - memory = device_->AllocateMemory(memory_requirements, 0); - status = vkBindImageMemory(*device_, image, memory, 0); - if (status != VK_SUCCESS) { - return status; - } - - // Create the image view we'll use to attach it to a framebuffer. - VkImageViewCreateInfo image_view_info; - image_view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - image_view_info.pNext = nullptr; - image_view_info.flags = 0; - image_view_info.image = image; - image_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; - image_view_info.format = image_info.format; - // TODO(benvanik): manipulate? may not be able to when attached. - image_view_info.components = { - VK_COMPONENT_SWIZZLE_R, - VK_COMPONENT_SWIZZLE_G, - VK_COMPONENT_SWIZZLE_B, - VK_COMPONENT_SWIZZLE_A, - }; - image_view_info.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - if (key.color_or_depth) { - image_view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - } else { - image_view_info.subresourceRange.aspectMask = - VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - } - status = vkCreateImageView(*device_, &image_view_info, nullptr, &image_view); - if (status != VK_SUCCESS) { - return status; - } - - // Create separate depth/stencil views. - if (key.color_or_depth == 0) { - image_view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - status = vkCreateImageView(*device_, &image_view_info, nullptr, - &image_view_depth); - if (status != VK_SUCCESS) { - return status; - } - - image_view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; - status = vkCreateImageView(*device_, &image_view_info, nullptr, - &image_view_stencil); - if (status != VK_SUCCESS) { - return status; - } - } - - // TODO(benvanik): transition to general layout? 
- VkImageMemoryBarrier image_barrier; - image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - image_barrier.pNext = nullptr; - image_barrier.srcAccessMask = 0; - image_barrier.dstAccessMask = - key.color_or_depth ? VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT - : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.image = image; - image_barrier.subresourceRange.aspectMask = - key.color_or_depth - ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - image_barrier.subresourceRange.baseMipLevel = 0; - image_barrier.subresourceRange.levelCount = 1; - image_barrier.subresourceRange.baseArrayLayer = 0; - image_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - key.color_or_depth - ? VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT - : VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, - 0, 0, nullptr, 0, nullptr, 1, &image_barrier); - - image_layout = image_barrier.newLayout; - return VK_SUCCESS; -} - -CachedFramebuffer::CachedFramebuffer( - VkDevice device, VkRenderPass render_pass, uint32_t surface_width, - uint32_t surface_height, CachedTileView* target_color_attachments[4], - CachedTileView* target_depth_stencil_attachment) - : device_(device), - width(surface_width), - height(surface_height), - depth_stencil_attachment(target_depth_stencil_attachment), - render_pass(render_pass) { - for (int i = 0; i < 4; ++i) { - color_attachments[i] = target_color_attachments[i]; - } -} - -CachedFramebuffer::~CachedFramebuffer() { - VK_SAFE_DESTROY(vkDestroyFramebuffer, device_, handle, nullptr); -} - -VkResult CachedFramebuffer::Initialize() { - // Create framebuffer. 
- VkImageView image_views[5] = {nullptr}; - int image_view_count = 0; - for (int i = 0; i < 4; ++i) { - if (color_attachments[i]) { - image_views[image_view_count++] = color_attachments[i]->image_view; - } - } - if (depth_stencil_attachment) { - image_views[image_view_count++] = depth_stencil_attachment->image_view; - } - VkFramebufferCreateInfo framebuffer_info; - framebuffer_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - framebuffer_info.pNext = nullptr; - framebuffer_info.flags = 0; - framebuffer_info.renderPass = render_pass; - framebuffer_info.attachmentCount = image_view_count; - framebuffer_info.pAttachments = image_views; - framebuffer_info.width = width; - framebuffer_info.height = height; - framebuffer_info.layers = 1; - return vkCreateFramebuffer(device_, &framebuffer_info, nullptr, &handle); -} - -bool CachedFramebuffer::IsCompatible( - const RenderConfiguration& desired_config) const { - // We already know all render pass things line up, so let's verify dimensions, - // edram offsets, etc. We need an exact match. - uint32_t surface_pitch_px = - desired_config.surface_msaa != xenos::MsaaSamples::k4X - ? desired_config.surface_pitch_px - : desired_config.surface_pitch_px * 2; - uint32_t surface_height_px = - desired_config.surface_msaa == xenos::MsaaSamples::k1X - ? desired_config.surface_height_px - : desired_config.surface_height_px * 2; - surface_pitch_px = std::min(surface_pitch_px, 2560u); - surface_height_px = std::min(surface_height_px, 2560u); - if (surface_pitch_px != width || surface_height_px != height) { - return false; - } - // TODO(benvanik): separate image views from images in tiles and store in fb? - for (int i = 0; i < 4; ++i) { - // Ensure the the attachment points to the same tile. 
- if (!color_attachments[i]) { - continue; - } - auto& color_info = color_attachments[i]->key; - auto& desired_color_info = desired_config.color[i]; - if (color_info.tile_offset != desired_color_info.edram_base || - color_info.edram_format != - static_cast(desired_color_info.format)) { - return false; - } - } - // Ensure depth attachment is correct. - if (depth_stencil_attachment && - (depth_stencil_attachment->key.tile_offset != - desired_config.depth_stencil.edram_base || - depth_stencil_attachment->key.edram_format != - static_cast(desired_config.depth_stencil.format))) { - return false; - } - return true; -} - -CachedRenderPass::CachedRenderPass(VkDevice device, - const RenderConfiguration& desired_config) - : device_(device) { - std::memcpy(&config, &desired_config, sizeof(config)); -} - -CachedRenderPass::~CachedRenderPass() { - for (auto framebuffer : cached_framebuffers) { - delete framebuffer; - } - cached_framebuffers.clear(); - - VK_SAFE_DESTROY(vkDestroyRenderPass, device_, handle, nullptr); -} - -VkResult CachedRenderPass::Initialize() { - VkSampleCountFlagBits sample_count; - if (cvars::vulkan_native_msaa) { - switch (config.surface_msaa) { - case xenos::MsaaSamples::k1X: - sample_count = VK_SAMPLE_COUNT_1_BIT; - break; - case xenos::MsaaSamples::k2X: - sample_count = VK_SAMPLE_COUNT_2_BIT; - break; - case xenos::MsaaSamples::k4X: - sample_count = VK_SAMPLE_COUNT_4_BIT; - break; - default: - assert_unhandled_case(config.surface_msaa); - break; - } - } else { - sample_count = VK_SAMPLE_COUNT_1_BIT; - } - - // Initialize all attachments to default unused. - // As we set layout(location=RT) in shaders we must always provide 4. 
- VkAttachmentDescription attachments[5]; - for (int i = 0; i < 4; ++i) { - attachments[i].flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT; - attachments[i].format = VK_FORMAT_UNDEFINED; - attachments[i].samples = sample_count; - attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE; - attachments[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - attachments[i].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; - attachments[i].initialLayout = VK_IMAGE_LAYOUT_GENERAL; - attachments[i].finalLayout = VK_IMAGE_LAYOUT_GENERAL; - } - auto& depth_stencil_attachment = attachments[4]; - depth_stencil_attachment.flags = 0; - depth_stencil_attachment.format = VK_FORMAT_UNDEFINED; - depth_stencil_attachment.samples = sample_count; - depth_stencil_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - depth_stencil_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - depth_stencil_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - depth_stencil_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; - depth_stencil_attachment.initialLayout = VK_IMAGE_LAYOUT_GENERAL; - depth_stencil_attachment.finalLayout = VK_IMAGE_LAYOUT_GENERAL; - - // Configure attachments based on what's enabled. - VkAttachmentReference color_attachment_refs[4]; - for (int i = 0; i < 4; ++i) { - auto& color_config = config.color[i]; - // TODO(benvanik): see how loose we can be with these. - attachments[i].format = - ColorRenderTargetFormatToVkFormat(color_config.format); - auto& color_attachment_ref = color_attachment_refs[i]; - color_attachment_ref.attachment = i; - color_attachment_ref.layout = VK_IMAGE_LAYOUT_GENERAL; - } - - // Configure depth. 
- VkAttachmentReference depth_stencil_attachment_ref; - depth_stencil_attachment_ref.layout = VK_IMAGE_LAYOUT_GENERAL; - - auto& depth_config = config.depth_stencil; - depth_stencil_attachment_ref.attachment = 4; - depth_stencil_attachment.format = - DepthRenderTargetFormatToVkFormat(depth_config.format); - - // Single subpass that writes to our attachments. - // FIXME: "Multiple attachments that alias the same memory must not be used in - // a single subpass" - // TODO: Input attachment for depth/stencil reads? - VkSubpassDescription subpass_info; - subpass_info.flags = 0; - subpass_info.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - subpass_info.inputAttachmentCount = 0; - subpass_info.pInputAttachments = nullptr; - subpass_info.colorAttachmentCount = 4; - subpass_info.pColorAttachments = color_attachment_refs; - subpass_info.pResolveAttachments = nullptr; - subpass_info.pDepthStencilAttachment = &depth_stencil_attachment_ref; - subpass_info.preserveAttachmentCount = 0; - subpass_info.pPreserveAttachments = nullptr; - - // Create the render pass. 
- VkRenderPassCreateInfo render_pass_info; - std::memset(&render_pass_info, 0, sizeof(render_pass_info)); - render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - render_pass_info.pNext = nullptr; - render_pass_info.flags = 0; - render_pass_info.attachmentCount = 5; - render_pass_info.pAttachments = attachments; - render_pass_info.subpassCount = 1; - render_pass_info.pSubpasses = &subpass_info; - - // Add a dependency on external render passes -> us (MAY_ALIAS bit) - VkSubpassDependency dependencies[1]; - dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL; - dependencies[0].dstSubpass = 0; - dependencies[0].srcStageMask = VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT; - dependencies[0].dstStageMask = VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT; - dependencies[0].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - dependencies[0].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - dependencies[0].dependencyFlags = 0; - - render_pass_info.dependencyCount = 1; - render_pass_info.pDependencies = dependencies; - return vkCreateRenderPass(device_, &render_pass_info, nullptr, &handle); -} - -bool CachedRenderPass::IsCompatible( - const RenderConfiguration& desired_config) const { - if (config.surface_msaa != desired_config.surface_msaa && - cvars::vulkan_native_msaa) { - return false; - } - - for (int i = 0; i < 4; ++i) { - // TODO(benvanik): allow compatible vulkan formats. 
- if (config.color[i].format != desired_config.color[i].format) { - return false; - } - } - if (config.depth_stencil.format != desired_config.depth_stencil.format) { - return false; - } - return true; -} - -RenderCache::RenderCache(RegisterFile* register_file, - ui::vulkan::VulkanDevice* device) - : register_file_(register_file), device_(device) {} - -RenderCache::~RenderCache() { Shutdown(); } - -VkResult RenderCache::Initialize() { - VkResult status = VK_SUCCESS; - - // Create the buffer we'll bind to our memory. - VkBufferCreateInfo buffer_info; - buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - buffer_info.pNext = nullptr; - buffer_info.flags = 0; - buffer_info.size = kEdramBufferCapacity; - buffer_info.usage = - VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - buffer_info.queueFamilyIndexCount = 0; - buffer_info.pQueueFamilyIndices = nullptr; - status = vkCreateBuffer(*device_, &buffer_info, nullptr, &edram_buffer_); - CheckResult(status, "vkCreateBuffer"); - if (status != VK_SUCCESS) { - return status; - } - - // Query requirements for the buffer. - // It should be 1:1. - VkMemoryRequirements buffer_requirements; - vkGetBufferMemoryRequirements(*device_, edram_buffer_, &buffer_requirements); - assert_true(buffer_requirements.size == kEdramBufferCapacity); - - // Allocate EDRAM memory. - // TODO(benvanik): do we need it host visible? - edram_memory_ = device_->AllocateMemory(buffer_requirements); - assert_not_null(edram_memory_); - if (!edram_memory_) { - return VK_ERROR_INITIALIZATION_FAILED; - } - - // Bind buffer to map our entire memory. - status = vkBindBufferMemory(*device_, edram_buffer_, edram_memory_, 0); - CheckResult(status, "vkBindBufferMemory"); - if (status != VK_SUCCESS) { - return status; - } - - if (status == VK_SUCCESS) { - // For debugging, upload a grid into the EDRAM buffer. 
- uint32_t* gpu_data = nullptr; - status = vkMapMemory(*device_, edram_memory_, 0, buffer_requirements.size, - 0, reinterpret_cast(&gpu_data)); - - if (status == VK_SUCCESS) { - for (int i = 0; i < kEdramBufferCapacity / 4; i++) { - gpu_data[i] = (i % 8) >= 4 ? 0xFF0000FF : 0xFFFFFFFF; - } - - vkUnmapMemory(*device_, edram_memory_); - } - } - - return VK_SUCCESS; -} - -void RenderCache::Shutdown() { - // TODO(benvanik): wait for idle. - - // Dispose all render passes (and their framebuffers). - for (auto render_pass : cached_render_passes_) { - delete render_pass; - } - cached_render_passes_.clear(); - - // Dispose all of our cached tile views. - for (auto tile_view : cached_tile_views_) { - delete tile_view; - } - cached_tile_views_.clear(); - - // Release underlying EDRAM memory. - if (edram_buffer_) { - vkDestroyBuffer(*device_, edram_buffer_, nullptr); - edram_buffer_ = nullptr; - } - if (edram_memory_) { - vkFreeMemory(*device_, edram_memory_, nullptr); - edram_memory_ = nullptr; - } -} - -bool RenderCache::dirty() const { - auto& regs = *register_file_; - auto& cur_regs = shadow_registers_; - - bool dirty = false; - dirty |= cur_regs.rb_modecontrol.value != regs[XE_GPU_REG_RB_MODECONTROL].u32; - dirty |= - cur_regs.rb_surface_info.value != regs[XE_GPU_REG_RB_SURFACE_INFO].u32; - dirty |= cur_regs.rb_color_info.value != regs[XE_GPU_REG_RB_COLOR_INFO].u32; - dirty |= cur_regs.rb_color1_info.value != regs[XE_GPU_REG_RB_COLOR1_INFO].u32; - dirty |= cur_regs.rb_color2_info.value != regs[XE_GPU_REG_RB_COLOR2_INFO].u32; - dirty |= cur_regs.rb_color3_info.value != regs[XE_GPU_REG_RB_COLOR3_INFO].u32; - dirty |= cur_regs.rb_depth_info.value != regs[XE_GPU_REG_RB_DEPTH_INFO].u32; - dirty |= cur_regs.pa_sc_window_scissor_tl != - regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; - dirty |= cur_regs.pa_sc_window_scissor_br != - regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; - return dirty; -} - -const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, - 
VulkanShader* vertex_shader, - VulkanShader* pixel_shader) { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - assert_null(current_command_buffer_); - current_command_buffer_ = command_buffer; - - // Lookup or construct a render pass compatible with our current state. - auto config = ¤t_state_.config; - CachedRenderPass* render_pass = nullptr; - CachedFramebuffer* framebuffer = nullptr; - auto& regs = shadow_registers_; - bool dirty = false; - dirty |= - SetShadowRegister(®s.rb_modecontrol.value, XE_GPU_REG_RB_MODECONTROL); - dirty |= SetShadowRegister(®s.rb_surface_info.value, - XE_GPU_REG_RB_SURFACE_INFO); - dirty |= - SetShadowRegister(®s.rb_color_info.value, XE_GPU_REG_RB_COLOR_INFO); - dirty |= - SetShadowRegister(®s.rb_color1_info.value, XE_GPU_REG_RB_COLOR1_INFO); - dirty |= - SetShadowRegister(®s.rb_color2_info.value, XE_GPU_REG_RB_COLOR2_INFO); - dirty |= - SetShadowRegister(®s.rb_color3_info.value, XE_GPU_REG_RB_COLOR3_INFO); - dirty |= - SetShadowRegister(®s.rb_depth_info.value, XE_GPU_REG_RB_DEPTH_INFO); - dirty |= SetShadowRegister(®s.pa_sc_window_scissor_tl, - XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); - dirty |= SetShadowRegister(®s.pa_sc_window_scissor_br, - XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); - if (!dirty && current_state_.render_pass) { - // No registers have changed so we can reuse the previous render pass - - // just begin with what we had. - render_pass = current_state_.render_pass; - framebuffer = current_state_.framebuffer; - } else { - // Re-parse configuration. - if (!ParseConfiguration(config)) { - return nullptr; - } - - // Lookup or generate a new render pass and framebuffer for the new state. 
- if (!ConfigureRenderPass(command_buffer, config, &render_pass, - &framebuffer)) { - return nullptr; - } - - current_state_.render_pass = render_pass; - current_state_.render_pass_handle = render_pass->handle; - current_state_.framebuffer = framebuffer; - current_state_.framebuffer_handle = framebuffer->handle; - - // TODO(DrChat): Determine if we actually need an EDRAM buffer. - /* - // Depth - auto depth_target = current_state_.framebuffer->depth_stencil_attachment; - if (depth_target && current_state_.config.depth_stencil.used) { - UpdateTileView(command_buffer, depth_target, true); - } - - // Color - for (int i = 0; i < 4; i++) { - auto target = current_state_.framebuffer->color_attachments[i]; - if (!target || !current_state_.config.color[i].used) { - continue; - } - - UpdateTileView(command_buffer, target, true); - } - */ - } - if (!render_pass) { - return nullptr; - } - - // Setup render pass in command buffer. - // This is meant to preserve previous contents as we may be called - // repeatedly. - VkRenderPassBeginInfo render_pass_begin_info; - render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - render_pass_begin_info.pNext = nullptr; - render_pass_begin_info.renderPass = render_pass->handle; - render_pass_begin_info.framebuffer = framebuffer->handle; - - // Render into the entire buffer (or at least tell the API we are doing - // this). In theory it'd be better to clip this to the scissor region, but - // the docs warn anything but the full framebuffer may be slow. 
- render_pass_begin_info.renderArea.offset.x = 0; - render_pass_begin_info.renderArea.offset.y = 0; - render_pass_begin_info.renderArea.extent.width = config->surface_pitch_px; - render_pass_begin_info.renderArea.extent.height = config->surface_height_px; - - if (config->surface_msaa == xenos::MsaaSamples::k2X) { - render_pass_begin_info.renderArea.extent.height = - std::min(config->surface_height_px * 2, 2560u); - } else if (config->surface_msaa == xenos::MsaaSamples::k4X) { - render_pass_begin_info.renderArea.extent.width *= 2; - render_pass_begin_info.renderArea.extent.height = - std::min(config->surface_height_px * 2, 2560u); - } - - // Configure clear color, if clearing. - // TODO(benvanik): enable clearing here during resolve? - render_pass_begin_info.clearValueCount = 0; - render_pass_begin_info.pClearValues = nullptr; - - // Begin the render pass. - vkCmdBeginRenderPass(command_buffer, &render_pass_begin_info, - VK_SUBPASS_CONTENTS_INLINE); - - return ¤t_state_; -} - -bool RenderCache::ParseConfiguration(RenderConfiguration* config) { - auto& regs = shadow_registers_; - - // RB_MODECONTROL - // Rough mode control (color, color+depth, etc). - config->mode_control = regs.rb_modecontrol.edram_mode; - - // RB_SURFACE_INFO - // https://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html - config->surface_pitch_px = regs.rb_surface_info.surface_pitch; - config->surface_msaa = regs.rb_surface_info.msaa_samples; - - // TODO(benvanik): verify min/max so we don't go out of bounds. - // TODO(benvanik): has to be a good way to get height. - // Guess the height from the scissor height. - // It's wildly inaccurate, but I've never seen it be bigger than the - // EDRAM tiling. - /* - uint32_t ws_y = (regs.pa_sc_window_scissor_tl >> 16) & 0x7FFF; - uint32_t ws_h = ((regs.pa_sc_window_scissor_br >> 16) & 0x7FFF) - ws_y; - config->surface_height_px = std::min(2560u, xe::round_up(ws_h, 16)); - */ - - // TODO(DrChat): Find an accurate way to get the surface height. 
Until we do, - // we're going to hardcode it to 2560, as that's the absolute maximum. - config->surface_height_px = 2560; - - // Color attachment configuration. - if (config->mode_control == ModeControl::kColorDepth) { - reg::RB_COLOR_INFO color_info[4] = { - regs.rb_color_info, - regs.rb_color1_info, - regs.rb_color2_info, - regs.rb_color3_info, - }; - for (int i = 0; i < 4; ++i) { - config->color[i].edram_base = color_info[i].color_base; - config->color[i].format = GetBaseRTFormat(color_info[i].color_format); - } - } else { - for (int i = 0; i < 4; ++i) { - config->color[i].edram_base = 0; - config->color[i].format = xenos::ColorRenderTargetFormat::k_8_8_8_8; - config->color[i].used = false; - } - } - - // Depth/stencil attachment configuration. - if (config->mode_control == ModeControl::kColorDepth || - config->mode_control == ModeControl::kDepth) { - config->depth_stencil.edram_base = regs.rb_depth_info.depth_base; - config->depth_stencil.format = regs.rb_depth_info.depth_format; - } else { - config->depth_stencil.edram_base = 0; - config->depth_stencil.format = xenos::DepthRenderTargetFormat::kD24S8; - config->depth_stencil.used = false; - } - - return true; -} - -bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer, - RenderConfiguration* config, - CachedRenderPass** out_render_pass, - CachedFramebuffer** out_framebuffer) { - *out_render_pass = nullptr; - *out_framebuffer = nullptr; - - // TODO(benvanik): better lookup. - // Attempt to find the render pass in our cache. - CachedRenderPass* render_pass = nullptr; - for (auto cached_render_pass : cached_render_passes_) { - if (cached_render_pass->IsCompatible(*config)) { - // Found a match. - render_pass = cached_render_pass; - break; - } - } - - // If no render pass was found in the cache create a new one. 
- if (!render_pass) { - render_pass = new CachedRenderPass(*device_, *config); - VkResult status = render_pass->Initialize(); - if (status != VK_SUCCESS) { - XELOGE("{}: Failed to create render pass, status {}", __func__, - ui::vulkan::to_string(status)); - delete render_pass; - return false; - } - - cached_render_passes_.push_back(render_pass); - } - - // TODO(benvanik): better lookup. - // Attempt to find the framebuffer in the render pass cache. - CachedFramebuffer* framebuffer = nullptr; - for (auto cached_framebuffer : render_pass->cached_framebuffers) { - if (cached_framebuffer->IsCompatible(*config)) { - // Found a match. - framebuffer = cached_framebuffer; - break; - } - } - - // If no framebuffer was found in the cache create a new one. - if (!framebuffer) { - uint32_t tile_width = - config->surface_msaa == xenos::MsaaSamples::k4X ? 40 : 80; - uint32_t tile_height = - config->surface_msaa != xenos::MsaaSamples::k1X ? 8 : 16; - - CachedTileView* target_color_attachments[4] = {nullptr, nullptr, nullptr, - nullptr}; - for (int i = 0; i < 4; ++i) { - TileViewKey color_key; - color_key.tile_offset = config->color[i].edram_base; - color_key.tile_width = - xe::round_up(config->surface_pitch_px, tile_width) / tile_width; - // color_key.tile_height = - // xe::round_up(config->surface_height_px, tile_height) / tile_height; - color_key.tile_height = 160; - color_key.color_or_depth = 1; - color_key.msaa_samples = - 0; // static_cast(config->surface_msaa); - color_key.edram_format = static_cast(config->color[i].format); - target_color_attachments[i] = - FindOrCreateTileView(command_buffer, color_key); - if (!target_color_attachments[i]) { - XELOGE("Failed to get tile view for color attachment"); - return false; - } - } - - TileViewKey depth_stencil_key; - depth_stencil_key.tile_offset = config->depth_stencil.edram_base; - depth_stencil_key.tile_width = - xe::round_up(config->surface_pitch_px, tile_width) / tile_width; - // depth_stencil_key.tile_height = - // 
xe::round_up(config->surface_height_px, tile_height) / tile_height; - depth_stencil_key.tile_height = 160; - depth_stencil_key.color_or_depth = 0; - depth_stencil_key.msaa_samples = - 0; // static_cast(config->surface_msaa); - depth_stencil_key.edram_format = - static_cast(config->depth_stencil.format); - auto target_depth_stencil_attachment = - FindOrCreateTileView(command_buffer, depth_stencil_key); - if (!target_depth_stencil_attachment) { - XELOGE("Failed to get tile view for depth/stencil attachment"); - return false; - } - - uint32_t surface_pitch_px = config->surface_msaa != xenos::MsaaSamples::k4X - ? config->surface_pitch_px - : config->surface_pitch_px * 2; - uint32_t surface_height_px = config->surface_msaa == xenos::MsaaSamples::k1X - ? config->surface_height_px - : config->surface_height_px * 2; - surface_pitch_px = std::min(surface_pitch_px, 2560u); - surface_height_px = std::min(surface_height_px, 2560u); - framebuffer = new CachedFramebuffer( - *device_, render_pass->handle, surface_pitch_px, surface_height_px, - target_color_attachments, target_depth_stencil_attachment); - VkResult status = framebuffer->Initialize(); - if (status != VK_SUCCESS) { - XELOGE("{}: Failed to create framebuffer, status {}", __func__, - ui::vulkan::to_string(status)); - delete framebuffer; - return false; - } - - render_pass->cached_framebuffers.push_back(framebuffer); - } - - *out_render_pass = render_pass; - *out_framebuffer = framebuffer; - return true; -} - -CachedTileView* RenderCache::FindTileView(uint32_t base, uint32_t pitch, - xenos::MsaaSamples samples, - bool color_or_depth, - uint32_t format) { - uint32_t tile_width = samples == xenos::MsaaSamples::k4X ? 40 : 80; - uint32_t tile_height = samples != xenos::MsaaSamples::k1X ? 8 : 16; - - if (color_or_depth) { - // Adjust similar formats for easier matching. 
- format = static_cast( - GetBaseRTFormat(static_cast(format))); - } - - TileViewKey key; - key.tile_offset = base; - key.tile_width = xe::round_up(pitch, tile_width) / tile_width; - key.tile_height = 160; - key.color_or_depth = color_or_depth ? 1 : 0; - key.msaa_samples = 0; - key.edram_format = static_cast(format); - auto view = FindTileView(key); - if (view) { - return view; - } - - return nullptr; -} - -CachedTileView* RenderCache::FindOrCreateTileView( - VkCommandBuffer command_buffer, const TileViewKey& view_key) { - auto tile_view = FindTileView(view_key); - if (tile_view) { - return tile_view; - } - - // Create a new tile and add to the cache. - tile_view = new CachedTileView(device_, edram_memory_, view_key); - VkResult status = tile_view->Initialize(command_buffer); - if (status != VK_SUCCESS) { - XELOGE("{}: Failed to create tile view, status {}", __func__, - ui::vulkan::to_string(status)); - - delete tile_view; - return nullptr; - } - - cached_tile_views_.push_back(tile_view); - return tile_view; -} - -void RenderCache::UpdateTileView(VkCommandBuffer command_buffer, - CachedTileView* view, bool load, - bool insert_barrier) { - uint32_t tile_width = - view->key.msaa_samples == uint16_t(xenos::MsaaSamples::k4X) ? 40 : 80; - uint32_t tile_height = - view->key.msaa_samples != uint16_t(xenos::MsaaSamples::k1X) ? 
8 : 16; - - if (insert_barrier) { - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - if (load) { - barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - } else { - barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - } - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = edram_buffer_; - barrier.offset = view->key.tile_offset * 5120; - barrier.size = view->key.tile_width * tile_width * view->key.tile_height * - tile_height * view->key.color_or_depth - ? 4 - : 1; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, - &barrier, 0, nullptr); - } - - // TODO(DrChat): Stencil copies. - VkBufferImageCopy region; - region.bufferOffset = view->key.tile_offset * 5120; - region.bufferRowLength = 0; - region.bufferImageHeight = 0; - region.imageSubresource = {0, 0, 0, 1}; - region.imageSubresource.aspectMask = view->key.color_or_depth - ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT; - region.imageOffset = {0, 0, 0}; - region.imageExtent = {view->key.tile_width * tile_width, - view->key.tile_height * tile_height, 1}; - if (load) { - vkCmdCopyBufferToImage(command_buffer, edram_buffer_, view->image, - VK_IMAGE_LAYOUT_GENERAL, 1, ®ion); - } else { - vkCmdCopyImageToBuffer(command_buffer, view->image, VK_IMAGE_LAYOUT_GENERAL, - edram_buffer_, 1, ®ion); - } -} - -CachedTileView* RenderCache::FindTileView(const TileViewKey& view_key) const { - // Check the cache. - // TODO(benvanik): better lookup. 
- for (auto tile_view : cached_tile_views_) { - if (tile_view->IsEqual(view_key)) { - return tile_view; - } - } - - return nullptr; -} - -void RenderCache::EndRenderPass() { - assert_not_null(current_command_buffer_); - - // End the render pass. - vkCmdEndRenderPass(current_command_buffer_); - - // Copy all render targets back into our EDRAM buffer. - // Don't bother waiting on this command to complete, as next render pass may - // reuse previous framebuffer attachments. If they need this, they will wait. - // TODO: Should we bother re-tiling the images on copy back? - // - // FIXME: There's a case where we may have a really big render target (as we - // can't get the correct height atm) and we may end up overwriting the valid - // contents of another render target by mistake! Need to reorder copy commands - // to avoid this. - - // TODO(DrChat): Determine if we actually need an EDRAM buffer. - /* - std::vector cached_views; - - // Depth - auto depth_target = current_state_.framebuffer->depth_stencil_attachment; - if (depth_target && current_state_.config.depth_stencil.used) { - cached_views.push_back(depth_target); - } - - // Color - for (int i = 0; i < 4; i++) { - auto target = current_state_.framebuffer->color_attachments[i]; - if (!target || !current_state_.config.color[i].used) { - continue; - } - - cached_views.push_back(target); - } - - std::sort( - cached_views.begin(), cached_views.end(), - [](CachedTileView const* a, CachedTileView const* b) { return *a < *b; }); - - for (auto view : cached_views) { - UpdateTileView(current_command_buffer_, view, false, false); - } - */ - - current_command_buffer_ = nullptr; -} - -void RenderCache::ClearCache() { - // TODO(benvanik): caching. -} - -void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer, - uint32_t edram_base, VkImage image, - VkImageLayout image_layout, - bool color_or_depth, VkOffset3D offset, - VkExtent3D extents) { - // Transition the texture into a transfer destination layout. 
- VkImageMemoryBarrier image_barrier; - image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - image_barrier.pNext = nullptr; - image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - if (image_layout != VK_IMAGE_LAYOUT_GENERAL && - image_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { - image_barrier.srcAccessMask = 0; - image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - image_barrier.oldLayout = image_layout; - image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - image_barrier.image = image; - image_barrier.subresourceRange = {0, 0, 1, 0, 1}; - image_barrier.subresourceRange.aspectMask = - color_or_depth - ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, - nullptr, 1, &image_barrier); - } - - VkBufferMemoryBarrier buffer_barrier; - buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - buffer_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - buffer_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - buffer_barrier.buffer = edram_buffer_; - buffer_barrier.offset = edram_base * 5120; - // TODO: Calculate this accurately (need texel size) - buffer_barrier.size = extents.width * extents.height * 4; - - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1, - &buffer_barrier, 0, nullptr); - - // Issue the copy command. - // TODO(DrChat): Stencil copies. 
- VkBufferImageCopy region; - region.bufferOffset = edram_base * 5120; - region.bufferImageHeight = 0; - region.bufferRowLength = 0; - region.imageOffset = offset; - region.imageExtent = extents; - region.imageSubresource = {0, 0, 0, 1}; - region.imageSubresource.aspectMask = - color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; - vkCmdCopyBufferToImage(command_buffer, edram_buffer_, image, image_layout, 1, - ®ion); - - // Transition the image back into its previous layout. - if (image_layout != VK_IMAGE_LAYOUT_GENERAL && - image_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { - image_barrier.srcAccessMask = image_barrier.dstAccessMask; - image_barrier.dstAccessMask = 0; - std::swap(image_barrier.oldLayout, image_barrier.newLayout); - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, - nullptr, 1, &image_barrier); - } -} - -void RenderCache::BlitToImage(VkCommandBuffer command_buffer, - uint32_t edram_base, uint32_t pitch, - uint32_t height, xenos::MsaaSamples num_samples, - VkImage image, VkImageLayout image_layout, - bool color_or_depth, uint32_t format, - VkFilter filter, VkOffset3D offset, - VkExtent3D extents) { - if (color_or_depth) { - // Adjust similar formats for easier matching. - format = static_cast( - GetBaseRTFormat(static_cast(format))); - } - - uint32_t tile_width = num_samples == xenos::MsaaSamples::k4X ? 40 : 80; - uint32_t tile_height = num_samples != xenos::MsaaSamples::k1X ? 8 : 16; - - // Grab a tile view that represents the source image. - TileViewKey key; - key.color_or_depth = color_or_depth ? 
1 : 0; - key.msaa_samples = 0; // static_cast(num_samples); - key.edram_format = format; - key.tile_offset = edram_base; - key.tile_width = xe::round_up(pitch, tile_width) / tile_width; - // key.tile_height = xe::round_up(height, tile_height) / tile_height; - key.tile_height = 160; - auto tile_view = FindOrCreateTileView(command_buffer, key); - assert_not_null(tile_view); - - // Update the view with the latest contents. - // UpdateTileView(command_buffer, tile_view, true, true); - - // Put a barrier on the tile view. - VkImageMemoryBarrier image_barrier; - image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - image_barrier.pNext = nullptr; - image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.srcAccessMask = - color_or_depth ? VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT - : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - image_barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; - image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - image_barrier.image = tile_view->image; - image_barrier.subresourceRange = {0, 0, 1, 0, 1}; - image_barrier.subresourceRange.aspectMask = - color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, - nullptr, 1, &image_barrier); - - // If we overflow we'll lose the device here. - // assert_true(extents.width <= key.tile_width * tile_width); - // assert_true(extents.height <= key.tile_height * tile_height); - - // Now issue the blit to the destination. - if (tile_view->sample_count == VK_SAMPLE_COUNT_1_BIT) { - VkImageBlit image_blit; - image_blit.srcSubresource = {0, 0, 0, 1}; - image_blit.srcSubresource.aspectMask = - color_or_depth ? 
VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; - image_blit.srcOffsets[0] = {0, 0, offset.z}; - image_blit.srcOffsets[1] = {int32_t(extents.width), int32_t(extents.height), - int32_t(extents.depth)}; - - image_blit.dstSubresource = {0, 0, 0, 1}; - image_blit.dstSubresource.aspectMask = - color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; - image_blit.dstOffsets[0] = offset; - image_blit.dstOffsets[1] = {offset.x + int32_t(extents.width), - offset.y + int32_t(extents.height), - offset.z + int32_t(extents.depth)}; - vkCmdBlitImage(command_buffer, tile_view->image, VK_IMAGE_LAYOUT_GENERAL, - image, image_layout, 1, &image_blit, filter); - } else { - VkImageResolve image_resolve; - image_resolve.srcSubresource = {0, 0, 0, 1}; - image_resolve.srcSubresource.aspectMask = - color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; - image_resolve.srcOffset = {0, 0, 0}; - - image_resolve.dstSubresource = {0, 0, 0, 1}; - image_resolve.dstSubresource.aspectMask = - color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; - image_resolve.dstOffset = offset; - - image_resolve.extent = extents; - vkCmdResolveImage(command_buffer, tile_view->image, VK_IMAGE_LAYOUT_GENERAL, - image, image_layout, 1, &image_resolve); - } - - // Add another barrier on the tile view. - image_barrier.srcAccessMask = image_barrier.dstAccessMask; - image_barrier.dstAccessMask = - color_or_depth ? 
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT - : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - std::swap(image_barrier.oldLayout, image_barrier.newLayout); - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, - nullptr, 1, &image_barrier); -} - -void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer, - uint32_t edram_base, - xenos::ColorRenderTargetFormat format, - uint32_t pitch, uint32_t height, - xenos::MsaaSamples num_samples, - float* color) { - // TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just - // need to detect this and calculate a value. - - // Adjust similar formats for easier matching. - format = GetBaseRTFormat(static_cast(format)); - - uint32_t tile_width = num_samples == xenos::MsaaSamples::k4X ? 40 : 80; - uint32_t tile_height = num_samples != xenos::MsaaSamples::k1X ? 8 : 16; - - // Grab a tile view (as we need to clear an image first) - TileViewKey key; - key.color_or_depth = 1; - key.msaa_samples = 0; // static_cast(num_samples); - key.edram_format = static_cast(format); - key.tile_offset = edram_base; - key.tile_width = xe::round_up(pitch, tile_width) / tile_width; - // key.tile_height = xe::round_up(height, tile_height) / tile_height; - key.tile_height = 160; - auto tile_view = FindOrCreateTileView(command_buffer, key); - assert_not_null(tile_view); - - VkImageSubresourceRange range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - VkClearColorValue clear_value; - std::memcpy(clear_value.float32, color, sizeof(float) * 4); - - // Issue a clear command - vkCmdClearColorImage(command_buffer, tile_view->image, - VK_IMAGE_LAYOUT_GENERAL, &clear_value, 1, &range); - - // Copy image back into EDRAM buffer - // UpdateTileView(command_buffer, tile_view, false, false); -} - -void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, - uint32_t edram_base, - xenos::DepthRenderTargetFormat format, - uint32_t pitch, uint32_t height, - 
xenos::MsaaSamples num_samples, - float depth, uint32_t stencil) { - // TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just - // need to detect this and calculate a value. - - uint32_t tile_width = num_samples == xenos::MsaaSamples::k4X ? 40 : 80; - uint32_t tile_height = num_samples != xenos::MsaaSamples::k1X ? 8 : 16; - - // Grab a tile view (as we need to clear an image first) - TileViewKey key; - key.color_or_depth = 0; - key.msaa_samples = 0; // static_cast(num_samples); - key.edram_format = static_cast(format); - key.tile_offset = edram_base; - key.tile_width = xe::round_up(pitch, tile_width) / tile_width; - // key.tile_height = xe::round_up(height, tile_height) / tile_height; - key.tile_height = 160; - auto tile_view = FindOrCreateTileView(command_buffer, key); - assert_not_null(tile_view); - - VkImageSubresourceRange range = { - VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1, - }; - VkClearDepthStencilValue clear_value; - clear_value.depth = depth; - clear_value.stencil = stencil; - - // Issue a clear command - vkCmdClearDepthStencilImage(command_buffer, tile_view->image, - VK_IMAGE_LAYOUT_GENERAL, &clear_value, 1, &range); - - // Copy image back into EDRAM buffer - // UpdateTileView(command_buffer, tile_view, false, false); -} - -void RenderCache::FillEDRAM(VkCommandBuffer command_buffer, uint32_t value) { - vkCmdFillBuffer(command_buffer, edram_buffer_, 0, kEdramBufferCapacity, - value); -} - -bool RenderCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { - uint32_t value = register_file_->values[register_name].u32; - if (*dest == value) { - return false; - } - *dest = value; - return true; -} - -} // namespace vulkan -} // namespace gpu -} // namespace xe diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h deleted file mode 100644 index 9dfbf648d..000000000 --- a/src/xenia/gpu/vulkan/render_cache.h +++ /dev/null @@ -1,406 +0,0 @@ -/** - 
****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_VULKAN_RENDER_CACHE_H_ -#define XENIA_GPU_VULKAN_RENDER_CACHE_H_ - -#include "xenia/gpu/register_file.h" -#include "xenia/gpu/registers.h" -#include "xenia/gpu/shader.h" -#include "xenia/gpu/texture_info.h" -#include "xenia/gpu/vulkan/vulkan_shader.h" -#include "xenia/gpu/xenos.h" -#include "xenia/ui/vulkan/vulkan.h" -#include "xenia/ui/vulkan/vulkan_device.h" - -namespace xe { -namespace gpu { -namespace vulkan { - -// TODO(benvanik): make public API? -class CachedTileView; -class CachedFramebuffer; -class CachedRenderPass; - -// Uniquely identifies EDRAM tiles. -struct TileViewKey { - // Offset into EDRAM in 5120b tiles. - uint16_t tile_offset; - // Tile width of the view in base 80x16 tiles. - uint16_t tile_width; - // Tile height of the view in base 80x16 tiles. - uint16_t tile_height; - // 1 if format is ColorRenderTargetFormat, else DepthRenderTargetFormat. - uint16_t color_or_depth : 1; - // Surface MSAA samples - uint16_t msaa_samples : 2; - // Either ColorRenderTargetFormat or DepthRenderTargetFormat. - uint16_t edram_format : 13; -}; -static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed"); - -// Cached view representing EDRAM memory. -// TODO(benvanik): reuse VkImage's with multiple VkViews for compatible -// formats? -class CachedTileView { - public: - // Key identifying the view in the cache. - TileViewKey key; - // Image - VkImage image = nullptr; - // Simple view on the image matching the format. 
- VkImageView image_view = nullptr; - // Image layout - VkImageLayout image_layout = VK_IMAGE_LAYOUT_UNDEFINED; - // Memory buffer - VkDeviceMemory memory = nullptr; - // Image sample count - VkSampleCountFlagBits sample_count = VK_SAMPLE_COUNT_1_BIT; - - // (if a depth view) Image view of depth aspect - VkImageView image_view_depth = nullptr; - // (if a depth view) Image view of stencil aspect - VkImageView image_view_stencil = nullptr; - - CachedTileView(ui::vulkan::VulkanDevice* device, VkDeviceMemory edram_memory, - TileViewKey view_key); - ~CachedTileView(); - - VkResult Initialize(VkCommandBuffer command_buffer); - - bool IsEqual(const TileViewKey& other_key) const { - auto a = reinterpret_cast(&key); - auto b = reinterpret_cast(&other_key); - return *a == *b; - } - - bool operator<(const CachedTileView& other) const { - return key.tile_offset < other.key.tile_offset; - } - - VkExtent2D GetSize() const { - return {key.tile_width * 80u, key.tile_height * 16u}; - } - - private: - ui::vulkan::VulkanDevice* device_ = nullptr; -}; - -// Parsed render configuration from the current render state. -struct RenderConfiguration { - // Render mode (color+depth, depth-only, etc). - xenos::ModeControl mode_control; - // Target surface pitch multiplied by MSAA, in pixels. - uint32_t surface_pitch_px; - // ESTIMATED target surface height multiplied by MSAA, in pixels. - uint32_t surface_height_px; - // Surface MSAA setting. - xenos::MsaaSamples surface_msaa; - // Color attachments for the 4 render targets. - struct { - bool used; - uint32_t edram_base; - xenos::ColorRenderTargetFormat format; - } color[4]; - // Depth/stencil attachment. - struct { - bool used; - uint32_t edram_base; - xenos::DepthRenderTargetFormat format; - } depth_stencil; -}; - -// Current render state based on the register-specified configuration. -struct RenderState { - // Parsed configuration. - RenderConfiguration config; - // Render pass (to be used with pipelines/etc). 
- CachedRenderPass* render_pass = nullptr; - VkRenderPass render_pass_handle = nullptr; - // Target framebuffer bound to the render pass. - CachedFramebuffer* framebuffer = nullptr; - VkFramebuffer framebuffer_handle = nullptr; - - bool color_attachment_written[4] = {false}; - bool depth_attachment_written = false; -}; - -// Manages the virtualized EDRAM and the render target cache. -// -// On the 360 the render target is an opaque block of memory in EDRAM that's -// only accessible via resolves. We use this to our advantage to simulate -// something like it as best we can by having a shared backing memory with -// a multitude of views for each tile location in EDRAM. -// -// This allows us to have the same base address write to the same memory -// regardless of framebuffer format. Resolving then uses whatever format the -// resolve requests straight from the backing memory. -// -// EDRAM is a beast and we only approximate it as best we can. Basically, -// the 10MiB of EDRAM is composed of 2048 5120b tiles. Each tile is 80x16px. -// +-----+-----+-----+--- -// |tile0|tile1|tile2|... 2048 times -// +-----+-----+-----+--- -// Operations dealing with EDRAM deal in tile offsets, so base 0x100 is tile -// offset 256, 256*5120=1310720b into the buffer. All rendering operations are -// aligned to tiles so trying to draw at 256px wide will have a real width of -// 320px by rounding up to the next tile. -// -// MSAA and other settings will modify the exact pixel sizes, like 4X makes -// each tile effectively 40x8px / 2X makes each tile 80x8px, but they are still -// all 5120b. As we try to emulate this we adjust our viewport when rendering to -// stretch pixels as needed. -// -// It appears that games also take advantage of MSAA stretching tiles when doing -// clears. Games will clear a view with 1/2X pitch/height and 4X MSAA and then -// later draw to that view with 1X pitch/height and 1X MSAA. 
-// -// The good news is that games cannot read EDRAM directly but must use a copy -// operation to get the data out. That gives us a chance to do whatever we -// need to (re-tile, etc) only when requested. -// -// To approximate the tiled EDRAM layout we use a single large chunk of memory. -// From this memory we create many VkImages (and VkImageViews) of various -// formats and dimensions as requested by the game. These are used as -// attachments during rendering and as sources during copies. They are also -// heavily aliased - lots of images will reference the same locations in the -// underlying EDRAM buffer. The only requirement is that there are no hazards -// with specific tiles (reading/writing the same tile through different images) -// and otherwise it should be ok *fingers crossed*. -// -// One complication is the copy/resolve process itself: we need to give back -// the data asked for in the format desired and where it goes is arbitrary -// (any address in physical memory). If the game is good we get resolves of -// EDRAM into fixed base addresses with scissored regions. If the game is bad -// we are broken. -// -// Resolves from EDRAM result in tiled textures - that's texture tiles, not -// EDRAM tiles. If we wanted to ensure byte-for-byte correctness we'd need to -// then tile the images as we wrote them out. For now, we just attempt to -// get the (X, Y) in linear space and do that. This really comes into play -// when multiple resolves write to the same texture or memory aliased by -// multiple textures - which is common due to predicated tiling. The examples -// below demonstrate what this looks like, but the important thing is that -// we are aware of partial textures and overlapping regions. -// -// TODO(benvanik): what, if any, barriers do we need? any transitions? 
-// -// Example with multiple render targets: -// Two color targets of 256x256px tightly packed in EDRAM: -// color target 0: base 0x0, pitch 320, scissor 0,0, 256x256 -// starts at tile 0, buffer offset 0 -// contains 64 tiles (320/80)*(256/16) -// color target 1: base 0x40, pitch 320, scissor 256,0, 256x256 -// starts at tile 64 (after color target 0), buffer offset 327680b -// contains 64 tiles -// In EDRAM each set of 64 tiles is contiguous: -// +------+------+ +------+------+------+ -// |ct0.0 |ct0.1 |...|ct0.63|ct1.0 |ct1.1 |... -// +------+------+ +------+------+------+ -// To render into these, we setup two VkImages: -// image 0: bound to buffer offset 0, 320x256x4=327680b -// image 1: bound to buffer offset 327680b, 320x256x4=327680b -// So when we render to them: -// +------+-+ scissored to 256x256, actually 320x256 -// | . | | <- . appears at some untiled offset in the buffer, but -// | | | consistent if aliased with the same format -// +------+-+ -// In theory, this gives us proper aliasing in most cases. -// -// Example with horizontal predicated tiling: -// Trying to render 1024x576 @4X MSAA, splitting into two regions -// horizontally: -// +----------+ -// | 1024x288 | -// +----------+ -// | 1024x288 | -// +----------+ -// EDRAM configured for 1056x288px with tile size 2112x567px (4X MSAA): -// color target 0: base 0x0, pitch 1080, 26x36 tiles -// First render (top): -// window offset 0,0 -// scissor 0,0, 1024x288 -// First resolve (top): -// RB_COPY_DEST_BASE 0x1F45D000 -// RB_COPY_DEST_PITCH pitch=1024, height=576 -// vertices: 0,0, 1024,0, 1024,288 -// Second render (bottom): -// window offset 0,-288 -// scissor 0,288, 1024x288 -// Second resolve (bottom): -// RB_COPY_DEST_BASE 0x1F57D000 (+1179648b) -// RB_COPY_DEST_PITCH pitch=1024, height=576 -// (exactly 1024x288*4b after first resolve) -// vertices: 0,288, 1024,288, 1024,576 -// Resolving here is easy as the textures are contiguous in memory. 
We can -// snoop in the first resolve with the dest height to know the total size, -// and in the second resolve see that it overlaps and place it in the -// existing target. -// -// Example with vertical predicated tiling: -// Trying to render 1280x720 @2X MSAA, splitting into two regions -// vertically: -// +-----+-----+ -// | 640 | 640 | -// | x | x | -// | 720 | 720 | -// +-----+-----+ -// EDRAM configured for 640x736px with tile size 640x1472px (2X MSAA): -// color target 0: base 0x0, pitch 640, 8x92 tiles -// First render (left): -// window offset 0,0 -// scissor 0,0, 640x720 -// First resolve (left): -// RB_COPY_DEST_BASE 0x1BC6D000 -// RB_COPY_DEST_PITCH pitch=1280, height=720 -// vertices: 0,0, 640,0, 640,720 -// Second render (right): -// window offset -640,0 -// scissor 640,0, 640x720 -// Second resolve (right): -// RB_COPY_DEST_BASE 0x1BC81000 (+81920b) -// RB_COPY_DEST_PITCH pitch=1280, height=720 -// vertices: 640,0, 1280,0, 1280,720 -// Resolving here is much more difficult as resolves are tiled and the right -// half of the texture is 81920b away: -// 81920/4bpp=20480px, /32 (texture tile size)=640px -// We know the texture size with the first resolve and with the second we -// must check for overlap then compute the offset (in both X and Y). -class RenderCache { - public: - RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device); - ~RenderCache(); - - VkResult Initialize(); - void Shutdown(); - - // Call this to determine if you should start a new render pass or continue - // with an already open pass. - bool dirty() const; - - CachedTileView* FindTileView(uint32_t base, uint32_t pitch, - xenos::MsaaSamples samples, bool color_or_depth, - uint32_t format); - - // Begins a render pass targeting the state-specified framebuffer formats. - // The command buffer will be transitioned into the render pass phase. 
- const RenderState* BeginRenderPass(VkCommandBuffer command_buffer, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader); - - // Ends the current render pass. - // The command buffer will be transitioned out of the render pass phase. - void EndRenderPass(); - - // Clears all cached content. - void ClearCache(); - - // Queues commands to copy EDRAM contents into an image. - // The command buffer must not be inside of a render pass when calling this. - void RawCopyToImage(VkCommandBuffer command_buffer, uint32_t edram_base, - VkImage image, VkImageLayout image_layout, - bool color_or_depth, VkOffset3D offset, - VkExtent3D extents); - - // Queues commands to blit EDRAM contents into an image. - // The command buffer must not be inside of a render pass when calling this. - void BlitToImage(VkCommandBuffer command_buffer, uint32_t edram_base, - uint32_t pitch, uint32_t height, - xenos::MsaaSamples num_samples, VkImage image, - VkImageLayout image_layout, bool color_or_depth, - uint32_t format, VkFilter filter, VkOffset3D offset, - VkExtent3D extents); - - // Queues commands to clear EDRAM contents with a solid color. - // The command buffer must not be inside of a render pass when calling this. - void ClearEDRAMColor(VkCommandBuffer command_buffer, uint32_t edram_base, - xenos::ColorRenderTargetFormat format, uint32_t pitch, - uint32_t height, xenos::MsaaSamples num_samples, - float* color); - // Queues commands to clear EDRAM contents with depth/stencil values. - // The command buffer must not be inside of a render pass when calling this. - void ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, - uint32_t edram_base, - xenos::DepthRenderTargetFormat format, - uint32_t pitch, uint32_t height, - xenos::MsaaSamples num_samples, float depth, - uint32_t stencil); - // Queues commands to fill EDRAM contents with a constant value. - // The command buffer must not be inside of a render pass when calling this. 
- void FillEDRAM(VkCommandBuffer command_buffer, uint32_t value); - - private: - // Parses the current state into a configuration object. - bool ParseConfiguration(RenderConfiguration* config); - - // Finds a tile view. Returns nullptr if none found matching the key. - CachedTileView* FindTileView(const TileViewKey& view_key) const; - - // Gets or creates a tile view with the given parameters. - CachedTileView* FindOrCreateTileView(VkCommandBuffer command_buffer, - const TileViewKey& view_key); - - void UpdateTileView(VkCommandBuffer command_buffer, CachedTileView* view, - bool load, bool insert_barrier = true); - - // Gets or creates a render pass and frame buffer for the given configuration. - // This attempts to reuse as much as possible across render passes and - // framebuffers. - bool ConfigureRenderPass(VkCommandBuffer command_buffer, - RenderConfiguration* config, - CachedRenderPass** out_render_pass, - CachedFramebuffer** out_framebuffer); - - RegisterFile* register_file_ = nullptr; - ui::vulkan::VulkanDevice* device_ = nullptr; - - // Entire 10MiB of EDRAM. - VkDeviceMemory edram_memory_ = nullptr; - // Buffer overlayed 1:1 with edram_memory_ to allow raw access. - VkBuffer edram_buffer_ = nullptr; - - // Cache of VkImage and VkImageView's for all of our EDRAM tilings. - // TODO(benvanik): non-linear lookup? Should only be a small number of these. - std::vector cached_tile_views_; - - // Cache of render passes based on formats. - std::vector cached_render_passes_; - - // Shadows of the registers that impact the render pass we choose. - // If the registers don't change between passes we can quickly reuse the - // previous one. 
- struct ShadowRegisters { - reg::RB_MODECONTROL rb_modecontrol; - reg::RB_SURFACE_INFO rb_surface_info; - reg::RB_COLOR_INFO rb_color_info; - reg::RB_COLOR_INFO rb_color1_info; - reg::RB_COLOR_INFO rb_color2_info; - reg::RB_COLOR_INFO rb_color3_info; - reg::RB_DEPTH_INFO rb_depth_info; - uint32_t pa_sc_window_scissor_tl; - uint32_t pa_sc_window_scissor_br; - - ShadowRegisters() { Reset(); } - void Reset() { std::memset(this, 0, sizeof(*this)); } - } shadow_registers_; - bool SetShadowRegister(uint32_t* dest, uint32_t register_name); - - // Configuration used for the current/previous Begin/End, representing the - // current shadow register state. - RenderState current_state_; - - // Only valid during a BeginRenderPass/EndRenderPass block. - VkCommandBuffer current_command_buffer_ = nullptr; -}; - -} // namespace vulkan -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_VULKAN_RENDER_CACHE_H_ diff --git a/src/xenia/gpu/vulkan/shaders/bin/dummy_frag.h b/src/xenia/gpu/vulkan/shaders/bin/dummy_frag.h deleted file mode 100644 index 7bcf6bc90..000000000 --- a/src/xenia/gpu/vulkan/shaders/bin/dummy_frag.h +++ /dev/null @@ -1,50 +0,0 @@ -// generated from `xb genspirv` -// source: dummy.frag -const uint8_t dummy_frag[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x06, 0x00, 0x08, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4C, 0x53, 0x4C, - 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, - 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, - 0xC2, 0x01, 
0x00, 0x00, 0x04, 0x00, 0x09, 0x00, 0x47, 0x4C, 0x5F, 0x41, - 0x52, 0x42, 0x5F, 0x65, 0x78, 0x70, 0x6C, 0x69, 0x63, 0x69, 0x74, 0x5F, - 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x5F, 0x6C, 0x6F, 0x63, 0x61, 0x74, - 0x69, 0x6F, 0x6E, 0x00, 0x04, 0x00, 0x09, 0x00, 0x47, 0x4C, 0x5F, 0x41, - 0x52, 0x42, 0x5F, 0x73, 0x65, 0x70, 0x61, 0x72, 0x61, 0x74, 0x65, 0x5F, - 0x73, 0x68, 0x61, 0x64, 0x65, 0x72, 0x5F, 0x6F, 0x62, 0x6A, 0x65, 0x63, - 0x74, 0x73, 0x00, 0x00, 0x04, 0x00, 0x09, 0x00, 0x47, 0x4C, 0x5F, 0x41, - 0x52, 0x42, 0x5F, 0x73, 0x68, 0x61, 0x64, 0x69, 0x6E, 0x67, 0x5F, 0x6C, - 0x61, 0x6E, 0x67, 0x75, 0x61, 0x67, 0x65, 0x5F, 0x34, 0x32, 0x30, 0x70, - 0x61, 0x63, 0x6B, 0x00, 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x07, 0x00, - 0x2D, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x5F, 0x69, 0x6E, 0x74, 0x65, 0x72, - 0x70, 0x6F, 0x6C, 0x61, 0x74, 0x6F, 0x72, 0x73, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0x6F, 0x43, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x2C, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, - 0x01, 0x00, 
0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x2E, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, - 0x2F, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x30, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x2F, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, - 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, -}; diff --git a/src/xenia/gpu/vulkan/shaders/bin/dummy_frag.txt b/src/xenia/gpu/vulkan/shaders/bin/dummy_frag.txt deleted file mode 100644 index 33d1d9990..000000000 --- a/src/xenia/gpu/vulkan/shaders/bin/dummy_frag.txt +++ /dev/null @@ -1,37 +0,0 @@ -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 6 -; Bound: 50 -; Schema: 0 - OpCapability Shader - OpCapability Sampled1D - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Fragment %main "main" %in_interpolators %oC - OpExecutionMode %main OriginUpperLeft - OpSource GLSL 450 - OpSourceExtension "GL_ARB_explicit_attrib_location" - OpSourceExtension "GL_ARB_separate_shader_objects" - OpSourceExtension "GL_ARB_shading_language_420pack" - OpName %main "main" - OpName %in_interpolators "in_interpolators" - OpName %oC "oC" - OpDecorate %in_interpolators Location 0 - OpDecorate %oC Location 0 - %void = OpTypeVoid - %3 = OpTypeFunction %void - %float = OpTypeFloat 32 - %v4float = OpTypeVector %float 4 - %uint = OpTypeInt 32 0 - %uint_16 = OpConstant %uint 16 -%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16 -%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16 -%in_interpolators = OpVariable %_ptr_Input__arr_v4float_uint_16 Input - %uint_4 = OpConstant %uint 4 -%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 
-%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 - %oC = OpVariable %_ptr_Output__arr_v4float_uint_4 Output - %main = OpFunction %void None %3 - %5 = OpLabel - OpReturn - OpFunctionEnd diff --git a/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h b/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h deleted file mode 100644 index 28935cc4a..000000000 --- a/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.h +++ /dev/null @@ -1,183 +0,0 @@ -// generated from `xb genspirv` -// source: line_quad_list.geom -const uint8_t line_quad_list_geom[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x06, 0x00, 0x08, 0x00, - 0x53, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4C, 0x53, 0x4C, - 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, - 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x0C, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x4D, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x1A, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, - 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, 0x04, 0x00, 0x09, 0x00, - 0x47, 0x4C, 0x5F, 0x41, 0x52, 0x42, 0x5F, 0x65, 0x78, 0x70, 0x6C, 0x69, - 0x63, 0x69, 0x74, 0x5F, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x5F, 0x6C, - 0x6F, 0x63, 0x61, 0x74, 0x69, 0x6F, 0x6E, 0x00, 0x04, 0x00, 0x09, 0x00, - 
0x47, 0x4C, 0x5F, 0x41, 0x52, 0x42, 0x5F, 0x73, 0x65, 0x70, 0x61, 0x72, - 0x61, 0x74, 0x65, 0x5F, 0x73, 0x68, 0x61, 0x64, 0x65, 0x72, 0x5F, 0x6F, - 0x62, 0x6A, 0x65, 0x63, 0x74, 0x73, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x06, 0x00, 0x08, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, - 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x06, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, - 0x06, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, - 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, - 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, - 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x69, 0x6E, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x07, 0x00, 0x21, 0x00, 0x00, 0x00, 0x6F, 0x75, 0x74, 0x5F, - 0x69, 0x6E, 0x74, 0x65, 0x72, 0x70, 0x6F, 0x6C, 0x61, 0x74, 0x6F, 0x72, - 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x07, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x69, 0x6E, 0x5F, 0x69, 0x6E, 0x74, 0x65, 0x72, 0x70, 0x6F, 0x6C, 0x61, - 0x74, 0x6F, 0x72, 0x73, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x08, 0x00, - 0x4D, 0x00, 0x00, 0x00, 0x5F, 0x69, 0x6E, 0x5F, 0x70, 0x6F, 0x69, 0x6E, - 0x74, 0x5F, 0x63, 0x6F, 0x6F, 0x72, 0x64, 0x5F, 0x75, 0x6E, 0x75, 0x73, - 0x65, 0x64, 0x00, 0x00, 0x05, 0x00, 0x08, 0x00, 0x50, 0x00, 0x00, 0x00, - 
0x5F, 0x69, 0x6E, 0x5F, 0x70, 0x6F, 0x69, 0x6E, 0x74, 0x5F, 0x73, 0x69, - 0x7A, 0x65, 0x5F, 0x75, 0x6E, 0x75, 0x73, 0x65, 0x64, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x08, 0x00, 0x52, 0x00, 0x00, 0x00, 0x5F, 0x6F, 0x75, 0x74, - 0x5F, 0x70, 0x6F, 0x69, 0x6E, 0x74, 0x5F, 0x63, 0x6F, 0x6F, 0x72, 0x64, - 0x5F, 0x75, 0x6E, 0x75, 0x73, 0x65, 0x64, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x4D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x50, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x52, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 
0x09, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x04, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, - 
0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x39, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, - 0x4A, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x04, 0x00, 0x4B, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x4C, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x4B, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x4C, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x04, 0x00, 0x4E, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x4F, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x4E, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x4F, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x51, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x4A, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x51, 0x00, 0x00, 0x00, - 0x52, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x13, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x17, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 
0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x00, 0x00, - 0x1A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x1C, 0x00, 0x00, 0x00, - 0x1D, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x25, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x21, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, - 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x21, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, - 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 
0x30, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x25, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x21, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, - 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x3A, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x39, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x39, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x25, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x39, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x21, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, - 0xDA, 0x00, 0x01, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x17, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x1B, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x27, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, - 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, -}; diff --git a/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.txt b/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.txt deleted file mode 100644 index 
17e49e49a..000000000 --- a/src/xenia/gpu/vulkan/shaders/bin/line_quad_list_geom.txt +++ /dev/null @@ -1,132 +0,0 @@ -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 6 -; Bound: 83 -; Schema: 0 - OpCapability Geometry - OpCapability GeometryPointSize - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Geometry %main "main" %_ %gl_in %out_interpolators %in_interpolators %_in_point_coord_unused %_in_point_size_unused %_out_point_coord_unused - OpExecutionMode %main InputLinesAdjacency - OpExecutionMode %main Invocations 1 - OpExecutionMode %main OutputLineStrip - OpExecutionMode %main OutputVertices 5 - OpSource GLSL 450 - OpSourceExtension "GL_ARB_explicit_attrib_location" - OpSourceExtension "GL_ARB_separate_shader_objects" - OpName %main "main" - OpName %gl_PerVertex "gl_PerVertex" - OpMemberName %gl_PerVertex 0 "gl_Position" - OpMemberName %gl_PerVertex 1 "gl_PointSize" - OpName %_ "" - OpName %gl_PerVertex_0 "gl_PerVertex" - OpMemberName %gl_PerVertex_0 0 "gl_Position" - OpMemberName %gl_PerVertex_0 1 "gl_PointSize" - OpName %gl_in "gl_in" - OpName %out_interpolators "out_interpolators" - OpName %in_interpolators "in_interpolators" - OpName %_in_point_coord_unused "_in_point_coord_unused" - OpName %_in_point_size_unused "_in_point_size_unused" - OpName %_out_point_coord_unused "_out_point_coord_unused" - OpMemberDecorate %gl_PerVertex 0 BuiltIn Position - OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize - OpDecorate %gl_PerVertex Block - OpMemberDecorate %gl_PerVertex_0 0 BuiltIn Position - OpMemberDecorate %gl_PerVertex_0 1 BuiltIn PointSize - OpDecorate %gl_PerVertex_0 Block - OpDecorate %out_interpolators Location 0 - OpDecorate %in_interpolators Location 0 - OpDecorate %_in_point_coord_unused Location 16 - OpDecorate %_in_point_size_unused Location 17 - OpDecorate %_out_point_coord_unused Location 16 - %void = OpTypeVoid - %3 = OpTypeFunction %void - %float = OpTypeFloat 32 - %v4float = 
OpTypeVector %float 4 -%gl_PerVertex = OpTypeStruct %v4float %float -%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex - %_ = OpVariable %_ptr_Output_gl_PerVertex Output - %int = OpTypeInt 32 1 - %int_0 = OpConstant %int 0 -%gl_PerVertex_0 = OpTypeStruct %v4float %float - %uint = OpTypeInt 32 0 - %uint_4 = OpConstant %uint 4 -%_arr_gl_PerVertex_0_uint_4 = OpTypeArray %gl_PerVertex_0 %uint_4 -%_ptr_Input__arr_gl_PerVertex_0_uint_4 = OpTypePointer Input %_arr_gl_PerVertex_0_uint_4 - %gl_in = OpVariable %_ptr_Input__arr_gl_PerVertex_0_uint_4 Input -%_ptr_Input_v4float = OpTypePointer Input %v4float -%_ptr_Output_v4float = OpTypePointer Output %v4float - %int_1 = OpConstant %int 1 -%_ptr_Input_float = OpTypePointer Input %float -%_ptr_Output_float = OpTypePointer Output %float - %uint_16 = OpConstant %uint 16 -%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16 -%_ptr_Output__arr_v4float_uint_16 = OpTypePointer Output %_arr_v4float_uint_16 -%out_interpolators = OpVariable %_ptr_Output__arr_v4float_uint_16 Output -%_arr__arr_v4float_uint_16_uint_4 = OpTypeArray %_arr_v4float_uint_16 %uint_4 -%_ptr_Input__arr__arr_v4float_uint_16_uint_4 = OpTypePointer Input %_arr__arr_v4float_uint_16_uint_4 -%in_interpolators = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_4 Input -%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16 - %int_2 = OpConstant %int 2 - %int_3 = OpConstant %int 3 - %v2float = OpTypeVector %float 2 -%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4 -%_ptr_Input__arr_v2float_uint_4 = OpTypePointer Input %_arr_v2float_uint_4 -%_in_point_coord_unused = OpVariable %_ptr_Input__arr_v2float_uint_4 Input -%_arr_float_uint_4 = OpTypeArray %float %uint_4 -%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4 -%_in_point_size_unused = OpVariable %_ptr_Input__arr_float_uint_4 Input -%_ptr_Output_v2float = OpTypePointer Output %v2float -%_out_point_coord_unused = OpVariable %_ptr_Output_v2float Output - 
%main = OpFunction %void None %3 - %5 = OpLabel - %20 = OpAccessChain %_ptr_Input_v4float %gl_in %int_0 %int_0 - %21 = OpLoad %v4float %20 - %23 = OpAccessChain %_ptr_Output_v4float %_ %int_0 - OpStore %23 %21 - %26 = OpAccessChain %_ptr_Input_float %gl_in %int_0 %int_1 - %27 = OpLoad %float %26 - %29 = OpAccessChain %_ptr_Output_float %_ %int_1 - OpStore %29 %27 - %38 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %in_interpolators %int_0 - %39 = OpLoad %_arr_v4float_uint_16 %38 - OpStore %out_interpolators %39 - OpEmitVertex - %40 = OpAccessChain %_ptr_Input_v4float %gl_in %int_1 %int_0 - %41 = OpLoad %v4float %40 - OpStore %23 %41 - %43 = OpAccessChain %_ptr_Input_float %gl_in %int_1 %int_1 - %44 = OpLoad %float %43 - OpStore %29 %44 - %46 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %in_interpolators %int_1 - %47 = OpLoad %_arr_v4float_uint_16 %46 - OpStore %out_interpolators %47 - OpEmitVertex - %49 = OpAccessChain %_ptr_Input_v4float %gl_in %int_2 %int_0 - %50 = OpLoad %v4float %49 - OpStore %23 %50 - %52 = OpAccessChain %_ptr_Input_float %gl_in %int_2 %int_1 - %53 = OpLoad %float %52 - OpStore %29 %53 - %55 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %in_interpolators %int_2 - %56 = OpLoad %_arr_v4float_uint_16 %55 - OpStore %out_interpolators %56 - OpEmitVertex - %58 = OpAccessChain %_ptr_Input_v4float %gl_in %int_3 %int_0 - %59 = OpLoad %v4float %58 - OpStore %23 %59 - %61 = OpAccessChain %_ptr_Input_float %gl_in %int_3 %int_1 - %62 = OpLoad %float %61 - OpStore %29 %62 - %64 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %in_interpolators %int_3 - %65 = OpLoad %_arr_v4float_uint_16 %64 - OpStore %out_interpolators %65 - OpEmitVertex - OpStore %23 %21 - OpStore %29 %27 - OpStore %out_interpolators %39 - OpEmitVertex - OpEndPrimitive - OpReturn - OpFunctionEnd diff --git a/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.h b/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.h deleted file mode 100644 index 4d294d48c..000000000 --- 
a/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.h +++ /dev/null @@ -1,245 +0,0 @@ -// generated from `xb genspirv` -// source: point_list.geom -const uint8_t point_list_geom[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x06, 0x00, 0x08, 0x00, - 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0C, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, - 0x5F, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, - 0x73, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, - 0x04, 0x00, 0x09, 0x00, 0x47, 0x4C, 0x5F, 0x41, 0x52, 0x42, 0x5F, 0x65, - 0x78, 0x70, 0x6C, 0x69, 0x63, 0x69, 0x74, 0x5F, 0x61, 0x74, 0x74, 0x72, - 0x69, 0x62, 0x5F, 0x6C, 0x6F, 0x63, 0x61, 0x74, 0x69, 0x6F, 0x6E, 0x00, - 0x04, 0x00, 0x09, 0x00, 0x47, 0x4C, 0x5F, 0x41, 0x52, 0x42, 0x5F, 0x73, - 0x65, 0x70, 0x61, 0x72, 0x61, 0x74, 0x65, 0x5F, 0x73, 0x68, 0x61, 0x64, - 0x65, 0x72, 0x5F, 0x6F, 0x62, 0x6A, 0x65, 0x63, 0x74, 0x73, 0x00, 0x00, - 0x04, 0x00, 0x09, 0x00, 0x47, 0x4C, 0x5F, 0x41, 0x52, 0x42, 0x5F, 0x73, - 0x68, 0x61, 0x64, 0x69, 0x6E, 0x67, 0x5F, 0x6C, 0x61, 0x6E, 0x67, 0x75, - 0x61, 0x67, 0x65, 0x5F, 0x34, 0x32, 0x30, 0x70, 0x61, 0x63, 0x6B, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 
0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, - 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, - 0x69, 0x6F, 0x6E, 0x00, 0x05, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x05, 0x00, 0x07, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x70, 0x75, 0x73, 0x68, 0x5F, 0x63, 0x6F, 0x6E, - 0x73, 0x74, 0x73, 0x5F, 0x74, 0x79, 0x70, 0x65, 0x00, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x07, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x77, 0x69, 0x6E, 0x64, 0x6F, 0x77, 0x5F, 0x73, 0x63, 0x61, 0x6C, 0x65, - 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x76, 0x74, 0x78, 0x5F, 0x66, 0x6D, 0x74, 0x00, - 0x06, 0x00, 0x06, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x70, 0x6F, 0x69, 0x6E, 0x74, 0x5F, 0x73, 0x69, 0x7A, 0x65, 0x00, 0x00, - 0x06, 0x00, 0x06, 0x00, 0x18, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x61, 0x6C, 0x70, 0x68, 0x61, 0x5F, 0x74, 0x65, 0x73, 0x74, 0x00, 0x00, - 0x06, 0x00, 0x07, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x70, 0x73, 0x5F, 0x70, 0x61, 0x72, 0x61, 0x6D, 0x5F, 0x67, 0x65, 0x6E, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x1A, 0x00, 0x00, 0x00, - 0x70, 0x75, 0x73, 0x68, 0x5F, 0x63, 0x6F, 0x6E, 0x73, 0x74, 0x61, 0x6E, - 0x74, 0x73, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x70, 0x6F, 0x69, 0x6E, 0x74, 0x5F, 0x73, 0x69, 0x7A, 0x65, 0x00, 0x00, - 0x05, 0x00, 0x06, 0x00, 0x3D, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, - 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x06, 0x00, 0x3D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, - 0x05, 0x00, 0x03, 0x00, 0x3F, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x05, 0x00, 0x4D, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x64, 0x65, - 0x78, 0x61, 0x62, 0x6C, 0x65, 0x00, 0x00, 0x00, 0x05, 0x00, 0x07, 0x00, - 0x5F, 0x00, 0x00, 0x00, 0x6F, 0x75, 0x74, 0x5F, 0x69, 0x6E, 0x74, 0x65, - 0x72, 0x70, 0x6F, 0x6C, 0x61, 0x74, 0x6F, 0x72, 0x73, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x07, 0x00, 0x62, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x5F, 0x69, - 0x6E, 0x74, 0x65, 0x72, 0x70, 0x6F, 0x6C, 0x61, 0x74, 0x6F, 0x72, 0x73, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x67, 0x00, 0x00, 0x00, - 0x70, 0x6F, 0x69, 0x6E, 0x74, 0x5F, 0x63, 0x6F, 0x6F, 0x72, 0x64, 0x00, - 0x05, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x64, 0x65, - 0x78, 0x61, 0x62, 0x6C, 0x65, 0x00, 0x00, 0x00, 0x05, 0x00, 0x08, 0x00, - 0x73, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x5F, 0x70, 0x6F, 0x69, 0x6E, 0x74, - 0x5F, 0x63, 0x6F, 0x6F, 0x72, 0x64, 0x5F, 0x75, 0x6E, 0x75, 0x73, 0x65, - 0x64, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x03, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x18, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x3D, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x3D, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x5F, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x62, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x67, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x03, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x0E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x07, 0x00, 0x18, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 
0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x03, 0x00, 0x3D, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3E, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x3E, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x43, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xBF, - 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x45, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x80, 0x3F, 0x2C, 0x00, 0x05, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x46, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x45, 0x00, 0x00, 0x00, - 0x2C, 0x00, 0x05, 0x00, 0x15, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, - 0x45, 0x00, 0x00, 0x00, 0x45, 0x00, 
0x00, 0x00, 0x2C, 0x00, 0x05, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, - 0x44, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x05, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x49, 0x00, 0x00, 0x00, 0x45, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, - 0x2C, 0x00, 0x07, 0x00, 0x43, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, - 0x46, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, - 0x49, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x4C, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x5A, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x5C, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x5D, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x5C, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x5E, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x5E, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x60, 0x00, 0x00, 0x00, - 0x5D, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x61, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x61, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x63, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x66, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x66, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x05, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x6C, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x71, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x72, 0x00, 0x00, 0x00, 0x01, 0x00, 
0x00, 0x00, 0x71, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x4C, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x4C, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, - 0x1B, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x07, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0xBA, 0x00, 0x05, 0x00, 0x27, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0xF7, 0x00, 0x03, 0x00, - 0x2A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x29, 0x00, 0x00, 0x00, 0x50, 0x00, 0x05, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x2A, 0x00, 0x00, 0x00, 0xF5, 0x00, 0x07, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x74, 0x00, 
0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x1A, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x4F, 0x00, 0x07, 0x00, 0x15, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x2F, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x88, 0x00, 0x05, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x32, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0xF9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x35, 0x00, 0x00, 0x00, 0xF5, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x75, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x70, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x05, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x00, 0x00, 0xF6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, - 0x3C, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x07, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x4D, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x4E, 0x00, 0x00, 0x00, - 0x4D, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x00, 0x4E, 0x00, 0x00, 0x00, - 0x85, 0x00, 0x05, 0x00, 0x15, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, - 0x4F, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, - 0x51, 0x00, 0x00, 0x00, 0x51, 0x00, 
0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x55, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, - 0x52, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x57, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x58, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, - 0x55, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, 0x57, 0x00, 0x00, 0x00, - 0x58, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x5A, 0x00, 0x00, 0x00, - 0x5B, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x5B, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x63, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, - 0x62, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x5D, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x5F, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x69, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x6A, 0x00, 0x00, 0x00, - 0x69, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, 0x6A, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x07, 0x00, 0x15, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, - 0x6C, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x67, 0x00, 0x00, 0x00, - 0x6D, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x80, 0x00, 0x05, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x00, - 0x6F, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00, 0xDB, 0x00, 0x01, 0x00, - 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 
0x01, 0x00, -}; diff --git a/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.txt b/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.txt deleted file mode 100644 index 0eec50c66..000000000 --- a/src/xenia/gpu/vulkan/shaders/bin/point_list_geom.txt +++ /dev/null @@ -1,167 +0,0 @@ -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 6 -; Bound: 118 -; Schema: 0 - OpCapability Geometry - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Geometry %main "main" %gl_in %point_size %_ %out_interpolators %in_interpolators %point_coord %in_point_coord_unused - OpExecutionMode %main InputPoints - OpExecutionMode %main Invocations 1 - OpExecutionMode %main OutputTriangleStrip - OpExecutionMode %main OutputVertices 4 - OpSource GLSL 450 - OpSourceExtension "GL_ARB_explicit_attrib_location" - OpSourceExtension "GL_ARB_separate_shader_objects" - OpSourceExtension "GL_ARB_shading_language_420pack" - OpName %main "main" - OpName %gl_PerVertex "gl_PerVertex" - OpMemberName %gl_PerVertex 0 "gl_Position" - OpName %gl_in "gl_in" - OpName %push_consts_type "push_consts_type" - OpMemberName %push_consts_type 0 "window_scale" - OpMemberName %push_consts_type 1 "vtx_fmt" - OpMemberName %push_consts_type 2 "point_size" - OpMemberName %push_consts_type 3 "alpha_test" - OpMemberName %push_consts_type 4 "ps_param_gen" - OpName %push_constants "push_constants" - OpName %point_size "point_size" - OpName %gl_PerVertex_0 "gl_PerVertex" - OpMemberName %gl_PerVertex_0 0 "gl_Position" - OpName %_ "" - OpName %indexable "indexable" - OpName %out_interpolators "out_interpolators" - OpName %in_interpolators "in_interpolators" - OpName %point_coord "point_coord" - OpName %indexable_0 "indexable" - OpName %in_point_coord_unused "in_point_coord_unused" - OpMemberDecorate %gl_PerVertex 0 BuiltIn Position - OpDecorate %gl_PerVertex Block - OpMemberDecorate %push_consts_type 0 Offset 0 - OpMemberDecorate %push_consts_type 1 Offset 16 - OpMemberDecorate 
%push_consts_type 2 Offset 32 - OpMemberDecorate %push_consts_type 3 Offset 48 - OpMemberDecorate %push_consts_type 4 Offset 64 - OpDecorate %push_consts_type Block - OpDecorate %point_size Location 17 - OpMemberDecorate %gl_PerVertex_0 0 BuiltIn Position - OpDecorate %gl_PerVertex_0 Block - OpDecorate %out_interpolators Location 0 - OpDecorate %in_interpolators Location 0 - OpDecorate %point_coord Location 16 - OpDecorate %in_point_coord_unused Location 16 - %void = OpTypeVoid - %3 = OpTypeFunction %void - %float = OpTypeFloat 32 - %v4float = OpTypeVector %float 4 -%gl_PerVertex = OpTypeStruct %v4float - %uint = OpTypeInt 32 0 - %uint_1 = OpConstant %uint 1 -%_arr_gl_PerVertex_uint_1 = OpTypeArray %gl_PerVertex %uint_1 -%_ptr_Input__arr_gl_PerVertex_uint_1 = OpTypePointer Input %_arr_gl_PerVertex_uint_1 - %gl_in = OpVariable %_ptr_Input__arr_gl_PerVertex_uint_1 Input - %int = OpTypeInt 32 1 - %int_0 = OpConstant %int 0 -%_ptr_Input_v4float = OpTypePointer Input %v4float - %v2float = OpTypeVector %float 2 -%_ptr_Function_v2float = OpTypePointer Function %v2float -%push_consts_type = OpTypeStruct %v4float %v4float %v4float %v4float %uint -%_ptr_PushConstant_push_consts_type = OpTypePointer PushConstant %push_consts_type -%push_constants = OpVariable %_ptr_PushConstant_push_consts_type PushConstant - %int_2 = OpConstant %int 2 -%_ptr_PushConstant_v4float = OpTypePointer PushConstant %v4float -%_arr_float_uint_1 = OpTypeArray %float %uint_1 -%_ptr_Input__arr_float_uint_1 = OpTypePointer Input %_arr_float_uint_1 - %point_size = OpVariable %_ptr_Input__arr_float_uint_1 Input -%_ptr_Input_float = OpTypePointer Input %float - %float_0 = OpConstant %float 0 - %bool = OpTypeBool - %int_4 = OpConstant %int 4 -%gl_PerVertex_0 = OpTypeStruct %v4float -%_ptr_Output_gl_PerVertex_0 = OpTypePointer Output %gl_PerVertex_0 - %_ = OpVariable %_ptr_Output_gl_PerVertex_0 Output - %uint_4 = OpConstant %uint 4 -%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4 - %float_n1 = OpConstant 
%float -1 - %float_1 = OpConstant %float 1 - %70 = OpConstantComposite %v2float %float_n1 %float_1 - %71 = OpConstantComposite %v2float %float_1 %float_1 - %72 = OpConstantComposite %v2float %float_n1 %float_n1 - %73 = OpConstantComposite %v2float %float_1 %float_n1 - %74 = OpConstantComposite %_arr_v2float_uint_4 %70 %71 %72 %73 -%_ptr_Function__arr_v2float_uint_4 = OpTypePointer Function %_arr_v2float_uint_4 -%_ptr_Output_v4float = OpTypePointer Output %v4float - %uint_16 = OpConstant %uint 16 -%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16 -%_ptr_Output__arr_v4float_uint_16 = OpTypePointer Output %_arr_v4float_uint_16 -%out_interpolators = OpVariable %_ptr_Output__arr_v4float_uint_16 Output -%_arr__arr_v4float_uint_16_uint_1 = OpTypeArray %_arr_v4float_uint_16 %uint_1 -%_ptr_Input__arr__arr_v4float_uint_16_uint_1 = OpTypePointer Input %_arr__arr_v4float_uint_16_uint_1 -%in_interpolators = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_1 Input -%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16 -%_ptr_Output_v2float = OpTypePointer Output %v2float -%point_coord = OpVariable %_ptr_Output_v2float Output - %108 = OpConstantComposite %v2float %float_0 %float_0 - %int_1 = OpConstant %int 1 -%_arr_v2float_uint_1 = OpTypeArray %v2float %uint_1 -%_ptr_Input__arr_v2float_uint_1 = OpTypePointer Input %_arr_v2float_uint_1 -%in_point_coord_unused = OpVariable %_ptr_Input__arr_v2float_uint_1 Input - %main = OpFunction %void None %3 - %5 = OpLabel - %indexable = OpVariable %_ptr_Function__arr_v2float_uint_4 Function -%indexable_0 = OpVariable %_ptr_Function__arr_v2float_uint_4 Function - %19 = OpAccessChain %_ptr_Input_v4float %gl_in %int_0 %int_0 - %20 = OpLoad %v4float %19 - %29 = OpAccessChain %_ptr_PushConstant_v4float %push_constants %int_2 - %30 = OpLoad %v4float %29 - %31 = OpVectorShuffle %v2float %30 %30 0 1 - %36 = OpAccessChain %_ptr_Input_float %point_size %int_0 - %37 = OpLoad %float %36 - %40 = OpFOrdGreaterThan %bool %37 
%float_0 - OpSelectionMerge %42 None - OpBranchConditional %40 %41 %42 - %41 = OpLabel - %45 = OpCompositeConstruct %v2float %37 %37 - OpBranch %42 - %42 = OpLabel - %116 = OpPhi %v2float %31 %5 %45 %41 - %46 = OpAccessChain %_ptr_PushConstant_v4float %push_constants %int_0 - %47 = OpLoad %v4float %46 - %48 = OpVectorShuffle %v2float %47 %47 2 3 - %50 = OpFDiv %v2float %116 %48 - OpBranch %53 - %53 = OpLabel - %117 = OpPhi %int %int_0 %42 %112 %54 - %60 = OpSLessThan %bool %117 %int_4 - OpLoopMerge %55 %54 None - OpBranchConditional %60 %54 %55 - %54 = OpLabel - %65 = OpVectorShuffle %v2float %20 %20 0 1 - OpStore %indexable %74 - %78 = OpAccessChain %_ptr_Function_v2float %indexable %117 - %79 = OpLoad %v2float %78 - %81 = OpFMul %v2float %79 %50 - %82 = OpFAdd %v2float %65 %81 - %85 = OpCompositeExtract %float %82 0 - %86 = OpCompositeExtract %float %82 1 - %87 = OpCompositeExtract %float %20 2 - %88 = OpCompositeExtract %float %20 3 - %89 = OpCompositeConstruct %v4float %85 %86 %87 %88 - %91 = OpAccessChain %_ptr_Output_v4float %_ %int_0 - OpStore %91 %89 - %100 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %in_interpolators %int_0 - %101 = OpLoad %_arr_v4float_uint_16 %100 - OpStore %out_interpolators %101 - OpStore %indexable_0 %74 - %106 = OpAccessChain %_ptr_Function_v2float %indexable_0 %117 - %107 = OpLoad %v2float %106 - %109 = OpExtInst %v2float %1 FMax %107 %108 - OpStore %point_coord %109 - OpEmitVertex - %112 = OpIAdd %int %117 %int_1 - OpBranch %53 - %55 = OpLabel - OpEndPrimitive - OpReturn - OpFunctionEnd diff --git a/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.h b/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.h deleted file mode 100644 index fd5741897..000000000 --- a/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.h +++ /dev/null @@ -1,171 +0,0 @@ -// generated from `xb genspirv` -// source: quad_list.geom -const uint8_t quad_list_geom[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x06, 0x00, 0x08, 0x00, - 0x4C, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4C, 0x53, 0x4C, - 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, - 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x0C, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00, - 0x45, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x1D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x1A, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, - 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, 0x04, 0x00, 0x09, 0x00, - 0x47, 0x4C, 0x5F, 0x41, 0x52, 0x42, 0x5F, 0x65, 0x78, 0x70, 0x6C, 0x69, - 0x63, 0x69, 0x74, 0x5F, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x5F, 0x6C, - 0x6F, 0x63, 0x61, 0x74, 0x69, 0x6F, 0x6E, 0x00, 0x04, 0x00, 0x09, 0x00, - 0x47, 0x4C, 0x5F, 0x41, 0x52, 0x42, 0x5F, 0x73, 0x65, 0x70, 0x61, 0x72, - 0x61, 0x74, 0x65, 0x5F, 0x73, 0x68, 0x61, 0x64, 0x65, 0x72, 0x5F, 0x6F, - 0x62, 0x6A, 0x65, 0x63, 0x74, 0x73, 0x00, 0x00, 0x04, 0x00, 0x09, 0x00, - 0x47, 0x4C, 0x5F, 0x41, 0x52, 0x42, 0x5F, 0x73, 0x68, 0x61, 0x64, 0x69, - 0x6E, 0x67, 0x5F, 0x6C, 0x61, 0x6E, 0x67, 0x75, 0x61, 0x67, 0x65, 0x5F, - 0x34, 0x32, 0x30, 0x70, 0x61, 0x63, 0x6B, 0x00, 0x05, 0x00, 0x04, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x05, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x64, 0x65, - 0x78, 0x61, 0x62, 0x6C, 0x65, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, - 0x22, 0x00, 0x00, 
0x00, 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, - 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, - 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, - 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x06, 0x00, 0x25, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, - 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x06, 0x00, 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, - 0x06, 0x00, 0x07, 0x00, 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x05, 0x00, 0x07, 0x00, - 0x38, 0x00, 0x00, 0x00, 0x6F, 0x75, 0x74, 0x5F, 0x69, 0x6E, 0x74, 0x65, - 0x72, 0x70, 0x6F, 0x6C, 0x61, 0x74, 0x6F, 0x72, 0x73, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x07, 0x00, 0x3B, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x5F, 0x69, - 0x6E, 0x74, 0x65, 0x72, 0x70, 0x6F, 0x6C, 0x61, 0x74, 0x6F, 0x72, 0x73, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x08, 0x00, 0x45, 0x00, 0x00, 0x00, - 0x5F, 0x69, 0x6E, 0x5F, 0x70, 0x6F, 0x69, 0x6E, 0x74, 0x5F, 0x63, 0x6F, - 0x6F, 0x72, 0x64, 0x5F, 0x75, 0x6E, 0x75, 0x73, 0x65, 0x64, 0x00, 0x00, - 0x05, 0x00, 0x08, 0x00, 0x48, 0x00, 0x00, 0x00, 0x5F, 0x69, 0x6E, 0x5F, - 0x70, 0x6F, 0x69, 0x6E, 0x74, 0x5F, 0x73, 0x69, 0x7A, 0x65, 0x5F, 0x75, - 0x6E, 0x75, 0x73, 0x65, 0x64, 0x00, 0x00, 0x00, 0x05, 0x00, 0x08, 0x00, - 0x4A, 0x00, 0x00, 0x00, 0x5F, 0x6F, 0x75, 0x74, 0x5F, 0x70, 0x6F, 0x69, - 0x6E, 0x74, 0x5F, 0x63, 0x6F, 0x6F, 0x72, 0x64, 0x5F, 0x75, 0x6E, 0x75, - 0x73, 0x65, 0x64, 
0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x03, 0x00, 0x25, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x38, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x3B, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x45, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x48, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x4A, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x00, 
0x00, 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x07, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x1A, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x27, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x2A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x33, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x36, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 
0x00, 0x35, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x39, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x3A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x39, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3D, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, - 0x42, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x04, 0x00, 0x43, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x44, 0x00, 0x00, 0x00, 0x45, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x47, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x47, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x49, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x42, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x49, 0x00, 0x00, 0x00, - 0x4A, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xF5, 0x00, 0x07, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x4B, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x00, 
0x00, 0x41, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0xB1, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x4B, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xF6, 0x00, 0x04, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xFA, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x1D, 0x00, 0x00, 0x00, 0x4B, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2D, 0x00, 0x00, 0x00, - 0x2E, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x30, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x33, 0x00, 0x00, 0x00, - 0x34, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x34, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x3D, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x36, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x38, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, - 0xDA, 0x00, 0x01, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x00, 
0x00, 0x4B, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, - 0xF9, 0x00, 0x02, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0xDB, 0x00, 0x01, 0x00, 0xFD, 0x00, 0x01, 0x00, - 0x38, 0x00, 0x01, 0x00, -}; diff --git a/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.txt b/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.txt deleted file mode 100644 index a7767342d..000000000 --- a/src/xenia/gpu/vulkan/shaders/bin/quad_list_geom.txt +++ /dev/null @@ -1,120 +0,0 @@ -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 6 -; Bound: 76 -; Schema: 0 - OpCapability Geometry - OpCapability GeometryPointSize - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Geometry %main "main" %_ %gl_in %out_interpolators %in_interpolators %_in_point_coord_unused %_in_point_size_unused %_out_point_coord_unused - OpExecutionMode %main InputLinesAdjacency - OpExecutionMode %main Invocations 1 - OpExecutionMode %main OutputTriangleStrip - OpExecutionMode %main OutputVertices 4 - OpSource GLSL 450 - OpSourceExtension "GL_ARB_explicit_attrib_location" - OpSourceExtension "GL_ARB_separate_shader_objects" - OpSourceExtension "GL_ARB_shading_language_420pack" - OpName %main "main" - OpName %indexable "indexable" - OpName %gl_PerVertex "gl_PerVertex" - OpMemberName %gl_PerVertex 0 "gl_Position" - OpMemberName %gl_PerVertex 1 "gl_PointSize" - OpName %_ "" - OpName %gl_PerVertex_0 "gl_PerVertex" - OpMemberName %gl_PerVertex_0 0 "gl_Position" - OpMemberName %gl_PerVertex_0 1 "gl_PointSize" - OpName %gl_in "gl_in" - OpName %out_interpolators "out_interpolators" - OpName %in_interpolators "in_interpolators" - OpName %_in_point_coord_unused "_in_point_coord_unused" - OpName %_in_point_size_unused "_in_point_size_unused" - OpName %_out_point_coord_unused "_out_point_coord_unused" - OpMemberDecorate %gl_PerVertex 0 BuiltIn Position - OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize - OpDecorate %gl_PerVertex Block - 
OpMemberDecorate %gl_PerVertex_0 0 BuiltIn Position - OpMemberDecorate %gl_PerVertex_0 1 BuiltIn PointSize - OpDecorate %gl_PerVertex_0 Block - OpDecorate %out_interpolators Location 0 - OpDecorate %in_interpolators Location 0 - OpDecorate %_in_point_coord_unused Location 16 - OpDecorate %_in_point_size_unused Location 17 - OpDecorate %_out_point_coord_unused Location 16 - %void = OpTypeVoid - %3 = OpTypeFunction %void - %int = OpTypeInt 32 1 -%_ptr_Function_int = OpTypePointer Function %int - %int_0 = OpConstant %int 0 - %int_4 = OpConstant %int 4 - %bool = OpTypeBool - %uint = OpTypeInt 32 0 - %uint_4 = OpConstant %uint 4 -%_arr_int_uint_4 = OpTypeArray %int %uint_4 - %int_1 = OpConstant %int 1 - %int_3 = OpConstant %int 3 - %int_2 = OpConstant %int 2 - %26 = OpConstantComposite %_arr_int_uint_4 %int_0 %int_1 %int_3 %int_2 -%_ptr_Function__arr_int_uint_4 = OpTypePointer Function %_arr_int_uint_4 - %float = OpTypeFloat 32 - %v4float = OpTypeVector %float 4 -%gl_PerVertex = OpTypeStruct %v4float %float -%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex - %_ = OpVariable %_ptr_Output_gl_PerVertex Output -%gl_PerVertex_0 = OpTypeStruct %v4float %float -%_arr_gl_PerVertex_0_uint_4 = OpTypeArray %gl_PerVertex_0 %uint_4 -%_ptr_Input__arr_gl_PerVertex_0_uint_4 = OpTypePointer Input %_arr_gl_PerVertex_0_uint_4 - %gl_in = OpVariable %_ptr_Input__arr_gl_PerVertex_0_uint_4 Input -%_ptr_Input_v4float = OpTypePointer Input %v4float -%_ptr_Output_v4float = OpTypePointer Output %v4float -%_ptr_Input_float = OpTypePointer Input %float -%_ptr_Output_float = OpTypePointer Output %float - %uint_16 = OpConstant %uint 16 -%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16 -%_ptr_Output__arr_v4float_uint_16 = OpTypePointer Output %_arr_v4float_uint_16 -%out_interpolators = OpVariable %_ptr_Output__arr_v4float_uint_16 Output -%_arr__arr_v4float_uint_16_uint_4 = OpTypeArray %_arr_v4float_uint_16 %uint_4 -%_ptr_Input__arr__arr_v4float_uint_16_uint_4 = OpTypePointer Input 
%_arr__arr_v4float_uint_16_uint_4 -%in_interpolators = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_4 Input -%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16 - %v2float = OpTypeVector %float 2 -%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4 -%_ptr_Input__arr_v2float_uint_4 = OpTypePointer Input %_arr_v2float_uint_4 -%_in_point_coord_unused = OpVariable %_ptr_Input__arr_v2float_uint_4 Input -%_arr_float_uint_4 = OpTypeArray %float %uint_4 -%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4 -%_in_point_size_unused = OpVariable %_ptr_Input__arr_float_uint_4 Input -%_ptr_Output_v2float = OpTypePointer Output %v2float -%_out_point_coord_unused = OpVariable %_ptr_Output_v2float Output - %main = OpFunction %void None %3 - %5 = OpLabel - %indexable = OpVariable %_ptr_Function__arr_int_uint_4 Function - OpBranch %10 - %10 = OpLabel - %75 = OpPhi %int %int_0 %5 %65 %11 - %18 = OpSLessThan %bool %75 %int_4 - OpLoopMerge %12 %11 None - OpBranchConditional %18 %11 %12 - %11 = OpLabel - OpStore %indexable %26 - %30 = OpAccessChain %_ptr_Function_int %indexable %75 - %31 = OpLoad %int %30 - %43 = OpAccessChain %_ptr_Input_v4float %gl_in %31 %int_0 - %44 = OpLoad %v4float %43 - %46 = OpAccessChain %_ptr_Output_v4float %_ %int_0 - OpStore %46 %44 - %49 = OpAccessChain %_ptr_Input_float %gl_in %31 %int_1 - %50 = OpLoad %float %49 - %52 = OpAccessChain %_ptr_Output_float %_ %int_1 - OpStore %52 %50 - %62 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %in_interpolators %31 - %63 = OpLoad %_arr_v4float_uint_16 %62 - OpStore %out_interpolators %63 - OpEmitVertex - %65 = OpIAdd %int %75 %int_1 - OpBranch %10 - %12 = OpLabel - OpEndPrimitive - OpReturn - OpFunctionEnd diff --git a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h deleted file mode 100644 index d613de39c..000000000 --- a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.h +++ /dev/null @@ -1,374 +0,0 @@ -// 
generated from `xb genspirv` -// source: rect_list.geom -const uint8_t rect_list_geom[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x06, 0x00, 0x08, 0x00, - 0x28, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4C, 0x53, 0x4C, - 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, - 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x0C, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x4C, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, - 0x0C, 0x01, 0x00, 0x00, 0x0F, 0x01, 0x00, 0x00, 0x11, 0x01, 0x00, 0x00, - 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x1D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x1A, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, - 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, 0x04, 0x00, 0x09, 0x00, - 0x47, 0x4C, 0x5F, 0x41, 0x52, 0x42, 0x5F, 0x65, 0x78, 0x70, 0x6C, 0x69, - 0x63, 0x69, 0x74, 0x5F, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x5F, 0x6C, - 0x6F, 0x63, 0x61, 0x74, 0x69, 0x6F, 0x6E, 0x00, 0x04, 0x00, 0x09, 0x00, - 0x47, 0x4C, 0x5F, 0x41, 0x52, 0x42, 0x5F, 0x73, 0x65, 0x70, 0x61, 0x72, - 0x61, 0x74, 0x65, 0x5F, 0x73, 0x68, 0x61, 0x64, 0x65, 0x72, 0x5F, 0x6F, - 0x62, 0x6A, 0x65, 0x63, 0x74, 0x73, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x06, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, - 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x06, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, - 0x06, 0x00, 0x07, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, - 0x4A, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, - 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, - 0x4A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, - 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, - 0x4A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, - 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x03, 0x00, 0x4C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x07, 0x00, 0x58, 0x00, 0x00, 0x00, 0x6F, 0x75, 0x74, 0x5F, - 0x69, 0x6E, 0x74, 0x65, 0x72, 0x70, 0x6F, 0x6C, 0x61, 0x74, 0x6F, 0x72, - 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x07, 0x00, 0x5B, 0x00, 0x00, 0x00, - 0x69, 0x6E, 0x5F, 0x69, 0x6E, 0x74, 0x65, 0x72, 0x70, 0x6F, 0x6C, 0x61, - 0x74, 0x6F, 0x72, 0x73, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x08, 0x00, - 0x0C, 0x01, 0x00, 0x00, 0x5F, 0x69, 0x6E, 0x5F, 0x70, 0x6F, 0x69, 0x6E, - 0x74, 0x5F, 0x63, 0x6F, 0x6F, 0x72, 0x64, 0x5F, 0x75, 0x6E, 0x75, 0x73, - 0x65, 0x64, 0x00, 0x00, 0x05, 0x00, 0x08, 0x00, 0x0F, 0x01, 0x00, 0x00, - 0x5F, 0x69, 0x6E, 0x5F, 0x70, 0x6F, 0x69, 0x6E, 0x74, 0x5F, 0x73, 0x69, - 0x7A, 0x65, 0x5F, 0x75, 0x6E, 0x75, 0x73, 0x65, 0x64, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x08, 0x00, 0x11, 0x01, 0x00, 0x00, 0x5F, 0x6F, 0x75, 0x74, - 0x5F, 0x70, 0x6F, 0x69, 0x6E, 0x74, 0x5F, 0x63, 0x6F, 0x6F, 0x72, 0x64, - 0x5F, 0x75, 0x6E, 0x75, 0x73, 0x65, 0x64, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x1D, 
0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x03, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x4A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x4A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x4A, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x5B, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x0C, 0x01, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0F, 0x01, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x11, 0x01, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x02, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x04, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x1D, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 
0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x6F, 0x12, 0x83, 0x3A, - 0x20, 0x00, 0x04, 0x00, 0x31, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x04, 0x00, 0x4A, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x4B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x4B, 0x00, 0x00, 0x00, 0x4C, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x4F, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x53, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x57, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, - 0x56, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x20, 
0x00, 0x04, 0x00, - 0x5A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x5A, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5C, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x9F, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x04, 0x00, 0x0A, 0x01, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0B, 0x01, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0A, 0x01, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x0B, 0x01, 0x00, 0x00, 0x0C, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x04, 0x00, 0x0D, 0x01, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0E, 0x01, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0D, 0x01, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x0E, 0x01, 0x00, 0x00, 0x0F, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x10, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x10, 0x01, 0x00, 0x00, - 0x11, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x25, 0x01, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, - 0x2F, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, - 0x27, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x2C, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x2C, 
0x00, 0x00, 0x00, - 0x50, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x29, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, - 0x4F, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, - 0x33, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x16, 0x01, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x06, 0x00, 0x07, 0x00, 0x00, 0x00, 0x17, 0x01, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x16, 0x01, 0x00, 0x00, - 0xBC, 0x00, 0x05, 0x00, 0x17, 0x00, 0x00, 0x00, 0x1A, 0x01, 0x00, 0x00, - 0x17, 0x01, 0x00, 0x00, 0x25, 0x01, 0x00, 0x00, 0x9B, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x1B, 0x01, 0x00, 0x00, 0x1A, 0x01, 0x00, 0x00, - 0xA8, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, - 0x1B, 0x01, 0x00, 0x00, 0xF7, 0x00, 0x03, 0x00, 0x3A, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, 0x38, 0x00, 0x00, 0x00, - 0x39, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x39, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x07, 0x00, 0x27, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, 0x50, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x3C, 
0x00, 0x00, 0x00, - 0x3E, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x1F, 0x01, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x06, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x01, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1F, 0x01, 0x00, 0x00, - 0xBC, 0x00, 0x05, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x01, 0x00, 0x00, - 0x20, 0x01, 0x00, 0x00, 0x25, 0x01, 0x00, 0x00, 0x9B, 0x00, 0x04, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x24, 0x01, 0x00, 0x00, 0x23, 0x01, 0x00, 0x00, - 0xF9, 0x00, 0x02, 0x00, 0x3A, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x3A, 0x00, 0x00, 0x00, 0xF5, 0x00, 0x07, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x00, 0x00, 0x1B, 0x01, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x24, 0x01, 0x00, 0x00, 0x39, 0x00, 0x00, 0x00, 0xF7, 0x00, 0x03, 0x00, - 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, - 0x47, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x48, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x4F, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x4C, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x50, 0x00, 0x00, 0x00, - 0x33, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x27, 0x00, 0x00, 0x00, - 0x51, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x2A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x52, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x53, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x4C, 0x00, 0x00, 0x00, - 0x2A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x54, 0x00, 0x00, 0x00, - 0x52, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x5C, 0x00, 0x00, 0x00, - 0x5D, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x5E, 0x00, 0x00, 0x00, - 0x5D, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x58, 0x00, 0x00, 0x00, - 0x5E, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 
0x00, 0x06, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x2A, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x50, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x27, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x63, 0x00, 0x00, 0x00, - 0x62, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x54, 0x00, 0x00, 0x00, - 0x63, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x5C, 0x00, 0x00, 0x00, - 0x65, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, - 0x65, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x58, 0x00, 0x00, 0x00, - 0x66, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x50, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x27, 0x00, 0x00, 0x00, 0x6A, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, - 0x6A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x54, 0x00, 0x00, 0x00, - 0x6B, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x5C, 0x00, 0x00, 0x00, - 0x6D, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x6E, 0x00, 0x00, 0x00, - 0x6D, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x58, 0x00, 0x00, 0x00, - 0x6E, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x50, 0x00, 0x00, 0x00, 0x68, 
0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x54, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x58, 0x00, 0x00, 0x00, 0x6E, 0x00, 0x00, 0x00, - 0xDA, 0x00, 0x01, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x50, 0x00, 0x00, 0x00, - 0x60, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x54, 0x00, 0x00, 0x00, - 0x63, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x58, 0x00, 0x00, 0x00, - 0x66, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x7F, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, - 0x4F, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, - 0x60, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x86, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, - 0x4F, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, - 0x68, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x8A, 0x00, 0x00, 0x00, 0x86, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, - 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, - 0x8A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x8F, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x90, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, - 0x68, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x50, 0x00, 0x07, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x92, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, - 0x8F, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x50, 0x00, 0x00, 0x00, 0x92, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x54, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, - 0xF9, 0x00, 0x02, 0x00, 0x99, 0x00, 0x00, 0x00, 0xF8, 
0x00, 0x02, 0x00, - 0x99, 0x00, 0x00, 0x00, 0xF5, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x27, 0x01, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, - 0xB0, 0x00, 0x00, 0x00, 0x9A, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x05, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0xA0, 0x00, 0x00, 0x00, 0x27, 0x01, 0x00, 0x00, - 0x9F, 0x00, 0x00, 0x00, 0xF6, 0x00, 0x04, 0x00, 0x9B, 0x00, 0x00, 0x00, - 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, - 0xA0, 0x00, 0x00, 0x00, 0x9A, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x9A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x31, 0x00, 0x00, 0x00, 0xA3, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x27, 0x01, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0xA3, 0x00, 0x00, 0x00, - 0x7F, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, 0xA5, 0x00, 0x00, 0x00, - 0xA4, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x31, 0x00, 0x00, 0x00, - 0xA7, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x27, 0x01, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, - 0xA8, 0x00, 0x00, 0x00, 0xA7, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0xA9, 0x00, 0x00, 0x00, 0xA5, 0x00, 0x00, 0x00, - 0xA8, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x31, 0x00, 0x00, 0x00, - 0xAB, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x27, 0x01, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, - 0xAC, 0x00, 0x00, 0x00, 0xAB, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00, 0xA9, 0x00, 0x00, 0x00, - 0xAC, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x4F, 0x00, 0x00, 0x00, - 0xAE, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x27, 0x01, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0xAE, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00, - 0x80, 0x00, 0x05, 0x00, 0x23, 0x00, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00, - 0x27, 0x01, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0xF9, 
0x00, 0x02, 0x00, - 0x99, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x9B, 0x00, 0x00, 0x00, - 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, 0xF9, 0x00, 0x02, 0x00, - 0x49, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xB1, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x4F, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, - 0x4C, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0xB4, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x27, 0x00, 0x00, 0x00, 0xB5, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x06, 0x00, 0x00, 0x00, 0xB6, 0x00, 0x00, 0x00, 0xB5, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x05, 0x00, 0x53, 0x00, 0x00, 0x00, 0xB7, 0x00, 0x00, 0x00, - 0x4C, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0xB7, 0x00, 0x00, 0x00, 0xB6, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x5C, 0x00, 0x00, 0x00, 0xB8, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, - 0xB9, 0x00, 0x00, 0x00, 0xB8, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x58, 0x00, 0x00, 0x00, 0xB9, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x31, 0x00, 0x00, 0x00, 0xBA, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, 0xBB, 0x00, 0x00, 0x00, - 0xBA, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB4, 0x00, 0x00, 0x00, - 0xBB, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x27, 0x00, 0x00, 0x00, - 0xBD, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x2A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0xBE, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0xB7, 0x00, 0x00, 0x00, 0xBE, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x5C, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, - 0x2A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x56, 
0x00, 0x00, 0x00, - 0xC1, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x58, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x31, 0x00, 0x00, 0x00, 0xC2, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, - 0xC2, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB4, 0x00, 0x00, 0x00, - 0xC3, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x27, 0x00, 0x00, 0x00, - 0xC5, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x2A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0xC6, 0x00, 0x00, 0x00, 0xC5, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0xB7, 0x00, 0x00, 0x00, 0xC6, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x5C, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, - 0xC9, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x58, 0x00, 0x00, 0x00, 0xC9, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, - 0xDB, 0x00, 0x01, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB4, 0x00, 0x00, 0x00, - 0x33, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB7, 0x00, 0x00, 0x00, - 0xB6, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x58, 0x00, 0x00, 0x00, - 0xB9, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0xB4, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0xB7, 0x00, 0x00, 0x00, 0xC6, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x58, 0x00, 0x00, 0x00, 0xC9, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, - 0x4F, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0xDF, 0x00, 0x00, 0x00, - 0xBB, 0x00, 0x00, 0x00, 0xBB, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0xE0, 0x00, 0x00, 0x00, 0xDF, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0xE1, 0x00, 0x00, 0x00, 0x34, 
0x00, 0x00, 0x00, - 0xE0, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, - 0xE4, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0xE5, 0x00, 0x00, 0x00, 0xE1, 0x00, 0x00, 0x00, - 0xE4, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0xE9, 0x00, 0x00, 0x00, 0xE5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0xEA, 0x00, 0x00, 0x00, - 0xE5, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0xEB, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, - 0xEC, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x50, 0x00, 0x07, 0x00, 0x1C, 0x00, 0x00, 0x00, 0xED, 0x00, 0x00, 0x00, - 0xE9, 0x00, 0x00, 0x00, 0xEA, 0x00, 0x00, 0x00, 0xEB, 0x00, 0x00, 0x00, - 0xEC, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB4, 0x00, 0x00, 0x00, - 0xED, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB7, 0x00, 0x00, 0x00, - 0xC6, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0xF3, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0xF3, 0x00, 0x00, 0x00, 0xF5, 0x00, 0x07, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x26, 0x01, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0xB1, 0x00, 0x00, 0x00, 0x09, 0x01, 0x00, 0x00, 0xF4, 0x00, 0x00, 0x00, - 0xB1, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x00, 0x00, - 0x26, 0x01, 0x00, 0x00, 0x9F, 0x00, 0x00, 0x00, 0xF6, 0x00, 0x04, 0x00, - 0xF5, 0x00, 0x00, 0x00, 0xF4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xFA, 0x00, 0x04, 0x00, 0xF9, 0x00, 0x00, 0x00, 0xF4, 0x00, 0x00, 0x00, - 0xF5, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xF4, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x31, 0x00, 0x00, 0x00, 0xFC, 0x00, 0x00, 0x00, - 0x5B, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x26, 0x01, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, 0xFD, 
0x00, 0x00, 0x00, - 0xFC, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x31, 0x00, 0x00, 0x00, - 0xFF, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x26, 0x01, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, - 0x00, 0x01, 0x00, 0x00, 0xFF, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x04, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, - 0x81, 0x00, 0x05, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x02, 0x01, 0x00, 0x00, - 0xFD, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x26, 0x01, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x05, 0x01, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, - 0x81, 0x00, 0x05, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x06, 0x01, 0x00, 0x00, - 0x02, 0x01, 0x00, 0x00, 0x05, 0x01, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x4F, 0x00, 0x00, 0x00, 0x07, 0x01, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, - 0x26, 0x01, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x07, 0x01, 0x00, 0x00, - 0x06, 0x01, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x09, 0x01, 0x00, 0x00, 0x26, 0x01, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0xF9, 0x00, 0x02, 0x00, 0xF3, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0xF5, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, - 0xF9, 0x00, 0x02, 0x00, 0x49, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x49, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, -}; diff --git a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.txt b/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.txt deleted file mode 100644 index ff580ea8d..000000000 --- a/src/xenia/gpu/vulkan/shaders/bin/rect_list_geom.txt +++ /dev/null @@ -1,274 +0,0 @@ -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 6 -; Bound: 296 -; Schema: 0 - OpCapability Geometry - OpCapability GeometryPointSize - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - 
OpEntryPoint Geometry %main "main" %gl_in %_ %out_interpolators %in_interpolators %_in_point_coord_unused %_in_point_size_unused %_out_point_coord_unused - OpExecutionMode %main Triangles - OpExecutionMode %main Invocations 1 - OpExecutionMode %main OutputTriangleStrip - OpExecutionMode %main OutputVertices 6 - OpSource GLSL 450 - OpSourceExtension "GL_ARB_explicit_attrib_location" - OpSourceExtension "GL_ARB_separate_shader_objects" - OpName %main "main" - OpName %gl_PerVertex "gl_PerVertex" - OpMemberName %gl_PerVertex 0 "gl_Position" - OpMemberName %gl_PerVertex 1 "gl_PointSize" - OpName %gl_in "gl_in" - OpName %gl_PerVertex_0 "gl_PerVertex" - OpMemberName %gl_PerVertex_0 0 "gl_Position" - OpMemberName %gl_PerVertex_0 1 "gl_PointSize" - OpName %_ "" - OpName %out_interpolators "out_interpolators" - OpName %in_interpolators "in_interpolators" - OpName %_in_point_coord_unused "_in_point_coord_unused" - OpName %_in_point_size_unused "_in_point_size_unused" - OpName %_out_point_coord_unused "_out_point_coord_unused" - OpMemberDecorate %gl_PerVertex 0 BuiltIn Position - OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize - OpDecorate %gl_PerVertex Block - OpMemberDecorate %gl_PerVertex_0 0 BuiltIn Position - OpMemberDecorate %gl_PerVertex_0 1 BuiltIn PointSize - OpDecorate %gl_PerVertex_0 Block - OpDecorate %out_interpolators Location 0 - OpDecorate %in_interpolators Location 0 - OpDecorate %_in_point_coord_unused Location 16 - OpDecorate %_in_point_size_unused Location 17 - OpDecorate %_out_point_coord_unused Location 16 - %void = OpTypeVoid - %3 = OpTypeFunction %void - %float = OpTypeFloat 32 - %v2float = OpTypeVector %float 2 - %bool = OpTypeBool - %v2bool = OpTypeVector %bool 2 - %v4float = OpTypeVector %float 4 -%gl_PerVertex = OpTypeStruct %v4float %float - %uint = OpTypeInt 32 0 - %uint_3 = OpConstant %uint 3 -%_arr_gl_PerVertex_uint_3 = OpTypeArray %gl_PerVertex %uint_3 -%_ptr_Input__arr_gl_PerVertex_uint_3 = OpTypePointer Input %_arr_gl_PerVertex_uint_3 - 
%gl_in = OpVariable %_ptr_Input__arr_gl_PerVertex_uint_3 Input - %int = OpTypeInt 32 1 - %int_0 = OpConstant %int 0 - %int_2 = OpConstant %int 2 - %uint_0 = OpConstant %uint 0 -%_ptr_Input_float = OpTypePointer Input %float - %int_1 = OpConstant %int 1 - %uint_1 = OpConstant %uint 1 -%float_0_00100000005 = OpConstant %float 0.00100000005 -%_ptr_Input_v4float = OpTypePointer Input %v4float -%gl_PerVertex_0 = OpTypeStruct %v4float %float -%_ptr_Output_gl_PerVertex_0 = OpTypePointer Output %gl_PerVertex_0 - %_ = OpVariable %_ptr_Output_gl_PerVertex_0 Output -%_ptr_Output_v4float = OpTypePointer Output %v4float -%_ptr_Output_float = OpTypePointer Output %float - %uint_16 = OpConstant %uint 16 -%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16 -%_ptr_Output__arr_v4float_uint_16 = OpTypePointer Output %_arr_v4float_uint_16 -%out_interpolators = OpVariable %_ptr_Output__arr_v4float_uint_16 Output -%_arr__arr_v4float_uint_16_uint_3 = OpTypeArray %_arr_v4float_uint_16 %uint_3 -%_ptr_Input__arr__arr_v4float_uint_16_uint_3 = OpTypePointer Input %_arr__arr_v4float_uint_16_uint_3 -%in_interpolators = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_3 Input -%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16 - %int_16 = OpConstant %int 16 -%_arr_v2float_uint_3 = OpTypeArray %v2float %uint_3 -%_ptr_Input__arr_v2float_uint_3 = OpTypePointer Input %_arr_v2float_uint_3 -%_in_point_coord_unused = OpVariable %_ptr_Input__arr_v2float_uint_3 Input -%_arr_float_uint_3 = OpTypeArray %float %uint_3 -%_ptr_Input__arr_float_uint_3 = OpTypePointer Input %_arr_float_uint_3 -%_in_point_size_unused = OpVariable %_ptr_Input__arr_float_uint_3 Input -%_ptr_Output_v2float = OpTypePointer Output %v2float -%_out_point_coord_unused = OpVariable %_ptr_Output_v2float Output - %293 = OpConstantComposite %v2float %float_0_00100000005 %float_0_00100000005 - %main = OpFunction %void None %3 - %5 = OpLabel - %40 = OpAccessChain %_ptr_Input_float %gl_in %int_2 %int_0 %uint_0 
- %41 = OpLoad %float %40 - %44 = OpAccessChain %_ptr_Input_float %gl_in %int_1 %int_0 %uint_1 - %45 = OpLoad %float %44 - %46 = OpCompositeConstruct %v2float %41 %45 - %50 = OpAccessChain %_ptr_Input_v4float %gl_in %int_0 %int_0 - %51 = OpLoad %v4float %50 - %52 = OpVectorShuffle %v2float %51 %51 0 1 - %278 = OpFSub %v2float %52 %46 - %279 = OpExtInst %v2float %1 FAbs %278 - %282 = OpFOrdLessThanEqual %v2bool %279 %293 - %283 = OpAll %bool %282 - %56 = OpLogicalNot %bool %283 - OpSelectionMerge %58 None - OpBranchConditional %56 %57 %58 - %57 = OpLabel - %59 = OpAccessChain %_ptr_Input_float %gl_in %int_1 %int_0 %uint_0 - %60 = OpLoad %float %59 - %61 = OpAccessChain %_ptr_Input_float %gl_in %int_2 %int_0 %uint_1 - %62 = OpLoad %float %61 - %63 = OpCompositeConstruct %v2float %60 %62 - %287 = OpFSub %v2float %52 %63 - %288 = OpExtInst %v2float %1 FAbs %287 - %291 = OpFOrdLessThanEqual %v2bool %288 %293 - %292 = OpAll %bool %291 - OpBranch %58 - %58 = OpLabel - %71 = OpPhi %bool %283 %5 %292 %57 - OpSelectionMerge %73 None - OpBranchConditional %71 %72 %177 - %72 = OpLabel - %80 = OpAccessChain %_ptr_Output_v4float %_ %int_0 - OpStore %80 %51 - %81 = OpAccessChain %_ptr_Input_float %gl_in %int_0 %int_1 - %82 = OpLoad %float %81 - %84 = OpAccessChain %_ptr_Output_float %_ %int_1 - OpStore %84 %82 - %93 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %in_interpolators %int_0 - %94 = OpLoad %_arr_v4float_uint_16 %93 - OpStore %out_interpolators %94 - OpEmitVertex - %95 = OpAccessChain %_ptr_Input_v4float %gl_in %int_1 %int_0 - %96 = OpLoad %v4float %95 - OpStore %80 %96 - %98 = OpAccessChain %_ptr_Input_float %gl_in %int_1 %int_1 - %99 = OpLoad %float %98 - OpStore %84 %99 - %101 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %in_interpolators %int_1 - %102 = OpLoad %_arr_v4float_uint_16 %101 - OpStore %out_interpolators %102 - OpEmitVertex - %103 = OpAccessChain %_ptr_Input_v4float %gl_in %int_2 %int_0 - %104 = OpLoad %v4float %103 - OpStore %80 %104 - %106 = 
OpAccessChain %_ptr_Input_float %gl_in %int_2 %int_1 - %107 = OpLoad %float %106 - OpStore %84 %107 - %109 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %in_interpolators %int_2 - %110 = OpLoad %_arr_v4float_uint_16 %109 - OpStore %out_interpolators %110 - OpEmitVertex - OpEndPrimitive - OpStore %80 %104 - OpStore %84 %107 - OpStore %out_interpolators %110 - OpEmitVertex - OpStore %80 %96 - OpStore %84 %99 - OpStore %out_interpolators %102 - OpEmitVertex - %130 = OpFNegate %v2float %52 - %133 = OpVectorShuffle %v2float %96 %96 0 1 - %134 = OpFAdd %v2float %130 %133 - %137 = OpVectorShuffle %v2float %104 %104 0 1 - %138 = OpFAdd %v2float %134 %137 - %142 = OpCompositeExtract %float %138 0 - %143 = OpCompositeExtract %float %138 1 - %144 = OpCompositeExtract %float %104 2 - %145 = OpCompositeExtract %float %104 3 - %146 = OpCompositeConstruct %v4float %142 %143 %144 %145 - OpStore %80 %146 - OpStore %84 %107 - OpBranch %153 - %153 = OpLabel - %295 = OpPhi %int %int_0 %72 %176 %154 - %160 = OpSLessThan %bool %295 %int_16 - OpLoopMerge %155 %154 None - OpBranchConditional %160 %154 %155 - %154 = OpLabel - %163 = OpAccessChain %_ptr_Input_v4float %in_interpolators %int_0 %295 - %164 = OpLoad %v4float %163 - %165 = OpFNegate %v4float %164 - %167 = OpAccessChain %_ptr_Input_v4float %in_interpolators %int_1 %295 - %168 = OpLoad %v4float %167 - %169 = OpFAdd %v4float %165 %168 - %171 = OpAccessChain %_ptr_Input_v4float %in_interpolators %int_2 %295 - %172 = OpLoad %v4float %171 - %173 = OpFAdd %v4float %169 %172 - %174 = OpAccessChain %_ptr_Output_v4float %out_interpolators %295 - OpStore %174 %173 - %176 = OpIAdd %int %295 %int_1 - OpBranch %153 - %155 = OpLabel - OpEmitVertex - OpEndPrimitive - OpBranch %73 - %177 = OpLabel - %180 = OpAccessChain %_ptr_Output_v4float %_ %int_0 - OpStore %180 %51 - %181 = OpAccessChain %_ptr_Input_float %gl_in %int_0 %int_1 - %182 = OpLoad %float %181 - %183 = OpAccessChain %_ptr_Output_float %_ %int_1 - OpStore %183 %182 - %184 = 
OpAccessChain %_ptr_Input__arr_v4float_uint_16 %in_interpolators %int_0 - %185 = OpLoad %_arr_v4float_uint_16 %184 - OpStore %out_interpolators %185 - OpEmitVertex - %186 = OpAccessChain %_ptr_Input_v4float %gl_in %int_1 %int_0 - %187 = OpLoad %v4float %186 - OpStore %180 %187 - %189 = OpAccessChain %_ptr_Input_float %gl_in %int_1 %int_1 - %190 = OpLoad %float %189 - OpStore %183 %190 - %192 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %in_interpolators %int_1 - %193 = OpLoad %_arr_v4float_uint_16 %192 - OpStore %out_interpolators %193 - OpEmitVertex - %194 = OpAccessChain %_ptr_Input_v4float %gl_in %int_2 %int_0 - %195 = OpLoad %v4float %194 - OpStore %180 %195 - %197 = OpAccessChain %_ptr_Input_float %gl_in %int_2 %int_1 - %198 = OpLoad %float %197 - OpStore %183 %198 - %200 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %in_interpolators %int_2 - %201 = OpLoad %_arr_v4float_uint_16 %200 - OpStore %out_interpolators %201 - OpEmitVertex - OpEndPrimitive - OpStore %180 %51 - OpStore %183 %182 - OpStore %out_interpolators %185 - OpEmitVertex - OpStore %180 %195 - OpStore %183 %198 - OpStore %out_interpolators %201 - OpEmitVertex - %223 = OpVectorShuffle %v2float %187 %187 0 1 - %224 = OpFNegate %v2float %223 - %225 = OpFAdd %v2float %52 %224 - %228 = OpVectorShuffle %v2float %195 %195 0 1 - %229 = OpFAdd %v2float %225 %228 - %233 = OpCompositeExtract %float %229 0 - %234 = OpCompositeExtract %float %229 1 - %235 = OpCompositeExtract %float %195 2 - %236 = OpCompositeExtract %float %195 3 - %237 = OpCompositeConstruct %v4float %233 %234 %235 %236 - OpStore %180 %237 - OpStore %183 %198 - OpBranch %243 - %243 = OpLabel - %294 = OpPhi %int %int_0 %177 %265 %244 - %249 = OpSLessThan %bool %294 %int_16 - OpLoopMerge %245 %244 None - OpBranchConditional %249 %244 %245 - %244 = OpLabel - %252 = OpAccessChain %_ptr_Input_v4float %in_interpolators %int_0 %294 - %253 = OpLoad %v4float %252 - %255 = OpAccessChain %_ptr_Input_v4float %in_interpolators %int_1 %294 - %256 = 
OpLoad %v4float %255 - %257 = OpFNegate %v4float %256 - %258 = OpFAdd %v4float %253 %257 - %260 = OpAccessChain %_ptr_Input_v4float %in_interpolators %int_2 %294 - %261 = OpLoad %v4float %260 - %262 = OpFAdd %v4float %258 %261 - %263 = OpAccessChain %_ptr_Output_v4float %out_interpolators %294 - OpStore %263 %262 - %265 = OpIAdd %int %294 %int_1 - OpBranch %243 - %245 = OpLabel - OpEmitVertex - OpEndPrimitive - OpBranch %73 - %73 = OpLabel - OpReturn - OpFunctionEnd diff --git a/src/xenia/gpu/vulkan/shaders/dummy.frag b/src/xenia/gpu/vulkan/shaders/dummy.frag deleted file mode 100644 index 325576f0f..000000000 --- a/src/xenia/gpu/vulkan/shaders/dummy.frag +++ /dev/null @@ -1,35 +0,0 @@ -// NOTE: This file is compiled and embedded into the exe. -// Use `xenia-build genspirv` and check in any changes under bin/. - -#version 450 core -#extension all : warn -#extension GL_ARB_shading_language_420pack : require -#extension GL_ARB_separate_shader_objects : require -#extension GL_ARB_explicit_attrib_location : require - -layout(set = 0, binding = 1) uniform consts_type { - vec4 float_consts[512]; - uint loop_consts[32]; - uint bool_consts[8]; -} consts; - -layout(push_constant) uniform push_consts_type { - vec4 window_scale; - vec4 vtx_fmt; - vec4 point_size; - vec4 alpha_test; - uint ps_param_gen; -} push_constants; - -layout(set = 1, binding = 0) uniform sampler1D textures1D[32]; -layout(set = 1, binding = 1) uniform sampler2D textures2D[32]; -layout(set = 1, binding = 2) uniform sampler3D textures3D[32]; -layout(set = 1, binding = 3) uniform samplerCube textures4D[32]; - -layout(location = 0) in vec4 in_interpolators[16]; -layout(location = 0) out vec4 oC[4]; - -void main() { - // This shader does absolutely nothing! 
- return; -} diff --git a/src/xenia/gpu/vulkan/shaders/line_quad_list.geom b/src/xenia/gpu/vulkan/shaders/line_quad_list.geom deleted file mode 100644 index 7f8863853..000000000 --- a/src/xenia/gpu/vulkan/shaders/line_quad_list.geom +++ /dev/null @@ -1,53 +0,0 @@ -// NOTE: This file is compiled and embedded into the exe. -// Use `xenia-build genspirv` and check in any changes under bin/. - -#version 450 core -#extension all : warn -#extension GL_ARB_separate_shader_objects : require -#extension GL_ARB_explicit_attrib_location : require - -in gl_PerVertex { - vec4 gl_Position; - float gl_PointSize; - // float gl_ClipDistance[]; -} gl_in[]; - -out gl_PerVertex { - vec4 gl_Position; - float gl_PointSize; - // float gl_ClipDistance[]; -}; - -layout(location = 0) in vec4 in_interpolators[][16]; -layout(location = 0) out vec4 out_interpolators[16]; - -layout(location = 16) in vec2 _in_point_coord_unused[]; -layout(location = 17) in float _in_point_size_unused[]; - -layout(location = 16) out vec2 _out_point_coord_unused; - -layout(lines_adjacency) in; -layout(line_strip, max_vertices = 5) out; -void main() { - gl_Position = gl_in[0].gl_Position; - gl_PointSize = gl_in[0].gl_PointSize; - out_interpolators = in_interpolators[0]; - EmitVertex(); - gl_Position = gl_in[1].gl_Position; - gl_PointSize = gl_in[1].gl_PointSize; - out_interpolators = in_interpolators[1]; - EmitVertex(); - gl_Position = gl_in[2].gl_Position; - gl_PointSize = gl_in[2].gl_PointSize; - out_interpolators = in_interpolators[2]; - EmitVertex(); - gl_Position = gl_in[3].gl_Position; - gl_PointSize = gl_in[3].gl_PointSize; - out_interpolators = in_interpolators[3]; - EmitVertex(); - gl_Position = gl_in[0].gl_Position; - gl_PointSize = gl_in[0].gl_PointSize; - out_interpolators = in_interpolators[0]; - EmitVertex(); - EndPrimitive(); -} diff --git a/src/xenia/gpu/vulkan/shaders/point_list.geom b/src/xenia/gpu/vulkan/shaders/point_list.geom deleted file mode 100644 index 52b29581e..000000000 --- 
a/src/xenia/gpu/vulkan/shaders/point_list.geom +++ /dev/null @@ -1,63 +0,0 @@ -// NOTE: This file is compiled and embedded into the exe. -// Use `xenia-build genspirv` and check in any changes under bin/. - -#version 450 core -#extension all : warn -#extension GL_ARB_shading_language_420pack : require -#extension GL_ARB_separate_shader_objects : require -#extension GL_ARB_explicit_attrib_location : require - -layout(push_constant) uniform push_consts_type { - vec4 window_scale; - vec4 vtx_fmt; - vec4 point_size; - vec4 alpha_test; - uint ps_param_gen; -} push_constants; - -in gl_PerVertex { - vec4 gl_Position; - // float gl_ClipDistance[]; -} gl_in[]; - -out gl_PerVertex { - vec4 gl_Position; - // float gl_ClipDistance[]; -}; - -layout(location = 0) in vec4 in_interpolators[][16]; -layout(location = 16) in vec2 in_point_coord_unused[]; -layout(location = 17) in float point_size[]; - -layout(location = 0) out vec4 out_interpolators[16]; -layout(location = 16) out vec2 point_coord; - -// TODO(benvanik): clamp to min/max. -// TODO(benvanik): figure out how to see which interpolator gets adjusted. - -layout(points) in; -layout(triangle_strip, max_vertices = 4) out; - -void main() { - const vec2 offsets[4] = { - vec2(-1.0, 1.0), - vec2( 1.0, 1.0), - vec2(-1.0, -1.0), - vec2( 1.0, -1.0), - }; - vec4 pos = gl_in[0].gl_Position; - vec2 window_scaled_psize = push_constants.point_size.xy; - // Shader header writes -1.0f to pointSize by default, so any positive value - // means that it was overwritten by the translated vertex shader. 
- if (point_size[0] > 0.0f) { - window_scaled_psize = vec2(point_size[0]); - } - window_scaled_psize /= push_constants.window_scale.zw; - for (int i = 0; i < 4; ++i) { - gl_Position = vec4(pos.xy + (offsets[i] * window_scaled_psize), pos.zw); - out_interpolators = in_interpolators[0]; - point_coord = max(offsets[i], vec2(0.0f)); - EmitVertex(); - } - EndPrimitive(); -} diff --git a/src/xenia/gpu/vulkan/shaders/quad_list.geom b/src/xenia/gpu/vulkan/shaders/quad_list.geom deleted file mode 100644 index b340b55da..000000000 --- a/src/xenia/gpu/vulkan/shaders/quad_list.geom +++ /dev/null @@ -1,42 +0,0 @@ -// NOTE: This file is compiled and embedded into the exe. -// Use `xenia-build genspirv` and check in any changes under bin/. - -#version 450 core -#extension all : warn -#extension GL_ARB_shading_language_420pack : require -#extension GL_ARB_separate_shader_objects : require -#extension GL_ARB_explicit_attrib_location : require - -in gl_PerVertex { - vec4 gl_Position; - float gl_PointSize; - // float gl_ClipDistance[]; -} gl_in[]; - -out gl_PerVertex { - vec4 gl_Position; - float gl_PointSize; - // float gl_ClipDistance[]; -}; - -layout(location = 0) in vec4 in_interpolators[][16]; -layout(location = 0) out vec4 out_interpolators[16]; - -layout(location = 16) in vec2 _in_point_coord_unused[]; -layout(location = 17) in float _in_point_size_unused[]; - -layout(location = 16) out vec2 _out_point_coord_unused; - -layout(lines_adjacency) in; -layout(triangle_strip, max_vertices = 4) out; -void main() { - const int order[4] = { 0, 1, 3, 2 }; - for (int i = 0; i < 4; ++i) { - int input_index = order[i]; - gl_Position = gl_in[input_index].gl_Position; - gl_PointSize = gl_in[input_index].gl_PointSize; - out_interpolators = in_interpolators[input_index]; - EmitVertex(); - } - EndPrimitive(); -} diff --git a/src/xenia/gpu/vulkan/shaders/rect_list.geom b/src/xenia/gpu/vulkan/shaders/rect_list.geom deleted file mode 100644 index 515e1b576..000000000 --- 
a/src/xenia/gpu/vulkan/shaders/rect_list.geom +++ /dev/null @@ -1,124 +0,0 @@ -// NOTE: This file is compiled and embedded into the exe. -// Use `xenia-build genspirv` and check in any changes under bin/. - -#version 450 core -#extension all : warn -#extension GL_ARB_separate_shader_objects : require -#extension GL_ARB_explicit_attrib_location : require - -in gl_PerVertex { - vec4 gl_Position; - float gl_PointSize; - // float gl_ClipDistance[]; -} gl_in[]; - -out gl_PerVertex { - vec4 gl_Position; - float gl_PointSize; - // float gl_ClipDistance[]; -}; - -layout(location = 0) in vec4 in_interpolators[][16]; -layout(location = 0) out vec4 out_interpolators[16]; - -layout(location = 16) in vec2 _in_point_coord_unused[]; -layout(location = 17) in float _in_point_size_unused[]; - -layout(location = 16) out vec2 _out_point_coord_unused; - -layout(triangles) in; -layout(triangle_strip, max_vertices = 6) out; - -bool equalsEpsilon(vec2 left, vec2 right, float epsilon) { - return all(lessThanEqual(abs(left - right), vec2(epsilon))); -} - -void main() { - // Most games use a left-aligned form. 
- if (equalsEpsilon(gl_in[0].gl_Position.xy, vec2(gl_in[2].gl_Position.x, gl_in[1].gl_Position.y), 0.001) || - equalsEpsilon(gl_in[0].gl_Position.xy, vec2(gl_in[1].gl_Position.x, gl_in[2].gl_Position.y), 0.001)) { - // 0 ------ 1 0: -1,-1 - // | - | 1: 1,-1 - // | // | 2: -1, 1 - // | - | 3: [ 1, 1 ] - // 2 ----- [3] - // - // 0 ------ 2 0: -1,-1 - // | - | 1: -1, 1 - // | // | 2: 1,-1 - // | - | 3: [ 1, 1 ] - // 1 ------[3] - gl_Position = gl_in[0].gl_Position; - gl_PointSize = gl_in[0].gl_PointSize; - out_interpolators = in_interpolators[0]; - EmitVertex(); - gl_Position = gl_in[1].gl_Position; - gl_PointSize = gl_in[1].gl_PointSize; - out_interpolators = in_interpolators[1]; - EmitVertex(); - gl_Position = gl_in[2].gl_Position; - gl_PointSize = gl_in[2].gl_PointSize; - out_interpolators = in_interpolators[2]; - EmitVertex(); - EndPrimitive(); - gl_Position = gl_in[2].gl_Position; - gl_PointSize = gl_in[2].gl_PointSize; - out_interpolators = in_interpolators[2]; - EmitVertex(); - gl_Position = gl_in[1].gl_Position; - gl_PointSize = gl_in[1].gl_PointSize; - out_interpolators = in_interpolators[1]; - EmitVertex(); - gl_Position = vec4((-gl_in[0].gl_Position.xy) + - gl_in[1].gl_Position.xy + - gl_in[2].gl_Position.xy, - gl_in[2].gl_Position.zw); - gl_PointSize = gl_in[2].gl_PointSize; - for (int i = 0; i < 16; ++i) { - out_interpolators[i] = (-in_interpolators[0][i]) + - in_interpolators[1][i] + - in_interpolators[2][i]; - } - EmitVertex(); - EndPrimitive(); - } else { - // 0 ------ 1 0: -1,-1 - // | - | 1: 1,-1 - // | \\ | 2: 1, 1 - // | - | 3: [-1, 1 ] - // [3] ----- 2 - gl_Position = gl_in[0].gl_Position; - gl_PointSize = gl_in[0].gl_PointSize; - out_interpolators = in_interpolators[0]; - EmitVertex(); - gl_Position = gl_in[1].gl_Position; - gl_PointSize = gl_in[1].gl_PointSize; - out_interpolators = in_interpolators[1]; - EmitVertex(); - gl_Position = gl_in[2].gl_Position; - gl_PointSize = gl_in[2].gl_PointSize; - out_interpolators = in_interpolators[2]; - 
EmitVertex(); - EndPrimitive(); - gl_Position = gl_in[0].gl_Position; - gl_PointSize = gl_in[0].gl_PointSize; - out_interpolators = in_interpolators[0]; - EmitVertex(); - gl_Position = gl_in[2].gl_Position; - gl_PointSize = gl_in[2].gl_PointSize; - out_interpolators = in_interpolators[2]; - EmitVertex(); - gl_Position = vec4( gl_in[0].gl_Position.xy + - (-gl_in[1].gl_Position.xy) + - gl_in[2].gl_Position.xy, - gl_in[2].gl_Position.zw); - gl_PointSize = gl_in[2].gl_PointSize; - for (int i = 0; i < 16; ++i) { - out_interpolators[i] = in_interpolators[0][i] + - (-in_interpolators[1][i]) + - in_interpolators[2][i]; - } - EmitVertex(); - EndPrimitive(); - } -} diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc deleted file mode 100644 index 39a88ef38..000000000 --- a/src/xenia/gpu/vulkan/texture_cache.cc +++ /dev/null @@ -1,1664 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#include "xenia/gpu/vulkan/texture_cache.h" - -#include - -#include "third_party/fmt/include/fmt/format.h" -#include "xenia/base/logging.h" -#include "xenia/base/math.h" -#include "xenia/base/memory.h" -#include "xenia/base/profiling.h" -#include "xenia/gpu/gpu_flags.h" -#include "xenia/gpu/sampler_info.h" -#include "xenia/gpu/texture_conversion.h" -#include "xenia/gpu/texture_info.h" -#include "xenia/gpu/vulkan/texture_config.h" -#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" -#include "xenia/ui/vulkan/vulkan_mem_alloc.h" - -DECLARE_bool(texture_dump); - -namespace xe { -namespace gpu { - -void TextureDump(const TextureInfo& src, void* buffer, size_t length); - -namespace vulkan { - -using xe::ui::vulkan::CheckResult; - -constexpr uint32_t kMaxTextureSamplers = 32; -constexpr VkDeviceSize kStagingBufferSize = 64 * 1024 * 1024; - -const char* get_dimension_name(xenos::DataDimension dimension) { - static const char* names[] = { - "1D", - "2D", - "3D", - "cube", - }; - auto value = static_cast(dimension); - if (value < xe::countof(names)) { - return names[value]; - } - return "unknown"; -} - -TextureCache::TextureCache(Memory* memory, RegisterFile* register_file, - TraceWriter* trace_writer, - ui::vulkan::VulkanDevice* device) - : memory_(memory), - register_file_(register_file), - trace_writer_(trace_writer), - device_(device), - staging_buffer_(device, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, - kStagingBufferSize), - wb_staging_buffer_(device, VK_BUFFER_USAGE_TRANSFER_DST_BIT, - kStagingBufferSize) {} - -TextureCache::~TextureCache() { Shutdown(); } - -VkResult TextureCache::Initialize() { - VkResult status = VK_SUCCESS; - - // Descriptor pool used for all of our cached descriptors. 
- VkDescriptorPoolSize pool_sizes[1]; - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - pool_sizes[0].descriptorCount = 32768; - descriptor_pool_ = std::make_unique( - *device_, 32768, - std::vector(pool_sizes, std::end(pool_sizes))); - - wb_command_pool_ = std::make_unique( - *device_, device_->queue_family_index()); - - // Check some device limits - // On low sampler counts: Rarely would we experience over 16 unique samplers. - // This code could be refactored to scale up/down to the # of samplers. - auto& limits = device_->device_info().properties.limits; - if (limits.maxPerStageDescriptorSamplers < kMaxTextureSamplers || - limits.maxPerStageDescriptorSampledImages < kMaxTextureSamplers) { - XELOGE( - "Physical device is unable to support required number of sampled " - "images! Expect instability! (maxPerStageDescriptorSamplers={}, " - "maxPerStageDescriptorSampledImages={})", - limits.maxPerStageDescriptorSamplers, - limits.maxPerStageDescriptorSampledImages); - // assert_always(); - } - - // Create the descriptor set layout used for rendering. - // We always have the same number of samplers but only some are used. - // The shaders will alias the bindings to the 4 dimensional types. 
- VkDescriptorSetLayoutBinding bindings[1]; - bindings[0].binding = 0; - bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[0].descriptorCount = kMaxTextureSamplers; - bindings[0].stageFlags = - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[0].pImmutableSamplers = nullptr; - - VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info; - descriptor_set_layout_info.sType = - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - descriptor_set_layout_info.pNext = nullptr; - descriptor_set_layout_info.flags = 0; - descriptor_set_layout_info.bindingCount = - static_cast(xe::countof(bindings)); - descriptor_set_layout_info.pBindings = bindings; - status = - vkCreateDescriptorSetLayout(*device_, &descriptor_set_layout_info, - nullptr, &texture_descriptor_set_layout_); - if (status != VK_SUCCESS) { - return status; - } - - status = staging_buffer_.Initialize(); - if (status != VK_SUCCESS) { - return status; - } - - status = wb_staging_buffer_.Initialize(); - if (status != VK_SUCCESS) { - return status; - } - - // Create a memory allocator for textures. 
- VmaVulkanFunctions vulkan_funcs = {}; - ui::vulkan::FillVMAVulkanFunctions(&vulkan_funcs); - - VmaAllocatorCreateInfo alloc_info = { - 0, *device_, *device_, 0, 0, nullptr, nullptr, 0, nullptr, &vulkan_funcs, - }; - status = vmaCreateAllocator(&alloc_info, &mem_allocator_); - if (status != VK_SUCCESS) { - vkDestroyDescriptorSetLayout(*device_, texture_descriptor_set_layout_, - nullptr); - return status; - } - - invalidated_textures_sets_[0].reserve(64); - invalidated_textures_sets_[1].reserve(64); - invalidated_textures_ = &invalidated_textures_sets_[0]; - - device_queue_ = device_->AcquireQueue(device_->queue_family_index()); - - memory_invalidation_callback_handle_ = - memory_->RegisterPhysicalMemoryInvalidationCallback( - MemoryInvalidationCallbackThunk, this); - - return VK_SUCCESS; -} - -void TextureCache::Shutdown() { - if (memory_invalidation_callback_handle_ != nullptr) { - memory_->UnregisterPhysicalMemoryInvalidationCallback( - memory_invalidation_callback_handle_); - memory_invalidation_callback_handle_ = nullptr; - } - - if (device_queue_) { - device_->ReleaseQueue(device_queue_, device_->queue_family_index()); - } - - // Free all textures allocated. - ClearCache(); - Scavenge(); - - if (mem_allocator_ != nullptr) { - vmaDestroyAllocator(mem_allocator_); - mem_allocator_ = nullptr; - } - vkDestroyDescriptorSetLayout(*device_, texture_descriptor_set_layout_, - nullptr); -} - -TextureCache::Texture* TextureCache::AllocateTexture( - const TextureInfo& texture_info, VkFormatFeatureFlags required_flags) { - auto format_info = texture_info.format_info(); - assert_not_null(format_info); - - auto& config = texture_configs[int(format_info->format)]; - VkFormat format = config.host_format; - if (format == VK_FORMAT_UNDEFINED) { - XELOGE( - "Texture Cache: Attempted to allocate texture format {}, which is " - "defined as VK_FORMAT_UNDEFINED!", - texture_info.format_info()->name); - return nullptr; - } - - bool is_cube = false; - // Create an image first. 
- VkImageCreateInfo image_info = {}; - image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - image_info.flags = 0; - - switch (texture_info.dimension) { - case xenos::DataDimension::k1D: - case xenos::DataDimension::k2DOrStacked: - if (!texture_info.is_stacked) { - image_info.imageType = VK_IMAGE_TYPE_2D; - } else { - image_info.imageType = VK_IMAGE_TYPE_3D; - image_info.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; - } - break; - case xenos::DataDimension::k3D: - image_info.imageType = VK_IMAGE_TYPE_3D; - break; - case xenos::DataDimension::kCube: - image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; - is_cube = true; - break; - default: - assert_unhandled_case(texture_info.dimension); - return nullptr; - } - - image_info.tiling = VK_IMAGE_TILING_OPTIMAL; - image_info.usage = - VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; - - // Check the device limits for the format before we create it. - VkFormatProperties props; - vkGetPhysicalDeviceFormatProperties(*device_, format, &props); - if ((props.optimalTilingFeatures & required_flags) != required_flags) { - // Texture needs conversion on upload to a native format. - XELOGE( - "Texture Cache: Invalid usage flag specified on format {} ({})\n\t" - "(requested: {})", - texture_info.format_info()->name, ui::vulkan::to_string(format), - ui::vulkan::to_flags_string(static_cast( - required_flags & ~props.optimalTilingFeatures))); - } - - if (texture_info.dimension != xenos::DataDimension::kCube && - props.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) { - // Add color attachment usage if it's supported. - image_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - } else if (props.optimalTilingFeatures & - VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) { - // Add depth/stencil usage as well. 
- image_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; - } - - if (props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT) { - image_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; - } - - VkImageFormatProperties image_props; - vkGetPhysicalDeviceImageFormatProperties( - *device_, format, image_info.imageType, image_info.tiling, - image_info.usage, image_info.flags, &image_props); - - // TODO(DrChat): Actually check the image properties. - - image_info.format = format; - image_info.extent.width = texture_info.width + 1; - image_info.extent.height = texture_info.height + 1; - image_info.extent.depth = !is_cube ? 1 + texture_info.depth : 1; - image_info.mipLevels = texture_info.mip_min_level + texture_info.mip_levels(); - image_info.arrayLayers = !is_cube ? 1 : 1 + texture_info.depth; - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - image_info.queueFamilyIndexCount = 0; - image_info.pQueueFamilyIndices = nullptr; - image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - VkImage image; - - assert_true(image_props.maxExtent.width >= image_info.extent.width); - assert_true(image_props.maxExtent.height >= image_info.extent.height); - assert_true(image_props.maxExtent.depth >= image_info.extent.depth); - assert_true(image_props.maxMipLevels >= image_info.mipLevels); - assert_true(image_props.maxArrayLayers >= image_info.arrayLayers); - - VmaAllocation alloc; - VmaAllocationCreateInfo vma_create_info = { - 0, VMA_MEMORY_USAGE_GPU_ONLY, 0, 0, 0, nullptr, nullptr, - }; - VmaAllocationInfo vma_info = {}; - VkResult status = vmaCreateImage(mem_allocator_, &image_info, - &vma_create_info, &image, &alloc, &vma_info); - if (status != VK_SUCCESS) { - // Allocation failed. 
- return nullptr; - } - - auto texture = new Texture(); - texture->format = image_info.format; - texture->image = image; - texture->image_layout = image_info.initialLayout; - texture->alloc = alloc; - texture->alloc_info = vma_info; - texture->framebuffer = nullptr; - texture->usage_flags = image_info.usage; - texture->is_watched = false; - texture->texture_info = texture_info; - return texture; -} - -bool TextureCache::FreeTexture(Texture* texture) { - if (texture->in_flight_fence) { - VkResult status = vkGetFenceStatus(*device_, texture->in_flight_fence); - if (status != VK_SUCCESS && status != VK_ERROR_DEVICE_LOST) { - // Texture still in flight. - return false; - } - } - - if (texture->framebuffer) { - vkDestroyFramebuffer(*device_, texture->framebuffer, nullptr); - } - - for (auto it = texture->views.begin(); it != texture->views.end();) { - vkDestroyImageView(*device_, (*it)->view, nullptr); - it = texture->views.erase(it); - } - - { - global_critical_region_.Acquire(); - if (texture->is_watched) { - for (auto it = watched_textures_.begin(); - it != watched_textures_.end();) { - if (it->texture == texture) { - watched_textures_.erase(it); - break; - } - ++it; - } - texture->is_watched = false; - } - } - - vmaDestroyImage(mem_allocator_, texture->image, texture->alloc); - delete texture; - return true; -} - -void TextureCache::WatchTexture(Texture* texture) { - uint32_t address, size; - - { - global_critical_region_.Acquire(); - - assert_false(texture->is_watched); - - WatchedTexture watched_texture; - if (texture->texture_info.memory.base_address && - texture->texture_info.memory.base_size) { - watched_texture.is_mip = false; - address = texture->texture_info.memory.base_address; - size = texture->texture_info.memory.base_size; - } else if (texture->texture_info.memory.mip_address && - texture->texture_info.memory.mip_size) { - watched_texture.is_mip = true; - address = texture->texture_info.memory.mip_address; - size = texture->texture_info.memory.mip_size; 
- } else { - return; - } - watched_texture.texture = texture; - - // Fire any access watches that overlap this region. - for (auto it = watched_textures_.begin(); it != watched_textures_.end();) { - // Case 1: 2222222|222|11111111 - // Case 2: 1111111|222|22222222 - // Case 3: 1111111|222|11111111 (fragmentation) - // Case 4: 2222222|222|22222222 (complete overlap) - Texture* other_texture = it->texture; - uint32_t other_address, other_size; - if (it->is_mip) { - other_address = other_texture->texture_info.memory.mip_address; - other_size = other_texture->texture_info.memory.mip_size; - } else { - other_address = other_texture->texture_info.memory.base_address; - other_size = other_texture->texture_info.memory.base_size; - } - - bool hit = false; - if (address <= other_address && address + size > other_address) { - hit = true; - } else if (other_address <= address && - other_address + other_size > address) { - hit = true; - } else if (other_address <= address && - other_address + other_size > address + size) { - hit = true; - } else if (other_address >= address && - other_address + other_size < address + size) { - hit = true; - } - - if (hit) { - TextureTouched(other_texture); - it = watched_textures_.erase(it); - continue; - } - - ++it; - } - - watched_textures_.push_back(watched_texture); - texture->is_watched = true; - } - - memory_->EnablePhysicalMemoryAccessCallbacks(address, size, true, false); -} - -void TextureCache::TextureTouched(Texture* texture) { - if (texture->pending_invalidation) { - return; - } - { - auto global_lock = global_critical_region_.Acquire(); - assert_true(texture->is_watched); - texture->is_watched = false; - // Add to pending list so Scavenge will clean it up. 
- invalidated_textures_->insert(texture); - } - texture->pending_invalidation = true; -} - -std::pair TextureCache::MemoryInvalidationCallback( - uint32_t physical_address_start, uint32_t length, bool exact_range) { - global_critical_region_.Acquire(); - if (watched_textures_.empty()) { - return std::make_pair(0, UINT32_MAX); - } - // Get the texture within the range, or otherwise get the gap between two - // adjacent textures that can be safely unwatched. - uint32_t written_range_end = physical_address_start + length; - uint32_t previous_end = 0, next_start = UINT32_MAX; - for (auto it = watched_textures_.begin(); it != watched_textures_.end();) { - Texture* texture = it->texture; - uint32_t texture_address, texture_size; - if (it->is_mip) { - texture_address = texture->texture_info.memory.mip_address; - texture_size = texture->texture_info.memory.mip_size; - } else { - texture_address = texture->texture_info.memory.base_address; - texture_size = texture->texture_info.memory.base_size; - } - if (texture_address >= written_range_end) { - // Completely after the written range. - next_start = std::min(next_start, texture_address); - } else { - uint32_t texture_end = texture_address + texture_size; - if (texture_end <= physical_address_start) { - // Completely before the written range. - previous_end = std::max(previous_end, texture_end); - } else { - // Hit. 
- TextureTouched(texture); - it = watched_textures_.erase(it); - return std::make_pair(texture_address, texture_size); - } - } - ++it; - } - return std::make_pair(previous_end, next_start - previous_end); -} - -std::pair TextureCache::MemoryInvalidationCallbackThunk( - void* context_ptr, uint32_t physical_address_start, uint32_t length, - bool exact_range) { - return reinterpret_cast(context_ptr) - ->MemoryInvalidationCallback(physical_address_start, length, exact_range); -} - -TextureCache::Texture* TextureCache::DemandResolveTexture( - const TextureInfo& texture_info) { - auto texture_hash = texture_info.hash(); - for (auto it = textures_.find(texture_hash); it != textures_.end(); ++it) { - if (it->second->texture_info == texture_info) { - if (it->second->pending_invalidation) { - // This texture has been invalidated! - RemoveInvalidatedTextures(); - break; - } - - // Tell the trace writer to "cache" this memory (but not read it) - if (texture_info.memory.base_address) { - trace_writer_->WriteMemoryReadCached(texture_info.memory.base_address, - texture_info.memory.base_size); - } - if (texture_info.memory.mip_address) { - trace_writer_->WriteMemoryReadCached(texture_info.memory.mip_address, - texture_info.memory.mip_size); - } - - return it->second; - } - } - - VkFormatFeatureFlags required_flags = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - if (texture_info.format == xenos::TextureFormat::k_24_8 || - texture_info.format == xenos::TextureFormat::k_24_8_FLOAT) { - required_flags |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; - } else { - required_flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; - } - - // No texture at this location. Make a new one. - auto texture = AllocateTexture(texture_info, required_flags); - if (!texture) { - // Failed to allocate texture (out of memory) - XELOGE("Vulkan Texture Cache: Failed to allocate texture!"); - return nullptr; - } - - // Setup a debug name for the texture. 
- device_->DbgSetObjectName( - reinterpret_cast(texture->image), - VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT, - fmt::format( - "RT: 0x{:08X} - 0x{:08X} ({}, {})", texture_info.memory.base_address, - texture_info.memory.base_address + texture_info.memory.base_size, - texture_info.format_info()->name, - get_dimension_name(texture_info.dimension))); - - // Setup an access watch. If this texture is touched, it is destroyed. - WatchTexture(texture); - - textures_[texture_hash] = texture; - COUNT_profile_set("gpu/texture_cache/textures", textures_.size()); - return texture; -} - -TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info, - VkCommandBuffer command_buffer, - VkFence completion_fence) { - // Run a tight loop to scan for an exact match existing texture. - auto texture_hash = texture_info.hash(); - for (auto it = textures_.find(texture_hash); it != textures_.end(); ++it) { - if (it->second->texture_info == texture_info) { - if (it->second->pending_invalidation) { - // This texture has been invalidated! - RemoveInvalidatedTextures(); - break; - } - - if (texture_info.memory.base_address) { - trace_writer_->WriteMemoryReadCached(texture_info.memory.base_address, - texture_info.memory.base_size); - } - if (texture_info.memory.mip_address) { - trace_writer_->WriteMemoryReadCached(texture_info.memory.mip_address, - texture_info.memory.mip_size); - } - return it->second; - } - } - - if (!command_buffer) { - // Texture not found and no command buffer was passed, preventing us from - // uploading a new one. - return nullptr; - } - - // Create a new texture and cache it. - auto texture = AllocateTexture(texture_info); - if (!texture) { - // Failed to allocate texture (out of memory) - XELOGE("Vulkan Texture Cache: Failed to allocate texture!"); - return nullptr; - } - - // Though we didn't find an exact match, that doesn't mean we're out of the - // woods yet. This texture could either be a portion of another texture or - // vice versa. 
Copy any overlapping textures into this texture. - // TODO: Byte count -> pixel count (on x and y axes) - VkOffset2D offset; - auto collide_tex = LookupAddress( - texture_info.memory.base_address, texture_info.width + 1, - texture_info.height + 1, texture_info.format_info()->format, &offset); - if (collide_tex != nullptr) { - // assert_always(); - } - - if (texture_info.memory.base_address) { - trace_writer_->WriteMemoryReadCached(texture_info.memory.base_address, - texture_info.memory.base_size); - } - if (texture_info.memory.mip_address) { - trace_writer_->WriteMemoryReadCached(texture_info.memory.mip_address, - texture_info.memory.mip_size); - } - - if (!UploadTexture(command_buffer, completion_fence, texture, texture_info)) { - FreeTexture(texture); - return nullptr; - } - - // Setup a debug name for the texture. - device_->DbgSetObjectName( - reinterpret_cast(texture->image), - VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT, - fmt::format( - "T: 0x{:08X} - 0x{:08X} ({}, {})", texture_info.memory.base_address, - texture_info.memory.base_address + texture_info.memory.base_size, - texture_info.format_info()->name, - get_dimension_name(texture_info.dimension))); - - textures_[texture_hash] = texture; - COUNT_profile_set("gpu/texture_cache/textures", textures_.size()); - - // Okay. Put a writewatch on it to tell us if it's been modified from the - // guest. 
- WatchTexture(texture); - - return texture; -} - -TextureCache::TextureView* TextureCache::DemandView(Texture* texture, - uint16_t swizzle) { - for (auto it = texture->views.begin(); it != texture->views.end(); ++it) { - if ((*it)->swizzle == swizzle) { - return (*it).get(); - } - } - - auto& config = texture_configs[uint32_t(texture->texture_info.format)]; - - VkImageViewCreateInfo view_info; - view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - view_info.pNext = nullptr; - view_info.flags = 0; - view_info.image = texture->image; - view_info.format = texture->format; - - bool is_cube = false; - switch (texture->texture_info.dimension) { - case xenos::DataDimension::k1D: - case xenos::DataDimension::k2DOrStacked: - if (!texture->texture_info.is_stacked) { - view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; - } else { - view_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; - } - break; - case xenos::DataDimension::k3D: - view_info.viewType = VK_IMAGE_VIEW_TYPE_3D; - break; - case xenos::DataDimension::kCube: - view_info.viewType = VK_IMAGE_VIEW_TYPE_CUBE; - is_cube = true; - break; - default: - assert_always(); - } - - VkComponentSwizzle swizzle_component_map[] = { - config.component_swizzle.r, config.component_swizzle.g, - config.component_swizzle.b, config.component_swizzle.a, - VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE, - VK_COMPONENT_SWIZZLE_IDENTITY, - }; - - VkComponentSwizzle components[] = { - swizzle_component_map[(swizzle >> 0) & 0x7], - swizzle_component_map[(swizzle >> 3) & 0x7], - swizzle_component_map[(swizzle >> 6) & 0x7], - swizzle_component_map[(swizzle >> 9) & 0x7], - }; - -#define SWIZZLE_VECTOR(r, x) \ - { \ - assert_true(config.vector_swizzle.x >= 0 && \ - config.vector_swizzle.x < xe::countof(components)); \ - view_info.components.r = components[config.vector_swizzle.x]; \ - } - SWIZZLE_VECTOR(r, x); - SWIZZLE_VECTOR(g, y); - SWIZZLE_VECTOR(b, z); - SWIZZLE_VECTOR(a, w); -#undef SWIZZLE_CHANNEL - - if (texture->format == 
VK_FORMAT_D16_UNORM_S8_UINT || - texture->format == VK_FORMAT_D24_UNORM_S8_UINT || - texture->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - // This applies to any depth/stencil format, but we only use D24S8 / D32FS8. - view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - } else { - view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - } - view_info.subresourceRange.baseMipLevel = texture->texture_info.mip_min_level; - view_info.subresourceRange.levelCount = texture->texture_info.mip_levels(); - view_info.subresourceRange.baseArrayLayer = 0; - view_info.subresourceRange.layerCount = - !is_cube ? 1 : 1 + texture->texture_info.depth; - - VkImageView view; - auto status = vkCreateImageView(*device_, &view_info, nullptr, &view); - CheckResult(status, "vkCreateImageView"); - if (status == VK_SUCCESS) { - auto texture_view = new TextureView(); - texture_view->texture = texture; - texture_view->view = view; - texture_view->swizzle = swizzle; - texture->views.push_back(std::unique_ptr(texture_view)); - return texture_view; - } - - return nullptr; -} - -TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - auto sampler_hash = sampler_info.hash(); - for (auto it = samplers_.find(sampler_hash); it != samplers_.end(); ++it) { - if (it->second->sampler_info == sampler_info) { - // Found a compatible sampler. - return it->second; - } - } - - VkResult status = VK_SUCCESS; - - // Create a new sampler and cache it. - VkSamplerCreateInfo sampler_create_info; - sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - sampler_create_info.pNext = nullptr; - sampler_create_info.flags = 0; - sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; - sampler_create_info.maxAnisotropy = 1.0f; - - // Texture level filtering. 
- VkSamplerMipmapMode mip_filter; - switch (sampler_info.mip_filter) { - case xenos::TextureFilter::kBaseMap: - // TODO(DrChat): ? - mip_filter = VK_SAMPLER_MIPMAP_MODE_NEAREST; - break; - case xenos::TextureFilter::kPoint: - mip_filter = VK_SAMPLER_MIPMAP_MODE_NEAREST; - break; - case xenos::TextureFilter::kLinear: - mip_filter = VK_SAMPLER_MIPMAP_MODE_LINEAR; - break; - default: - assert_unhandled_case(sampler_info.mip_filter); - return nullptr; - } - - VkFilter min_filter; - switch (sampler_info.min_filter) { - case xenos::TextureFilter::kPoint: - min_filter = VK_FILTER_NEAREST; - break; - case xenos::TextureFilter::kLinear: - min_filter = VK_FILTER_LINEAR; - break; - default: - assert_unhandled_case(sampler_info.min_filter); - return nullptr; - } - VkFilter mag_filter; - switch (sampler_info.mag_filter) { - case xenos::TextureFilter::kPoint: - mag_filter = VK_FILTER_NEAREST; - break; - case xenos::TextureFilter::kLinear: - mag_filter = VK_FILTER_LINEAR; - break; - default: - assert_unhandled_case(mag_filter); - return nullptr; - } - - sampler_create_info.minFilter = min_filter; - sampler_create_info.magFilter = mag_filter; - sampler_create_info.mipmapMode = mip_filter; - - // FIXME: Both halfway / mirror clamp to border aren't mapped properly. 
- VkSamplerAddressMode address_mode_map[] = { - /* kRepeat */ VK_SAMPLER_ADDRESS_MODE_REPEAT, - /* kMirroredRepeat */ VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT, - /* kClampToEdge */ VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - /* kMirrorClampToEdge */ VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, - /* kClampToHalfway */ VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - /* kMirrorClampToHalfway */ VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, - /* kClampToBorder */ VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - /* kMirrorClampToBorder */ VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, - }; - sampler_create_info.addressModeU = - address_mode_map[static_cast(sampler_info.clamp_u)]; - sampler_create_info.addressModeV = - address_mode_map[static_cast(sampler_info.clamp_v)]; - sampler_create_info.addressModeW = - address_mode_map[static_cast(sampler_info.clamp_w)]; - - float aniso = 0.f; - switch (sampler_info.aniso_filter) { - case xenos::AnisoFilter::kDisabled: - aniso = 1.0f; - break; - case xenos::AnisoFilter::kMax_1_1: - aniso = 1.0f; - break; - case xenos::AnisoFilter::kMax_2_1: - aniso = 2.0f; - break; - case xenos::AnisoFilter::kMax_4_1: - aniso = 4.0f; - break; - case xenos::AnisoFilter::kMax_8_1: - aniso = 8.0f; - break; - case xenos::AnisoFilter::kMax_16_1: - aniso = 16.0f; - break; - default: - assert_unhandled_case(aniso); - return nullptr; - } - - sampler_create_info.anisotropyEnable = - sampler_info.aniso_filter != xenos::AnisoFilter::kDisabled ? 
VK_TRUE - : VK_FALSE; - sampler_create_info.maxAnisotropy = aniso; - - sampler_create_info.compareEnable = VK_FALSE; - sampler_create_info.compareOp = VK_COMPARE_OP_NEVER; - sampler_create_info.mipLodBias = sampler_info.lod_bias; - sampler_create_info.minLod = float(sampler_info.mip_min_level); - sampler_create_info.maxLod = float(sampler_info.mip_max_level); - sampler_create_info.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; - sampler_create_info.unnormalizedCoordinates = VK_FALSE; - VkSampler vk_sampler; - status = - vkCreateSampler(*device_, &sampler_create_info, nullptr, &vk_sampler); - CheckResult(status, "vkCreateSampler"); - if (status != VK_SUCCESS) { - return nullptr; - } - - auto sampler = new Sampler(); - sampler->sampler = vk_sampler; - sampler->sampler_info = sampler_info; - samplers_[sampler_hash] = sampler; - - return sampler; -} - -bool TextureFormatIsSimilar(xenos::TextureFormat left, - xenos::TextureFormat right) { -#define COMPARE_FORMAT(x, y) \ - if ((left == xenos::TextureFormat::x && right == xenos::TextureFormat::y) || \ - (left == xenos::TextureFormat::y && right == xenos::TextureFormat::x)) { \ - return true; \ - } - - if (left == right) return true; - if (GetBaseFormat(left) == GetBaseFormat(right)) return true; - - return false; -#undef COMPARE_FORMAT -} - -TextureCache::Texture* TextureCache::Lookup(const TextureInfo& texture_info) { - auto texture_hash = texture_info.hash(); - for (auto it = textures_.find(texture_hash); it != textures_.end(); ++it) { - if (it->second->texture_info == texture_info) { - return it->second; - } - } - - // slow path - for (auto it = textures_.begin(); it != textures_.end(); ++it) { - const auto& other_texture_info = it->second->texture_info; - -#define COMPARE_FIELD(x) \ - if (texture_info.x != other_texture_info.x) continue - COMPARE_FIELD(memory.base_address); - COMPARE_FIELD(memory.base_size); - COMPARE_FIELD(dimension); - COMPARE_FIELD(width); - COMPARE_FIELD(height); - COMPARE_FIELD(depth); - 
COMPARE_FIELD(endianness); - COMPARE_FIELD(is_tiled); -#undef COMPARE_FIELD - - if (!TextureFormatIsSimilar(texture_info.format, - other_texture_info.format)) { - continue; - } - - /*const auto format_info = texture_info.format_info(); - const auto other_format_info = other_texture_info.format_info(); -#define COMPARE_FIELD(x) if (format_info->x != other_format_info->x) continue - COMPARE_FIELD(type); - COMPARE_FIELD(block_width); - COMPARE_FIELD(block_height); - COMPARE_FIELD(bits_per_pixel); -#undef COMPARE_FIELD*/ - return it->second; - } - - return nullptr; -} - -TextureCache::Texture* TextureCache::LookupAddress(uint32_t guest_address, - uint32_t width, - uint32_t height, - xenos::TextureFormat format, - VkOffset2D* out_offset) { - for (auto it = textures_.begin(); it != textures_.end(); ++it) { - const auto& texture_info = it->second->texture_info; - if (guest_address >= texture_info.memory.base_address && - guest_address < - texture_info.memory.base_address + texture_info.memory.base_size && - texture_info.pitch >= width && texture_info.height >= height && - out_offset) { - auto offset_bytes = guest_address - texture_info.memory.base_address; - - if (texture_info.dimension == xenos::DataDimension::k2DOrStacked) { - out_offset->x = 0; - out_offset->y = offset_bytes / texture_info.pitch; - if (offset_bytes % texture_info.pitch != 0) { - // TODO: offset_x - } - } - - return it->second; - } - - if (texture_info.memory.base_address == guest_address && - texture_info.dimension == xenos::DataDimension::k2DOrStacked && - texture_info.pitch == width && texture_info.height == height) { - if (out_offset) { - out_offset->x = 0; - out_offset->y = 0; - } - - return it->second; - } - } - - return nullptr; -} - -void TextureCache::FlushPendingCommands(VkCommandBuffer command_buffer, - VkFence completion_fence) { - auto status = vkEndCommandBuffer(command_buffer); - CheckResult(status, "vkEndCommandBuffer"); - - VkSubmitInfo submit_info; - std::memset(&submit_info, 0, 
sizeof(submit_info)); - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &command_buffer; - - if (device_queue_) { - auto status = - vkQueueSubmit(device_queue_, 1, &submit_info, completion_fence); - CheckResult(status, "vkQueueSubmit"); - } else { - std::lock_guard(device_->primary_queue_mutex()); - - auto status = vkQueueSubmit(device_->primary_queue(), 1, &submit_info, - completion_fence); - CheckResult(status, "vkQueueSubmit"); - } - - vkWaitForFences(*device_, 1, &completion_fence, VK_TRUE, -1); - staging_buffer_.Scavenge(); - vkResetFences(*device_, 1, &completion_fence); - - // Reset the command buffer and put it back into the recording state. - vkResetCommandBuffer(command_buffer, 0); - VkCommandBufferBeginInfo begin_info; - std::memset(&begin_info, 0, sizeof(begin_info)); - begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - vkBeginCommandBuffer(command_buffer, &begin_info); -} - -bool TextureCache::ConvertTexture(uint8_t* dest, VkBufferImageCopy* copy_region, - uint32_t mip, const TextureInfo& src) { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - uint32_t offset_x = 0; - uint32_t offset_y = 0; - uint32_t address = src.GetMipLocation(mip, &offset_x, &offset_y, true); - if (!address) { - return false; - } - - void* host_address = memory_->TranslatePhysical(address); - - auto is_cube = src.dimension == xenos::DataDimension::kCube; - auto src_extent = src.GetMipExtent(mip, true); - auto dst_extent = GetMipExtent(src, mip); - - uint32_t src_pitch = - src_extent.block_pitch_h * src.format_info()->bytes_per_block(); - uint32_t dst_pitch = - dst_extent.block_pitch_h * GetFormatInfo(src.format)->bytes_per_block(); - - auto copy_block = GetFormatCopyBlock(src.format); - - const uint8_t* src_mem = reinterpret_cast(host_address); - if (!src.is_tiled) { - for (uint32_t 
face = 0; face < dst_extent.depth; face++) { - src_mem += offset_y * src_pitch; - src_mem += offset_x * src.format_info()->bytes_per_block(); - for (uint32_t y = 0; y < dst_extent.block_height; y++) { - copy_block(src.endianness, dest + y * dst_pitch, - src_mem + y * src_pitch, dst_pitch); - } - src_mem += src_pitch * src_extent.block_pitch_v; - dest += dst_pitch * dst_extent.block_pitch_v; - } - } else { - // Untile image. - // We could do this in a shader to speed things up, as this is pretty slow. - for (uint32_t face = 0; face < dst_extent.depth; face++) { - texture_conversion::UntileInfo untile_info; - std::memset(&untile_info, 0, sizeof(untile_info)); - untile_info.offset_x = offset_x; - untile_info.offset_y = offset_y; - untile_info.width = src_extent.block_width; - untile_info.height = src_extent.block_height; - untile_info.input_pitch = src_extent.block_pitch_h; - untile_info.output_pitch = dst_extent.block_pitch_h; - untile_info.input_format_info = src.format_info(); - untile_info.output_format_info = GetFormatInfo(src.format); - untile_info.copy_callback = [=](auto o, auto i, auto l) { - copy_block(src.endianness, o, i, l); - }; - texture_conversion::Untile(dest, src_mem, &untile_info); - src_mem += src_pitch * src_extent.block_pitch_v; - dest += dst_pitch * dst_extent.block_pitch_v; - } - } - - copy_region->bufferRowLength = dst_extent.pitch; - copy_region->bufferImageHeight = dst_extent.height; - copy_region->imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - copy_region->imageSubresource.mipLevel = mip; - copy_region->imageSubresource.baseArrayLayer = 0; - copy_region->imageSubresource.layerCount = !is_cube ? 1 : dst_extent.depth; - copy_region->imageExtent.width = std::max(1u, (src.width + 1) >> mip); - copy_region->imageExtent.height = std::max(1u, (src.height + 1) >> mip); - copy_region->imageExtent.depth = !is_cube ? 
dst_extent.depth : 1; - return true; -} - -bool TextureCache::UploadTexture(VkCommandBuffer command_buffer, - VkFence completion_fence, Texture* dest, - const TextureInfo& src) { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - size_t unpack_length = ComputeTextureStorage(src); - - XELOGGPU( - "Uploading texture @ 0x{:08X}/0x{:08X} ({}x{}x{}, format: {}, dim: {}, " - "levels: {} ({}-{}), stacked: {}, pitch: {}, tiled: {}, packed mips: {}, " - "unpack length: 0x{:X})", - src.memory.base_address, src.memory.mip_address, src.width + 1, - src.height + 1, src.depth + 1, src.format_info()->name, - get_dimension_name(src.dimension), src.mip_levels(), src.mip_min_level, - src.mip_max_level, src.is_stacked ? "yes" : "no", src.pitch, - src.is_tiled ? "yes" : "no", src.has_packed_mips ? "yes" : "no", - unpack_length); - - XELOGGPU("Extent: {}x{}x{} {},{},{}", src.extent.pitch, src.extent.height, - src.extent.depth, src.extent.block_pitch_h, src.extent.block_height, - src.extent.block_pitch_v); - - if (!unpack_length) { - XELOGW("Failed to compute texture storage!"); - return false; - } - - if (!staging_buffer_.CanAcquire(unpack_length)) { - // Need to have unique memory for every upload for at least one frame. If we - // run out of memory, we need to flush all queued upload commands to the - // GPU. - FlushPendingCommands(command_buffer, completion_fence); - - // Uploads have been flushed. Continue. - if (!staging_buffer_.CanAcquire(unpack_length)) { - // The staging buffer isn't big enough to hold this texture. - XELOGE( - "TextureCache staging buffer is too small! (uploading 0x{:X} bytes)", - unpack_length); - assert_always(); - return false; - } - } - - // Grab some temporary memory for staging. 
- auto alloc = staging_buffer_.Acquire(unpack_length, completion_fence); - assert_not_null(alloc); - if (!alloc) { - XELOGE("{}: Failed to acquire staging memory!", __func__); - return false; - } - - // DEBUG: Check the source address. If it's completely zero'd out, print it. - bool valid = false; - auto src_data = memory_->TranslatePhysical(src.memory.base_address); - for (uint32_t i = 0; i < src.memory.base_size; i++) { - if (src_data[i] != 0) { - valid = true; - break; - } - } - - if (!valid) { - XELOGW("Warning: Texture @ 0x{:08X} is blank!", src.memory.base_address); - } - - // Upload texture into GPU memory. - // TODO: If the GPU supports it, we can submit a compute batch to convert the - // texture and copy it to its destination. Otherwise, fallback to conversion - // on the CPU. - uint32_t copy_region_count = src.mip_levels(); - std::vector copy_regions(copy_region_count); - - // Upload all mips. - auto unpack_buffer = reinterpret_cast(alloc->host_ptr); - VkDeviceSize unpack_offset = 0; - for (uint32_t mip = src.mip_min_level, region = 0; mip <= src.mip_max_level; - mip++, region++) { - if (!ConvertTexture(&unpack_buffer[unpack_offset], ©_regions[region], - mip, src)) { - XELOGW("Failed to convert texture mip {}!", mip); - return false; - } - copy_regions[region].bufferOffset = alloc->offset + unpack_offset; - copy_regions[region].imageOffset = {0, 0, 0}; - - /* - XELOGGPU("Mip {} {}x{}x{} @ 0x{:X}", mip, - copy_regions[region].imageExtent.width, - copy_regions[region].imageExtent.height, - copy_regions[region].imageExtent.depth, unpack_offset); - */ - - unpack_offset += ComputeMipStorage(src, mip); - } - - if (cvars::texture_dump) { - TextureDump(src, unpack_buffer, unpack_length); - } - - // Transition the texture into a transfer destination layout. 
- VkImageMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = 0; - barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.oldLayout = dest->image_layout; - barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - barrier.srcQueueFamilyIndex = VK_FALSE; - barrier.dstQueueFamilyIndex = VK_FALSE; - barrier.image = dest->image; - if (dest->format == VK_FORMAT_D16_UNORM_S8_UINT || - dest->format == VK_FORMAT_D24_UNORM_S8_UINT || - dest->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - barrier.subresourceRange.aspectMask = - VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - } else { - barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - } - barrier.subresourceRange.baseMipLevel = src.mip_min_level; - barrier.subresourceRange.levelCount = src.mip_levels(); - barrier.subresourceRange.baseArrayLayer = - copy_regions[0].imageSubresource.baseArrayLayer; - barrier.subresourceRange.layerCount = - copy_regions[0].imageSubresource.layerCount; - - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, - nullptr, 1, &barrier); - - // Now move the converted texture into the destination. - if (dest->format == VK_FORMAT_D16_UNORM_S8_UINT || - dest->format == VK_FORMAT_D24_UNORM_S8_UINT || - dest->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - // Do just a depth upload (for now). - // This assumes depth buffers don't have mips (hopefully they don't) - assert_true(src.mip_levels() == 1); - copy_regions[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - } - - vkCmdCopyBufferToImage(command_buffer, staging_buffer_.gpu_buffer(), - dest->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - copy_region_count, copy_regions.data()); - - // Now transition the texture into a shader readonly source. 
- barrier.srcAccessMask = barrier.dstAccessMask; - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - barrier.oldLayout = barrier.newLayout; - barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - 0, 0, nullptr, 0, nullptr, 1, &barrier); - - dest->image_layout = barrier.newLayout; - return true; -} - -const FormatInfo* TextureCache::GetFormatInfo(xenos::TextureFormat format) { - switch (format) { - case xenos::TextureFormat::k_CTX1: - return FormatInfo::Get(xenos::TextureFormat::k_8_8); - case xenos::TextureFormat::k_DXT3A: - return FormatInfo::Get(xenos::TextureFormat::k_DXT2_3); - default: - return FormatInfo::Get(format); - } -} - -texture_conversion::CopyBlockCallback TextureCache::GetFormatCopyBlock( - xenos::TextureFormat format) { - switch (format) { - case xenos::TextureFormat::k_CTX1: - return texture_conversion::ConvertTexelCTX1ToR8G8; - case xenos::TextureFormat::k_DXT3A: - return texture_conversion::ConvertTexelDXT3AToDXT3; - default: - return texture_conversion::CopySwapBlock; - } -} - -TextureExtent TextureCache::GetMipExtent(const TextureInfo& src, uint32_t mip) { - auto format_info = GetFormatInfo(src.format); - uint32_t width = src.width + 1; - uint32_t height = src.height + 1; - uint32_t depth = src.depth + 1; - TextureExtent extent; - if (mip == 0) { - extent = TextureExtent::Calculate(format_info, width, height, depth, false, - false); - } else { - uint32_t mip_width = std::max(1u, width >> mip); - uint32_t mip_height = std::max(1u, height >> mip); - extent = TextureExtent::Calculate(format_info, mip_width, mip_height, depth, - false, false); - } - return extent; -} - -uint32_t TextureCache::ComputeMipStorage(const FormatInfo* format_info, - uint32_t width, uint32_t height, - uint32_t depth, uint32_t mip) { - assert_not_null(format_info); - TextureExtent extent; - if (mip == 0) { - 
extent = TextureExtent::Calculate(format_info, width, height, depth, false, - false); - } else { - uint32_t mip_width = std::max(1u, width >> mip); - uint32_t mip_height = std::max(1u, height >> mip); - extent = TextureExtent::Calculate(format_info, mip_width, mip_height, depth, - false, false); - } - uint32_t bytes_per_block = format_info->bytes_per_block(); - return extent.all_blocks() * bytes_per_block; -} - -uint32_t TextureCache::ComputeMipStorage(const TextureInfo& src, uint32_t mip) { - uint32_t size = ComputeMipStorage(GetFormatInfo(src.format), src.width + 1, - src.height + 1, src.depth + 1, mip); - // ensure 4-byte alignment - return (size + 3) & (~3u); -} - -uint32_t TextureCache::ComputeTextureStorage(const TextureInfo& src) { - auto format_info = GetFormatInfo(src.format); - uint32_t width = src.width + 1; - uint32_t height = src.height + 1; - uint32_t depth = src.depth + 1; - uint32_t length = 0; - for (uint32_t mip = src.mip_min_level; mip <= src.mip_max_level; ++mip) { - if (mip == 0 && !src.memory.base_address) { - continue; - } else if (mip > 0 && !src.memory.mip_address) { - continue; - } - length += ComputeMipStorage(format_info, width, height, depth, mip); - } - return length; -} - -void TextureCache::WritebackTexture(Texture* texture) { - VkResult status = VK_SUCCESS; - VkFence fence = wb_command_pool_->BeginBatch(); - auto alloc = wb_staging_buffer_.Acquire(texture->alloc_info.size, fence); - if (!alloc) { - wb_command_pool_->EndBatch(); - return; - } - - auto command_buffer = wb_command_pool_->AcquireEntry(); - - VkCommandBufferBeginInfo begin_info = { - VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - nullptr, - VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - nullptr, - }; - vkBeginCommandBuffer(command_buffer, &begin_info); - - // TODO: Transition the texture to a transfer source. - // TODO: copy depth/layers? 
- - VkBufferImageCopy region; - region.bufferOffset = alloc->offset; - region.bufferRowLength = 0; - region.bufferImageHeight = 0; - region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - region.imageSubresource.mipLevel = 0; - region.imageSubresource.baseArrayLayer = 0; - region.imageSubresource.layerCount = 1; - region.imageOffset.x = 0; - region.imageOffset.y = 0; - region.imageOffset.z = 0; - region.imageExtent.width = texture->texture_info.width + 1; - region.imageExtent.height = texture->texture_info.height + 1; - region.imageExtent.depth = 1; - - vkCmdCopyImageToBuffer(command_buffer, texture->image, - VK_IMAGE_LAYOUT_GENERAL, - wb_staging_buffer_.gpu_buffer(), 1, ®ion); - - // TODO: Transition the texture back to a shader resource. - - vkEndCommandBuffer(command_buffer); - - // Submit the command buffer. - // Submit commands and wait. - { - std::lock_guard(device_->primary_queue_mutex()); - VkSubmitInfo submit_info = { - VK_STRUCTURE_TYPE_SUBMIT_INFO, - nullptr, - 0, - nullptr, - nullptr, - 1, - &command_buffer, - 0, - nullptr, - }; - status = vkQueueSubmit(device_->primary_queue(), 1, &submit_info, fence); - CheckResult(status, "vkQueueSubmit"); - - if (status == VK_SUCCESS) { - status = vkQueueWaitIdle(device_->primary_queue()); - CheckResult(status, "vkQueueWaitIdle"); - } - } - - wb_command_pool_->EndBatch(); - - if (status == VK_SUCCESS) { - auto dest = - memory_->TranslatePhysical(texture->texture_info.memory.base_address); - std::memcpy(dest, alloc->host_ptr, texture->texture_info.memory.base_size); - } - - wb_staging_buffer_.Scavenge(); -} - -void TextureCache::HashTextureBindings( - XXH64_state_t* hash_state, uint32_t& fetch_mask, - const std::vector& bindings) { - for (auto& binding : bindings) { - uint32_t fetch_bit = 1 << binding.fetch_constant; - if (fetch_mask & fetch_bit) { - // We've covered this binding. 
- continue; - } - fetch_mask |= fetch_bit; - - auto& regs = *register_file_; - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6; - auto group = - reinterpret_cast(®s.values[r]); - auto& fetch = group->texture_fetch; - - XXH64_update(hash_state, &fetch, sizeof(fetch)); - } -} - -VkDescriptorSet TextureCache::PrepareTextureSet( - VkCommandBuffer command_buffer, VkFence completion_fence, - const std::vector& vertex_bindings, - const std::vector& pixel_bindings) { - XXH64_state_t hash_state; - XXH64_reset(&hash_state, 0); - - // (quickly) Generate a hash. - uint32_t fetch_mask = 0; - HashTextureBindings(&hash_state, fetch_mask, vertex_bindings); - HashTextureBindings(&hash_state, fetch_mask, pixel_bindings); - uint64_t hash = XXH64_digest(&hash_state); - for (auto it = texture_sets_.find(hash); it != texture_sets_.end(); ++it) { - // TODO(DrChat): We need to compare the bindings and ensure they're equal. - return it->second; - } - - // Clear state. - auto update_set_info = &update_set_info_; - update_set_info->has_setup_fetch_mask = 0; - update_set_info->image_write_count = 0; - - std::memset(update_set_info, 0, sizeof(update_set_info_)); - - // Process vertex and pixel shader bindings. - // This does things lazily and de-dupes fetch constants reused in both - // shaders. - bool any_failed = false; - any_failed = !SetupTextureBindings(command_buffer, completion_fence, - update_set_info, vertex_bindings) || - any_failed; - any_failed = !SetupTextureBindings(command_buffer, completion_fence, - update_set_info, pixel_bindings) || - any_failed; - if (any_failed) { - XELOGW("Failed to setup one or more texture bindings!"); - // TODO(benvanik): actually bail out here? 
- } - - // Open a new batch of descriptor sets (for this frame) - if (!descriptor_pool_->has_open_batch()) { - descriptor_pool_->BeginBatch(completion_fence); - } - - auto descriptor_set = - descriptor_pool_->AcquireEntry(texture_descriptor_set_layout_); - if (!descriptor_set) { - return nullptr; - } - - for (uint32_t i = 0; i < update_set_info->image_write_count; i++) { - update_set_info->image_writes[i].dstSet = descriptor_set; - } - - // Update the descriptor set. - if (update_set_info->image_write_count > 0) { - vkUpdateDescriptorSets(*device_, update_set_info->image_write_count, - update_set_info->image_writes, 0, nullptr); - } - - texture_sets_[hash] = descriptor_set; - return descriptor_set; -} - -bool TextureCache::SetupTextureBindings( - VkCommandBuffer command_buffer, VkFence completion_fence, - UpdateSetInfo* update_set_info, - const std::vector& bindings) { - bool any_failed = false; - for (auto& binding : bindings) { - uint32_t fetch_bit = 1 << binding.fetch_constant; - if ((update_set_info->has_setup_fetch_mask & fetch_bit) == 0) { - // Needs setup. - any_failed = !SetupTextureBinding(command_buffer, completion_fence, - update_set_info, binding) || - any_failed; - update_set_info->has_setup_fetch_mask |= fetch_bit; - } - } - return !any_failed; -} - -bool TextureCache::SetupTextureBinding(VkCommandBuffer command_buffer, - VkFence completion_fence, - UpdateSetInfo* update_set_info, - const Shader::TextureBinding& binding) { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - auto& regs = *register_file_; - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6; - auto group = - reinterpret_cast(®s.values[r]); - auto& fetch = group->texture_fetch; - - // Disabled? - // TODO(benvanik): reset sampler. 
- switch (fetch.type) { - case xenos::FetchConstantType::kTexture: - break; - case xenos::FetchConstantType::kInvalidTexture: - if (cvars::gpu_allow_invalid_fetch_constants) { - break; - } - XELOGW( - "Texture fetch constant {} ({:08X} {:08X} {:08X} {:08X} {:08X} " - "{:08X}) has " - "\"invalid\" type! This is incorrect behavior, but you can try " - "bypassing this by launching Xenia with " - "--gpu_allow_invalid_fetch_constants=true.", - binding.fetch_constant, fetch.dword_0, fetch.dword_1, fetch.dword_2, - fetch.dword_3, fetch.dword_4, fetch.dword_5); - return false; - default: - XELOGW( - "Texture fetch constant {} ({:08X} {:08X} {:08X} {:08X} {:08X} " - "{:08X}) is " - "completely invalid!", - binding.fetch_constant, fetch.dword_0, fetch.dword_1, fetch.dword_2, - fetch.dword_3, fetch.dword_4, fetch.dword_5); - return false; - } - - TextureInfo texture_info; - if (!TextureInfo::Prepare(fetch, &texture_info)) { - XELOGE("Unable to parse texture fetcher info"); - return false; // invalid texture used - } - SamplerInfo sampler_info; - if (!SamplerInfo::Prepare(fetch, binding.fetch_instr, &sampler_info)) { - XELOGE("Unable to parse sampler info"); - return false; // invalid texture used - } - - // Search via the base format. - texture_info.format = GetBaseFormat(texture_info.format); - - auto texture = Demand(texture_info, command_buffer, completion_fence); - auto sampler = Demand(sampler_info); - if (texture == nullptr || sampler == nullptr) { - XELOGE("Texture or sampler is NULL!"); - return false; - } - - uint16_t swizzle = static_cast(fetch.swizzle); - auto view = DemandView(texture, swizzle); - - auto image_info = - &update_set_info->image_infos[update_set_info->image_write_count]; - auto image_write = - &update_set_info->image_writes[update_set_info->image_write_count]; - update_set_info->image_write_count++; - - // Sanity check, we only have 32 binding slots. 
- assert(binding.binding_index < 32); - - image_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - image_write->pNext = nullptr; - // image_write->dstSet is set later... - image_write->dstBinding = 0; - image_write->dstArrayElement = uint32_t(binding.binding_index); - image_write->descriptorCount = 1; - image_write->descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - image_write->pImageInfo = image_info; - image_write->pBufferInfo = nullptr; - image_write->pTexelBufferView = nullptr; - - image_info->imageView = view->view; - image_info->imageLayout = texture->image_layout; - image_info->sampler = sampler->sampler; - texture->in_flight_fence = completion_fence; - - return true; -} - -void TextureCache::RemoveInvalidatedTextures() { - std::unordered_set& invalidated_textures = *invalidated_textures_; - - // Clean up any invalidated textures. - { - auto global_lock = global_critical_region_.Acquire(); - if (invalidated_textures_ == &invalidated_textures_sets_[0]) { - invalidated_textures_ = &invalidated_textures_sets_[1]; - } else { - invalidated_textures_ = &invalidated_textures_sets_[0]; - } - } - - // Append all invalidated textures to a deletion queue. They will be deleted - // when all command buffers using them have finished executing. - if (!invalidated_textures.empty()) { - for (auto it = invalidated_textures.begin(); - it != invalidated_textures.end(); ++it) { - pending_delete_textures_.push_back(*it); - textures_.erase((*it)->texture_info.hash()); - } - - COUNT_profile_set("gpu/texture_cache/textures", textures_.size()); - COUNT_profile_set("gpu/texture_cache/pending_deletes", - pending_delete_textures_.size()); - invalidated_textures.clear(); - } -} - -void TextureCache::ClearCache() { - RemoveInvalidatedTextures(); - for (auto it = textures_.begin(); it != textures_.end(); ++it) { - while (!FreeTexture(it->second)) { - // Texture still in use. Busy loop. 
- xe::threading::MaybeYield(); - } - } - textures_.clear(); - COUNT_profile_set("gpu/texture_cache/textures", 0); - - for (auto it = samplers_.begin(); it != samplers_.end(); ++it) { - vkDestroySampler(*device_, it->second->sampler, nullptr); - delete it->second; - } - samplers_.clear(); -} - -void TextureCache::Scavenge() { - SCOPE_profile_cpu_f("gpu"); - - // Close any open descriptor pool batches - if (descriptor_pool_->has_open_batch()) { - descriptor_pool_->EndBatch(); - } - - // Free unused descriptor sets - // TODO(DrChat): These sets could persist across frames, we just need a smart - // way to detect if they're unused and free them. - texture_sets_.clear(); - descriptor_pool_->Scavenge(); - staging_buffer_.Scavenge(); - - // Kill all pending delete textures. - RemoveInvalidatedTextures(); - if (!pending_delete_textures_.empty()) { - for (auto it = pending_delete_textures_.begin(); - it != pending_delete_textures_.end();) { - if (!FreeTexture(*it)) { - break; - } - - it = pending_delete_textures_.erase(it); - } - - COUNT_profile_set("gpu/texture_cache/pending_deletes", - pending_delete_textures_.size()); - } -} - -} // namespace vulkan -} // namespace gpu -} // namespace xe diff --git a/src/xenia/gpu/vulkan/texture_cache.h b/src/xenia/gpu/vulkan/texture_cache.h deleted file mode 100644 index f4d2ad564..000000000 --- a/src/xenia/gpu/vulkan/texture_cache.h +++ /dev/null @@ -1,244 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#ifndef XENIA_GPU_VULKAN_TEXTURE_CACHE_H_ -#define XENIA_GPU_VULKAN_TEXTURE_CACHE_H_ - -#include -#include -#include -#include - -#include "xenia/base/mutex.h" -#include "xenia/gpu/register_file.h" -#include "xenia/gpu/sampler_info.h" -#include "xenia/gpu/shader.h" -#include "xenia/gpu/texture_conversion.h" -#include "xenia/gpu/texture_info.h" -#include "xenia/gpu/trace_writer.h" -#include "xenia/gpu/vulkan/vulkan_command_processor.h" -#include "xenia/gpu/xenos.h" -#include "xenia/ui/vulkan/circular_buffer.h" -#include "xenia/ui/vulkan/fenced_pools.h" -#include "xenia/ui/vulkan/vulkan.h" -#include "xenia/ui/vulkan/vulkan_device.h" - -#include "third_party/vulkan/vk_mem_alloc.h" - -namespace xe { -namespace gpu { -namespace vulkan { - -// -class TextureCache { - public: - struct TextureView; - - // This represents an uploaded Vulkan texture. - struct Texture { - TextureInfo texture_info; - std::vector> views; - - VkFormat format; - VkImage image; - VkImageLayout image_layout; - VmaAllocation alloc; - VmaAllocationInfo alloc_info; - VkFramebuffer framebuffer; // Blit target frame buffer. - VkImageUsageFlags usage_flags; - - bool is_watched; - bool pending_invalidation; - - // Pointer to the latest usage fence. - VkFence in_flight_fence; - }; - - struct TextureView { - Texture* texture; - VkImageView view; - - union { - struct { - // FIXME: This only applies on little-endian platforms! - uint16_t swiz_x : 3; - uint16_t swiz_y : 3; - uint16_t swiz_z : 3; - uint16_t swiz_w : 3; - uint16_t : 4; - }; - - uint16_t swizzle; - }; - }; - - TextureCache(Memory* memory, RegisterFile* register_file, - TraceWriter* trace_writer, ui::vulkan::VulkanDevice* device); - ~TextureCache(); - - VkResult Initialize(); - void Shutdown(); - - // Descriptor set layout containing all possible texture bindings. - // The set contains one descriptor for each texture sampler [0-31]. 
- VkDescriptorSetLayout texture_descriptor_set_layout() const { - return texture_descriptor_set_layout_; - } - - // Prepares a descriptor set containing the samplers and images for all - // bindings. The textures will be uploaded/converted/etc as needed. - // Requires a fence to be provided that will be signaled when finished - // using the returned descriptor set. - VkDescriptorSet PrepareTextureSet( - VkCommandBuffer setup_command_buffer, VkFence completion_fence, - const std::vector& vertex_bindings, - const std::vector& pixel_bindings); - - // TODO(benvanik): ReadTexture. - - Texture* Lookup(const TextureInfo& texture_info); - - // Looks for a texture either containing or matching these parameters. - // Caller is responsible for checking if the texture returned is an exact - // match or just contains the texture given by the parameters. - // If offset_x and offset_y are not null, this may return a texture that - // contains this address at an offset. - Texture* LookupAddress(uint32_t guest_address, uint32_t width, - uint32_t height, xenos::TextureFormat format, - VkOffset2D* out_offset = nullptr); - - TextureView* DemandView(Texture* texture, uint16_t swizzle); - - // Demands a texture for the purpose of resolving from EDRAM. This either - // creates a new texture or returns a previously created texture. - Texture* DemandResolveTexture(const TextureInfo& texture_info); - - // Clears all cached content. - void ClearCache(); - - // Frees any unused resources - void Scavenge(); - - private: - struct UpdateSetInfo; - - // Cached Vulkan sampler. - struct Sampler { - SamplerInfo sampler_info; - VkSampler sampler; - }; - - struct WatchedTexture { - Texture* texture; - bool is_mip; - }; - - // Allocates a new texture and memory to back it on the GPU. 
- Texture* AllocateTexture(const TextureInfo& texture_info, - VkFormatFeatureFlags required_flags = - VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT); - bool FreeTexture(Texture* texture); - - void WatchTexture(Texture* texture); - void TextureTouched(Texture* texture); - std::pair MemoryInvalidationCallback( - uint32_t physical_address_start, uint32_t length, bool exact_range); - static std::pair MemoryInvalidationCallbackThunk( - void* context_ptr, uint32_t physical_address_start, uint32_t length, - bool exact_range); - - // Demands a texture. If command_buffer is null and the texture hasn't been - // uploaded to graphics memory already, we will return null and bail. - Texture* Demand(const TextureInfo& texture_info, - VkCommandBuffer command_buffer = nullptr, - VkFence completion_fence = nullptr); - Sampler* Demand(const SamplerInfo& sampler_info); - - void FlushPendingCommands(VkCommandBuffer command_buffer, - VkFence completion_fence); - - bool ConvertTexture(uint8_t* dest, VkBufferImageCopy* copy_region, - uint32_t mip, const TextureInfo& src); - - static const FormatInfo* GetFormatInfo(xenos::TextureFormat format); - static texture_conversion::CopyBlockCallback GetFormatCopyBlock( - xenos::TextureFormat format); - static TextureExtent GetMipExtent(const TextureInfo& src, uint32_t mip); - static uint32_t ComputeMipStorage(const FormatInfo* format_info, - uint32_t width, uint32_t height, - uint32_t depth, uint32_t mip); - static uint32_t ComputeMipStorage(const TextureInfo& src, uint32_t mip); - static uint32_t ComputeTextureStorage(const TextureInfo& src); - - // Writes a texture back into guest memory. This call is (mostly) asynchronous - // but the texture must not be flagged for destruction. - void WritebackTexture(Texture* texture); - - // Queues commands to upload a texture from system memory, applying any - // conversions necessary. This may flush the command buffer to the GPU if we - // run out of staging memory. 
- bool UploadTexture(VkCommandBuffer command_buffer, VkFence completion_fence, - Texture* dest, const TextureInfo& src); - - void HashTextureBindings(XXH64_state_t* hash_state, uint32_t& fetch_mask, - const std::vector& bindings); - bool SetupTextureBindings( - VkCommandBuffer command_buffer, VkFence completion_fence, - UpdateSetInfo* update_set_info, - const std::vector& bindings); - bool SetupTextureBinding(VkCommandBuffer command_buffer, - VkFence completion_fence, - UpdateSetInfo* update_set_info, - const Shader::TextureBinding& binding); - - // Removes invalidated textures from the cache, queues them for delete. - void RemoveInvalidatedTextures(); - - Memory* memory_ = nullptr; - - RegisterFile* register_file_ = nullptr; - TraceWriter* trace_writer_ = nullptr; - ui::vulkan::VulkanDevice* device_ = nullptr; - VkQueue device_queue_ = nullptr; - - std::unique_ptr wb_command_pool_ = nullptr; - std::unique_ptr descriptor_pool_ = nullptr; - std::unordered_map texture_sets_; - VkDescriptorSetLayout texture_descriptor_set_layout_ = nullptr; - - VmaAllocator mem_allocator_ = nullptr; - - ui::vulkan::CircularBuffer staging_buffer_; - ui::vulkan::CircularBuffer wb_staging_buffer_; - std::unordered_map textures_; - std::unordered_map samplers_; - std::list pending_delete_textures_; - - void* memory_invalidation_callback_handle_ = nullptr; - - xe::global_critical_region global_critical_region_; - std::list watched_textures_; - std::unordered_set* invalidated_textures_; - std::unordered_set invalidated_textures_sets_[2]; - - struct UpdateSetInfo { - // Bitmap of all 32 fetch constants and whether they have been setup yet. - // This prevents duplication across the vertex and pixel shader. 
- uint32_t has_setup_fetch_mask; - uint32_t image_write_count = 0; - VkWriteDescriptorSet image_writes[32]; - VkDescriptorImageInfo image_infos[32]; - } update_set_info_; -}; - -} // namespace vulkan -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_VULKAN_TEXTURE_CACHE_H_ diff --git a/src/xenia/gpu/vulkan/texture_config.cc b/src/xenia/gpu/vulkan/texture_config.cc deleted file mode 100644 index 60098322c..000000000 --- a/src/xenia/gpu/vulkan/texture_config.cc +++ /dev/null @@ -1,146 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/gpu/vulkan/texture_config.h" - -namespace xe { -namespace gpu { -namespace vulkan { - -#define COMP_SWIZ(r, g, b, a) \ - { \ - VK_COMPONENT_SWIZZLE_##r, VK_COMPONENT_SWIZZLE_##g, \ - VK_COMPONENT_SWIZZLE_##b, VK_COMPONENT_SWIZZLE_##a \ - } -#define VEC_SWIZ(x, y, z, w) \ - { \ - VECTOR_SWIZZLE_##x, VECTOR_SWIZZLE_##y, VECTOR_SWIZZLE_##z, \ - VECTOR_SWIZZLE_##w \ - } - -#define RGBA COMP_SWIZ(R, G, B, A) -#define ___R COMP_SWIZ(IDENTITY, IDENTITY, IDENTITY, R) -#define RRRR COMP_SWIZ(R, R, R, R) - -#define XYZW VEC_SWIZ(X, Y, Z, W) -#define YXWZ VEC_SWIZ(Y, X, W, Z) -#define ZYXW VEC_SWIZ(Z, Y, X, W) - -#define ___(format) \ - { VK_FORMAT_##format } -#define _c_(format, component_swizzle) \ - { VK_FORMAT_##format, component_swizzle, XYZW } -#define __v(format, vector_swizzle) \ - { VK_FORMAT_##format, RGBA, vector_swizzle } -#define _cv(format, component_swizzle, vector_swizzle) \ - { VK_FORMAT_##format, component_swizzle, vector_swizzle } - -// 
https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VkFormat.html -const TextureConfig texture_configs[64] = { - /* k_1_REVERSE */ ___(UNDEFINED), - /* k_1 */ ___(UNDEFINED), - /* k_8 */ ___(R8_UNORM), - /* k_1_5_5_5 */ __v(A1R5G5B5_UNORM_PACK16, ZYXW), - /* k_5_6_5 */ __v(R5G6B5_UNORM_PACK16, ZYXW), - /* k_6_5_5 */ ___(UNDEFINED), - /* k_8_8_8_8 */ ___(R8G8B8A8_UNORM), - /* k_2_10_10_10 */ ___(A2R10G10B10_UNORM_PACK32), - /* k_8_A */ ___(R8_UNORM), - /* k_8_B */ ___(UNDEFINED), - /* k_8_8 */ ___(R8G8_UNORM), - /* k_Cr_Y1_Cb_Y0_REP */ ___(UNDEFINED), - /* k_Y1_Cr_Y0_Cb_REP */ ___(UNDEFINED), - /* k_16_16_EDRAM */ ___(UNDEFINED), - /* k_8_8_8_8_A */ ___(UNDEFINED), - /* k_4_4_4_4 */ __v(R4G4B4A4_UNORM_PACK16, YXWZ), - // TODO: Verify if these two are correct (I think not). - /* k_10_11_11 */ ___(B10G11R11_UFLOAT_PACK32), - /* k_11_11_10 */ ___(B10G11R11_UFLOAT_PACK32), - - /* k_DXT1 */ ___(BC1_RGBA_UNORM_BLOCK), - /* k_DXT2_3 */ ___(BC2_UNORM_BLOCK), - /* k_DXT4_5 */ ___(BC3_UNORM_BLOCK), - /* k_16_16_16_16_EDRAM */ ___(UNDEFINED), - - // TODO: D24 unsupported on AMD. - /* k_24_8 */ ___(D24_UNORM_S8_UINT), - /* k_24_8_FLOAT */ ___(D32_SFLOAT_S8_UINT), - /* k_16 */ ___(R16_UNORM), - /* k_16_16 */ ___(R16G16_UNORM), - /* k_16_16_16_16 */ ___(R16G16B16A16_UNORM), - /* k_16_EXPAND */ ___(R16_SFLOAT), - /* k_16_16_EXPAND */ ___(R16G16_SFLOAT), - /* k_16_16_16_16_EXPAND */ ___(R16G16B16A16_SFLOAT), - /* k_16_FLOAT */ ___(R16_SFLOAT), - /* k_16_16_FLOAT */ ___(R16G16_SFLOAT), - /* k_16_16_16_16_FLOAT */ ___(R16G16B16A16_SFLOAT), - - // ! These are UNORM formats, not SINT. 
- /* k_32 */ ___(R32_SINT), - /* k_32_32 */ ___(R32G32_SINT), - /* k_32_32_32_32 */ ___(R32G32B32A32_SINT), - /* k_32_FLOAT */ ___(R32_SFLOAT), - /* k_32_32_FLOAT */ ___(R32G32_SFLOAT), - /* k_32_32_32_32_FLOAT */ ___(R32G32B32A32_SFLOAT), - /* k_32_AS_8 */ ___(UNDEFINED), - /* k_32_AS_8_8 */ ___(UNDEFINED), - /* k_16_MPEG */ ___(UNDEFINED), - /* k_16_16_MPEG */ ___(UNDEFINED), - /* k_8_INTERLACED */ ___(UNDEFINED), - /* k_32_AS_8_INTERLACED */ ___(UNDEFINED), - /* k_32_AS_8_8_INTERLACED */ ___(UNDEFINED), - /* k_16_INTERLACED */ ___(UNDEFINED), - /* k_16_MPEG_INTERLACED */ ___(UNDEFINED), - /* k_16_16_MPEG_INTERLACED */ ___(UNDEFINED), - - // https://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf - /* k_DXN */ ___(BC5_UNORM_BLOCK), // ? - - /* k_8_8_8_8_AS_16_16_16_16 */ ___(R8G8B8A8_UNORM), - /* k_DXT1_AS_16_16_16_16 */ ___(BC1_RGBA_UNORM_BLOCK), - /* k_DXT2_3_AS_16_16_16_16 */ ___(BC2_UNORM_BLOCK), - /* k_DXT4_5_AS_16_16_16_16 */ ___(BC3_UNORM_BLOCK), - - /* k_2_10_10_10_AS_16_16_16_16 */ ___(A2R10G10B10_UNORM_PACK32), - - // TODO: Verify if these two are correct (I think not). - /* k_10_11_11_AS_16_16_16_16 */ ___(B10G11R11_UFLOAT_PACK32), // ? - /* k_11_11_10_AS_16_16_16_16 */ ___(B10G11R11_UFLOAT_PACK32), // ? 
- /* k_32_32_32_FLOAT */ ___(R32G32B32_SFLOAT), - /* k_DXT3A */ _c_(BC2_UNORM_BLOCK, ___R), - /* k_DXT5A */ _c_(BC4_UNORM_BLOCK, RRRR), // ATI1N - - // https://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf - /* k_CTX1 */ ___(R8G8_UINT), - - /* k_DXT3A_AS_1_1_1_1 */ ___(UNDEFINED), - - /* k_8_8_8_8_GAMMA_EDRAM */ ___(UNDEFINED), - /* k_2_10_10_10_FLOAT_EDRAM */ ___(UNDEFINED), -}; - -#undef _cv -#undef __v -#undef _c_ -#undef ___ - -#undef ZYXW -#undef YXWZ -#undef XYZW - -#undef RRRR -#undef ___R -#undef RGBA - -#undef VEC_SWIZ -#undef COMP_SWIZ - -} // namespace vulkan -} // namespace gpu -} // namespace xe diff --git a/src/xenia/gpu/vulkan/texture_config.h b/src/xenia/gpu/vulkan/texture_config.h deleted file mode 100644 index 5e23d19bd..000000000 --- a/src/xenia/gpu/vulkan/texture_config.h +++ /dev/null @@ -1,50 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#ifndef XENIA_GPU_VULKAN_TEXTURE_CONFIG_H_ -#define XENIA_GPU_VULKAN_TEXTURE_CONFIG_H_ - -#include "third_party/volk/volk.h" -#include "xenia/gpu/texture_info.h" -#include "xenia/gpu/xenos.h" - -namespace xe { -namespace gpu { -namespace vulkan { - -typedef enum VectorSwizzle { - VECTOR_SWIZZLE_X = 0, - VECTOR_SWIZZLE_Y = 1, - VECTOR_SWIZZLE_Z = 2, - VECTOR_SWIZZLE_W = 3, -} VectorSwizzle; - -struct TextureConfig { - VkFormat host_format; - struct { - VkComponentSwizzle r = VK_COMPONENT_SWIZZLE_R; - VkComponentSwizzle g = VK_COMPONENT_SWIZZLE_G; - VkComponentSwizzle b = VK_COMPONENT_SWIZZLE_B; - VkComponentSwizzle a = VK_COMPONENT_SWIZZLE_A; - } component_swizzle; - struct { - VectorSwizzle x = VECTOR_SWIZZLE_X; - VectorSwizzle y = VECTOR_SWIZZLE_Y; - VectorSwizzle z = VECTOR_SWIZZLE_Z; - VectorSwizzle w = VECTOR_SWIZZLE_W; - } vector_swizzle; -}; - -extern const TextureConfig texture_configs[64]; - -} // namespace vulkan -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_VULKAN_TEXTURE_CONFIG_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 5f8f800ff..5edf7705e 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -9,1327 +9,47 @@ #include "xenia/gpu/vulkan/vulkan_command_processor.h" -#include - -#include "xenia/base/logging.h" -#include "xenia/base/math.h" -#include "xenia/base/profiling.h" -#include "xenia/gpu/gpu_flags.h" -#include "xenia/gpu/registers.h" -#include "xenia/gpu/sampler_info.h" -#include "xenia/gpu/texture_info.h" -#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" -#include "xenia/gpu/vulkan/vulkan_graphics_system.h" -#include "xenia/gpu/xenos.h" -#include "xenia/ui/vulkan/vulkan_util.h" - namespace xe { namespace gpu { namespace vulkan { -using namespace xe::gpu::xenos; -using xe::ui::vulkan::CheckResult; - 
-constexpr size_t kDefaultBufferCacheCapacity = 256 * 1024 * 1024; - VulkanCommandProcessor::VulkanCommandProcessor( VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state) : CommandProcessor(graphics_system, kernel_state) {} - VulkanCommandProcessor::~VulkanCommandProcessor() = default; -void VulkanCommandProcessor::RequestFrameTrace( - const std::filesystem::path& root_path) { - // Override traces if renderdoc is attached. - if (device_->is_renderdoc_attached()) { - trace_requested_ = true; - return; - } - - return CommandProcessor::RequestFrameTrace(root_path); -} - void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) {} void VulkanCommandProcessor::RestoreEdramSnapshot(const void* snapshot) {} -void VulkanCommandProcessor::ClearCaches() { - CommandProcessor::ClearCaches(); - cache_clear_requested_ = true; -} - bool VulkanCommandProcessor::SetupContext() { - if (!CommandProcessor::SetupContext()) { - XELOGE("Unable to initialize base command processor context"); - return false; - } - - // Acquire our device and queue. - auto context = static_cast(context_.get()); - device_ = context->device(); - queue_ = device_->AcquireQueue(device_->queue_family_index()); - if (!queue_) { - // Need to reuse primary queue (with locks). - queue_ = device_->primary_queue(); - queue_mutex_ = &device_->primary_queue_mutex(); - } - - VkResult status = VK_SUCCESS; - - // Setup a blitter. - blitter_ = std::make_unique(); - status = blitter_->Initialize(device_); - if (status != VK_SUCCESS) { - XELOGE("Unable to initialize blitter"); - blitter_->Shutdown(); - return false; - } - - // Setup fenced pools used for all our per-frame/per-draw resources. - command_buffer_pool_ = std::make_unique( - *device_, device_->queue_family_index()); - - // Initialize the state machine caches. 
- buffer_cache_ = std::make_unique( - register_file_, memory_, device_, kDefaultBufferCacheCapacity); - status = buffer_cache_->Initialize(); - if (status != VK_SUCCESS) { - XELOGE("Unable to initialize buffer cache"); - buffer_cache_->Shutdown(); - return false; - } - - texture_cache_ = std::make_unique(memory_, register_file_, - &trace_writer_, device_); - status = texture_cache_->Initialize(); - if (status != VK_SUCCESS) { - XELOGE("Unable to initialize texture cache"); - texture_cache_->Shutdown(); - return false; - } - - pipeline_cache_ = std::make_unique(register_file_, device_); - status = pipeline_cache_->Initialize( - buffer_cache_->constant_descriptor_set_layout(), - texture_cache_->texture_descriptor_set_layout(), - buffer_cache_->vertex_descriptor_set_layout()); - if (status != VK_SUCCESS) { - XELOGE("Unable to initialize pipeline cache"); - pipeline_cache_->Shutdown(); - return false; - } - - render_cache_ = std::make_unique(register_file_, device_); - status = render_cache_->Initialize(); - if (status != VK_SUCCESS) { - XELOGE("Unable to initialize render cache"); - render_cache_->Shutdown(); - return false; - } - - return true; + return CommandProcessor::SetupContext(); } void VulkanCommandProcessor::ShutdownContext() { - // TODO(benvanik): wait until idle. - - if (swap_state_.front_buffer_texture) { - // Free swap chain image. - DestroySwapImage(); - } - - buffer_cache_.reset(); - pipeline_cache_.reset(); - render_cache_.reset(); - texture_cache_.reset(); - - blitter_.reset(); - - // Free all pools. This must come after all of our caches clean up. - command_buffer_pool_.reset(); - - // Release queue, if we were using an acquired one. 
- if (!queue_mutex_) { - device_->ReleaseQueue(queue_, device_->queue_family_index()); - queue_ = nullptr; - } - - CommandProcessor::ShutdownContext(); -} - -void VulkanCommandProcessor::MakeCoherent() { - RegisterFile* regs = register_file_; - auto status_host = regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32; - - CommandProcessor::MakeCoherent(); - - // Make region coherent - if (status_host & 0x80000000ul) { - // TODO(benvanik): less-fine-grained clearing. - buffer_cache_->InvalidateCache(); - - if ((status_host & 0x01000000) != 0 && (status_host & 0x02000000) == 0) { - coher_base_vc_ = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32; - coher_size_vc_ = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32; - } - } -} - -void VulkanCommandProcessor::PrepareForWait() { - SCOPE_profile_cpu_f("gpu"); - - CommandProcessor::PrepareForWait(); - - // TODO(benvanik): fences and fancy stuff. We should figure out a way to - // make interrupt callbacks from the GPU so that we don't have to do a full - // synchronize here. 
- // glFlush(); - // glFinish(); - - context_->ClearCurrent(); -} - -void VulkanCommandProcessor::ReturnFromWait() { - context_->MakeCurrent(); - - CommandProcessor::ReturnFromWait(); -} - -void VulkanCommandProcessor::WriteRegister(uint32_t index, uint32_t value) { - CommandProcessor::WriteRegister(index, value); - - if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X && - index <= XE_GPU_REG_SHADER_CONSTANT_511_W) { - uint32_t offset = index - XE_GPU_REG_SHADER_CONSTANT_000_X; - offset /= 4 * 4; - offset ^= 0x3F; - - dirty_float_constants_ |= (1ull << offset); - } else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 && - index <= XE_GPU_REG_SHADER_CONSTANT_BOOL_224_255) { - uint32_t offset = index - XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031; - offset ^= 0x7; - - dirty_bool_constants_ |= (1 << offset); - } else if (index >= XE_GPU_REG_SHADER_CONSTANT_LOOP_00 && - index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) { - uint32_t offset = index - XE_GPU_REG_SHADER_CONSTANT_LOOP_00; - offset ^= 0x1F; - - dirty_loop_constants_ |= (1 << offset); - } else if (index == XE_GPU_REG_DC_LUT_PWL_DATA) { - UpdateGammaRampValue(GammaRampType::kPWL, value); - } else if (index == XE_GPU_REG_DC_LUT_30_COLOR) { - UpdateGammaRampValue(GammaRampType::kNormal, value); - } else if (index >= XE_GPU_REG_DC_LUT_RW_MODE && - index <= XE_GPU_REG_DC_LUTA_CONTROL) { - uint32_t offset = index - XE_GPU_REG_DC_LUT_RW_MODE; - offset ^= 0x05; - - dirty_gamma_constants_ |= (1 << offset); - - if (index == XE_GPU_REG_DC_LUT_RW_INDEX) { - gamma_ramp_rw_subindex_ = 0; - } - } -} - -void VulkanCommandProcessor::CreateSwapImage(VkCommandBuffer setup_buffer, - VkExtent2D extents) { - VkImageCreateInfo image_info; - std::memset(&image_info, 0, sizeof(VkImageCreateInfo)); - image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.format = VK_FORMAT_R8G8B8A8_UNORM; - image_info.extent = {extents.width, extents.height, 1}; - image_info.mipLevels = 1; - 
image_info.arrayLayers = 1; - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - image_info.tiling = VK_IMAGE_TILING_OPTIMAL; - image_info.usage = - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - image_info.queueFamilyIndexCount = 0; - image_info.pQueueFamilyIndices = nullptr; - image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - VkImage image_fb; - auto status = vkCreateImage(*device_, &image_info, nullptr, &image_fb); - CheckResult(status, "vkCreateImage"); - - // Bind memory to image. - VkMemoryRequirements mem_requirements; - vkGetImageMemoryRequirements(*device_, image_fb, &mem_requirements); - fb_memory_ = device_->AllocateMemory(mem_requirements, 0); - assert_not_null(fb_memory_); - - status = vkBindImageMemory(*device_, image_fb, fb_memory_, 0); - CheckResult(status, "vkBindImageMemory"); - - std::lock_guard lock(swap_state_.mutex); - swap_state_.front_buffer_texture = reinterpret_cast(image_fb); - - VkImageViewCreateInfo view_create_info = { - VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - nullptr, - 0, - image_fb, - VK_IMAGE_VIEW_TYPE_2D, - VK_FORMAT_R8G8B8A8_UNORM, - {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, - VK_COMPONENT_SWIZZLE_A}, - {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}, - }; - status = - vkCreateImageView(*device_, &view_create_info, nullptr, &fb_image_view_); - CheckResult(status, "vkCreateImageView"); - - VkFramebufferCreateInfo framebuffer_create_info = { - VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - nullptr, - 0, - blitter_->GetRenderPass(VK_FORMAT_R8G8B8A8_UNORM, true), - 1, - &fb_image_view_, - extents.width, - extents.height, - 1, - }; - status = vkCreateFramebuffer(*device_, &framebuffer_create_info, nullptr, - &fb_framebuffer_); - CheckResult(status, "vkCreateFramebuffer"); - - // Transition image to general layout. 
- VkImageMemoryBarrier barrier; - std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier)); - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.srcAccessMask = 0; - barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.image = image_fb; - barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - - vkCmdPipelineBarrier(setup_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, - nullptr, 0, nullptr, 1, &barrier); -} - -void VulkanCommandProcessor::DestroySwapImage() { - vkDestroyFramebuffer(*device_, fb_framebuffer_, nullptr); - vkDestroyImageView(*device_, fb_image_view_, nullptr); - - std::lock_guard lock(swap_state_.mutex); - vkDestroyImage(*device_, - reinterpret_cast(swap_state_.front_buffer_texture), - nullptr); - vkFreeMemory(*device_, fb_memory_, nullptr); - - swap_state_.front_buffer_texture = 0; - fb_memory_ = nullptr; - fb_framebuffer_ = nullptr; - fb_image_view_ = nullptr; -} - -void VulkanCommandProcessor::BeginFrame() { - assert_false(frame_open_); - - // TODO(benvanik): bigger batches. - // TODO(DrChat): Decouple setup buffer from current batch. - // Begin a new batch, and allocate and begin a command buffer and setup - // buffer. 
- current_batch_fence_ = command_buffer_pool_->BeginBatch(); - current_command_buffer_ = command_buffer_pool_->AcquireEntry(); - current_setup_buffer_ = command_buffer_pool_->AcquireEntry(); - - VkCommandBufferBeginInfo command_buffer_begin_info; - command_buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - command_buffer_begin_info.pNext = nullptr; - command_buffer_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - command_buffer_begin_info.pInheritanceInfo = nullptr; - auto status = - vkBeginCommandBuffer(current_command_buffer_, &command_buffer_begin_info); - CheckResult(status, "vkBeginCommandBuffer"); - - status = - vkBeginCommandBuffer(current_setup_buffer_, &command_buffer_begin_info); - CheckResult(status, "vkBeginCommandBuffer"); - - // Flag renderdoc down to start a capture if requested. - // The capture will end when these commands are submitted to the queue. - static uint32_t frame = 0; - if (device_->is_renderdoc_attached() && !capturing_ && - (cvars::vulkan_renderdoc_capture_all || trace_requested_)) { - if (queue_mutex_) { - queue_mutex_->lock(); - } - - capturing_ = true; - trace_requested_ = false; - device_->BeginRenderDocFrameCapture(); - - if (queue_mutex_) { - queue_mutex_->unlock(); - } - } - - frame_open_ = true; -} - -void VulkanCommandProcessor::EndFrame() { - if (current_render_state_) { - render_cache_->EndRenderPass(); - current_render_state_ = nullptr; - } - - VkResult status = VK_SUCCESS; - status = vkEndCommandBuffer(current_setup_buffer_); - CheckResult(status, "vkEndCommandBuffer"); - status = vkEndCommandBuffer(current_command_buffer_); - CheckResult(status, "vkEndCommandBuffer"); - - current_command_buffer_ = nullptr; - current_setup_buffer_ = nullptr; - command_buffer_pool_->EndBatch(); - - frame_open_ = false; + return CommandProcessor::ShutdownContext(); } void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, - uint32_t frontbuffer_height) { - 
SCOPE_profile_cpu_f("gpu"); - - // Build a final command buffer that copies the game's frontbuffer texture - // into our backbuffer texture. - VkCommandBuffer copy_commands = nullptr; - bool opened_batch; - if (command_buffer_pool_->has_open_batch()) { - copy_commands = command_buffer_pool_->AcquireEntry(); - opened_batch = false; - } else { - current_batch_fence_ = command_buffer_pool_->BeginBatch(); - copy_commands = command_buffer_pool_->AcquireEntry(); - opened_batch = true; - } - - VkCommandBufferBeginInfo begin_info; - std::memset(&begin_info, 0, sizeof(begin_info)); - begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - auto status = vkBeginCommandBuffer(copy_commands, &begin_info); - CheckResult(status, "vkBeginCommandBuffer"); - - if (!frontbuffer_ptr) { - // Trace viewer does this. - frontbuffer_ptr = last_copy_base_; - } - - if (!swap_state_.front_buffer_texture) { - CreateSwapImage(copy_commands, {frontbuffer_width, frontbuffer_height}); - } - auto swap_fb = reinterpret_cast(swap_state_.front_buffer_texture); - - auto& regs = *register_file_; - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0; - auto group = - reinterpret_cast(®s.values[r]); - auto& fetch = group->texture_fetch; - - TextureInfo texture_info; - if (!TextureInfo::Prepare(group->texture_fetch, &texture_info)) { - assert_always(); - } - - // Issue the commands to copy the game's frontbuffer to our backbuffer. - auto texture = texture_cache_->Lookup(texture_info); - if (texture) { - texture->in_flight_fence = current_batch_fence_; - - // Insert a barrier so the GPU finishes writing to the image. 
- VkImageMemoryBarrier barrier; - std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier)); - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.srcAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - barrier.oldLayout = texture->image_layout; - barrier.newLayout = texture->image_layout; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.image = texture->image; - barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - - vkCmdPipelineBarrier(copy_commands, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, - 0, nullptr, 1, &barrier); - - barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; - barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - barrier.image = swap_fb; - vkCmdPipelineBarrier(copy_commands, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, - nullptr, 0, nullptr, 1, &barrier); - - // Part of the source image that we want to blit from. 
- VkRect2D src_rect = { - {0, 0}, - {texture->texture_info.width + 1, texture->texture_info.height + 1}, - }; - VkRect2D dst_rect = {{0, 0}, {frontbuffer_width, frontbuffer_height}}; - - VkViewport viewport = { - 0.f, 0.f, float(frontbuffer_width), float(frontbuffer_height), - 0.f, 1.f}; - - VkRect2D scissor = {{0, 0}, {frontbuffer_width, frontbuffer_height}}; - - blitter_->BlitTexture2D( - copy_commands, current_batch_fence_, - texture_cache_->DemandView(texture, 0x688)->view, src_rect, - {texture->texture_info.width + 1, texture->texture_info.height + 1}, - VK_FORMAT_R8G8B8A8_UNORM, dst_rect, - {frontbuffer_width, frontbuffer_height}, fb_framebuffer_, viewport, - scissor, VK_FILTER_LINEAR, true, true); - - std::swap(barrier.oldLayout, barrier.newLayout); - barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - vkCmdPipelineBarrier( - copy_commands, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); - - std::lock_guard lock(swap_state_.mutex); - swap_state_.width = frontbuffer_width; - swap_state_.height = frontbuffer_height; - } - - status = vkEndCommandBuffer(copy_commands); - CheckResult(status, "vkEndCommandBuffer"); - - // Queue up current command buffers. - // TODO(benvanik): bigger batches. - std::vector submit_buffers; - if (frame_open_) { - // TODO(DrChat): If the setup buffer is empty, don't bother queueing it up. - submit_buffers.push_back(current_setup_buffer_); - submit_buffers.push_back(current_command_buffer_); - EndFrame(); - } - - if (opened_batch) { - command_buffer_pool_->EndBatch(); - } - - submit_buffers.push_back(copy_commands); - if (!submit_buffers.empty()) { - // TODO(benvanik): move to CP or to host (trace dump, etc). - // This only needs to surround a vkQueueSubmit. 
- if (queue_mutex_) { - queue_mutex_->lock(); - } - - VkSubmitInfo submit_info; - std::memset(&submit_info, 0, sizeof(VkSubmitInfo)); - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.commandBufferCount = uint32_t(submit_buffers.size()); - submit_info.pCommandBuffers = submit_buffers.data(); - - submit_info.waitSemaphoreCount = 0; - submit_info.pWaitSemaphores = nullptr; - submit_info.pWaitDstStageMask = nullptr; - - submit_info.signalSemaphoreCount = 0; - submit_info.pSignalSemaphores = nullptr; - - status = vkQueueSubmit(queue_, 1, &submit_info, current_batch_fence_); - if (device_->is_renderdoc_attached() && capturing_) { - device_->EndRenderDocFrameCapture(); - capturing_ = false; - } - if (queue_mutex_) { - queue_mutex_->unlock(); - } - } - - vkWaitForFences(*device_, 1, ¤t_batch_fence_, VK_TRUE, -1); - if (cache_clear_requested_) { - cache_clear_requested_ = false; - - buffer_cache_->ClearCache(); - pipeline_cache_->ClearCache(); - render_cache_->ClearCache(); - texture_cache_->ClearCache(); - } - - // Scavenging. - { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_i( - "gpu", - "xe::gpu::vulkan::VulkanCommandProcessor::PerformSwap Scavenging"); -#endif // FINE_GRAINED_DRAW_SCOPES - // Command buffers must be scavenged first to avoid a race condition. - // We don't want to reuse a batch when the caches haven't yet cleared old - // resources! 
- command_buffer_pool_->Scavenge(); - - blitter_->Scavenge(); - texture_cache_->Scavenge(); - buffer_cache_->Scavenge(); - } - - current_batch_fence_ = nullptr; -} + uint32_t frontbuffer_height) {} Shader* VulkanCommandProcessor::LoadShader(xenos::ShaderType shader_type, uint32_t guest_address, const uint32_t* host_address, uint32_t dword_count) { - return pipeline_cache_->LoadShader(shader_type, guest_address, host_address, - dword_count); + return nullptr; } -bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, +bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, uint32_t index_count, IndexBufferInfo* index_buffer_info, bool major_mode_explicit) { - auto& regs = *register_file_; - -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - auto enable_mode = - static_cast(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); - if (enable_mode == ModeControl::kIgnore) { - // Ignored. - return true; - } else if (enable_mode == ModeControl::kCopy) { - // Special copy handling. - return IssueCopy(); - } - - if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) { - // Doesn't actually draw. - return true; - } - - // Shaders will have already been defined by previous loads. - // We need them to do just about anything so validate here. - auto vertex_shader = static_cast(active_vertex_shader()); - auto pixel_shader = static_cast(active_pixel_shader()); - if (!vertex_shader) { - // Always need a vertex shader. - return false; - } - // Depth-only mode doesn't need a pixel shader (we'll use a fake one). - if (enable_mode == ModeControl::kDepth) { - // Use a dummy pixel shader when required. - pixel_shader = nullptr; - } else if (!pixel_shader) { - // Need a pixel shader in normal color mode. 
- return true; - } - - bool full_update = false; - if (!frame_open_) { - BeginFrame(); - full_update = true; - } - auto command_buffer = current_command_buffer_; - auto setup_buffer = current_setup_buffer_; - - // Begin the render pass. - // This will setup our framebuffer and begin the pass in the command buffer. - // This reuses a previous render pass if one is already open. - if (render_cache_->dirty() || !current_render_state_) { - if (current_render_state_) { - render_cache_->EndRenderPass(); - current_render_state_ = nullptr; - } - - full_update = true; - current_render_state_ = render_cache_->BeginRenderPass( - command_buffer, vertex_shader, pixel_shader); - if (!current_render_state_) { - return false; - } - } - - // Configure the pipeline for drawing. - // This encodes all render state (blend, depth, etc), our shader stages, - // and our vertex input layout. - VkPipeline pipeline = nullptr; - auto pipeline_status = pipeline_cache_->ConfigurePipeline( - command_buffer, current_render_state_, vertex_shader, pixel_shader, - primitive_type, &pipeline); - if (pipeline_status == PipelineCache::UpdateStatus::kError) { - return false; - } else if (pipeline_status == PipelineCache::UpdateStatus::kMismatch || - full_update) { - vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline); - } - pipeline_cache_->SetDynamicState(command_buffer, full_update); - - // Pass registers to the shaders. - if (!PopulateConstants(command_buffer, vertex_shader, pixel_shader)) { - return false; - } - - // Upload and bind index buffer data (if we have any). - if (!PopulateIndexBuffer(command_buffer, index_buffer_info)) { - return false; - } - - // Upload and bind all vertex buffer data. - if (!PopulateVertexBuffers(command_buffer, setup_buffer, vertex_shader)) { - return false; - } - - // Bind samplers/textures. - // Uploads all textures that need it. - // Setup buffer may be flushed to GPU if the texture cache needs it. 
- if (!PopulateSamplers(command_buffer, setup_buffer, vertex_shader, - pixel_shader)) { - return false; - } - - // Actually issue the draw. - if (!index_buffer_info) { - // Auto-indexed draw. - uint32_t instance_count = 1; - uint32_t first_vertex = - register_file_->values[XE_GPU_REG_VGT_INDX_OFFSET].u32; - uint32_t first_instance = 0; - vkCmdDraw(command_buffer, index_count, instance_count, first_vertex, - first_instance); - } else { - // Index buffer draw. - uint32_t instance_count = 1; - uint32_t first_index = 0; - uint32_t vertex_offset = - register_file_->values[XE_GPU_REG_VGT_INDX_OFFSET].u32; - uint32_t first_instance = 0; - vkCmdDrawIndexed(command_buffer, index_count, instance_count, first_index, - vertex_offset, first_instance); - } - return true; } -bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader) { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - xe::gpu::Shader::ConstantRegisterMap dummy_map; - std::memset(&dummy_map, 0, sizeof(dummy_map)); - - // Upload the constants the shaders require. - // These are optional, and if none are defined 0 will be returned. - auto constant_offsets = buffer_cache_->UploadConstantRegisters( - current_setup_buffer_, vertex_shader->constant_register_map(), - pixel_shader ? pixel_shader->constant_register_map() : dummy_map, - current_batch_fence_); - if (constant_offsets.first == VK_WHOLE_SIZE || - constant_offsets.second == VK_WHOLE_SIZE) { - // Shader wants constants but we couldn't upload them. - return false; - } - - // Configure constant uniform access to point at our offsets. 
- auto constant_descriptor_set = buffer_cache_->constant_descriptor_set(); - auto pipeline_layout = pipeline_cache_->pipeline_layout(); - uint32_t set_constant_offsets[2] = { - static_cast(constant_offsets.first), - static_cast(constant_offsets.second)}; - vkCmdBindDescriptorSets( - command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, - &constant_descriptor_set, - static_cast(xe::countof(set_constant_offsets)), - set_constant_offsets); - - return true; -} - -bool VulkanCommandProcessor::PopulateIndexBuffer( - VkCommandBuffer command_buffer, IndexBufferInfo* index_buffer_info) { - auto& regs = *register_file_; - if (!index_buffer_info || !index_buffer_info->guest_base) { - // No index buffer or auto draw. - return true; - } - auto& info = *index_buffer_info; - -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - // Min/max index ranges for clamping. This is often [0g,FFFF|FFFFFF]. - // All indices should be clamped to [min,max]. May be a way to do this in GL. - uint32_t min_index = regs[XE_GPU_REG_VGT_MIN_VTX_INDX].u32; - uint32_t max_index = regs[XE_GPU_REG_VGT_MAX_VTX_INDX].u32; - assert_true(min_index == 0); - assert_true(max_index == 0xFFFF || max_index == 0xFFFFFF); - - assert_true(info.endianness == xenos::Endian::k8in16 || - info.endianness == xenos::Endian::k8in32); - - trace_writer_.WriteMemoryRead(info.guest_base, info.length); - - // Upload (or get a cached copy of) the buffer. - uint32_t source_addr = info.guest_base; - uint32_t source_length = - info.count * (info.format == xenos::IndexFormat::kInt32 - ? sizeof(uint32_t) - : sizeof(uint16_t)); - auto buffer_ref = buffer_cache_->UploadIndexBuffer( - current_setup_buffer_, source_addr, source_length, info.format, - current_batch_fence_); - if (buffer_ref.second == VK_WHOLE_SIZE) { - // Failed to upload buffer. - return false; - } - - // Bind the buffer. - VkIndexType index_type = info.format == xenos::IndexFormat::kInt32 - ? 
VK_INDEX_TYPE_UINT32 - : VK_INDEX_TYPE_UINT16; - vkCmdBindIndexBuffer(command_buffer, buffer_ref.first, buffer_ref.second, - index_type); - - return true; -} - -bool VulkanCommandProcessor::PopulateVertexBuffers( - VkCommandBuffer command_buffer, VkCommandBuffer setup_buffer, - VulkanShader* vertex_shader) { - auto& regs = *register_file_; - -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - auto& vertex_bindings = vertex_shader->vertex_bindings(); - if (vertex_bindings.empty()) { - // No bindings. - return true; - } - - assert_true(vertex_bindings.size() <= 32); - auto descriptor_set = buffer_cache_->PrepareVertexSet( - setup_buffer, current_batch_fence_, vertex_bindings); - if (!descriptor_set) { - XELOGW("Failed to prepare vertex set!"); - return false; - } - - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_cache_->pipeline_layout(), 2, 1, - &descriptor_set, 0, nullptr); - return true; -} - -bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer, - VkCommandBuffer setup_buffer, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader) { -#if FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES - - std::vector dummy_bindings; - auto descriptor_set = texture_cache_->PrepareTextureSet( - setup_buffer, current_batch_fence_, vertex_shader->texture_bindings(), - pixel_shader ? pixel_shader->texture_bindings() : dummy_bindings); - if (!descriptor_set) { - // Unable to bind set. - XELOGW("Failed to prepare texture set!"); - return false; - } - - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_cache_->pipeline_layout(), 1, 1, - &descriptor_set, 0, nullptr); - - return true; -} - -bool VulkanCommandProcessor::IssueCopy() { - SCOPE_profile_cpu_f("gpu"); - auto& regs = *register_file_; - - // This is used to resolve surfaces, taking them from EDRAM render targets - // to system memory. 
It can optionally clear color/depth surfaces, too. - // The command buffer has stuff for actually doing this by drawing, however - // we should be able to do it without that much easier. - - struct { - reg::RB_COPY_CONTROL copy_control; - uint32_t copy_dest_base; - reg::RB_COPY_DEST_PITCH copy_dest_pitch; - reg::RB_COPY_DEST_INFO copy_dest_info; - uint32_t tile_clear; - uint32_t depth_clear; - uint32_t color_clear; - uint32_t color_clear_low; - uint32_t copy_func; - uint32_t copy_ref; - uint32_t copy_mask; - uint32_t copy_surface_slice; - }* copy_regs = reinterpret_cast( - ®s[XE_GPU_REG_RB_COPY_CONTROL].u32); - - struct { - reg::PA_SC_WINDOW_OFFSET window_offset; - reg::PA_SC_WINDOW_SCISSOR_TL window_scissor_tl; - reg::PA_SC_WINDOW_SCISSOR_BR window_scissor_br; - }* window_regs = reinterpret_cast( - ®s[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32); - - // True if the source tile is a color target - bool is_color_source = copy_regs->copy_control.copy_src_select <= 3; - - // Render targets 0-3, 4 = depth - uint32_t copy_src_select = copy_regs->copy_control.copy_src_select; - bool color_clear_enabled = copy_regs->copy_control.color_clear_enable != 0; - bool depth_clear_enabled = copy_regs->copy_control.depth_clear_enable != 0; - CopyCommand copy_command = copy_regs->copy_control.copy_command; - - assert_true(copy_regs->copy_dest_info.copy_dest_array == 0); - assert_true(copy_regs->copy_dest_info.copy_dest_slice == 0); - auto copy_dest_format = - ColorFormatToTextureFormat(copy_regs->copy_dest_info.copy_dest_format); - // TODO: copy dest number / bias - - uint32_t copy_dest_base = copy_regs->copy_dest_base; - uint32_t copy_dest_pitch = copy_regs->copy_dest_pitch.copy_dest_pitch; - uint32_t copy_dest_height = copy_regs->copy_dest_pitch.copy_dest_height; - - // None of this is supported yet: - assert_true(copy_regs->copy_surface_slice == 0); - assert_true(copy_regs->copy_func == 0); - assert_true(copy_regs->copy_ref == 0); - assert_true(copy_regs->copy_mask == 0); - - // 
RB_SURFACE_INFO - // https://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html - uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; - uint32_t surface_pitch = surface_info & 0x3FFF; - auto surface_msaa = - static_cast((surface_info >> 16) & 0x3); - - // TODO(benvanik): any way to scissor this? a200 has: - // REG_A2XX_RB_COPY_DEST_OFFSET = A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) | - // A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff); - // but I can't seem to find something similar. - uint32_t dest_logical_width = copy_dest_pitch; - uint32_t dest_logical_height = copy_dest_height; - - // vtx_window_offset_enable - assert_true(regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x00010000); - uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; - int32_t window_offset_x = window_regs->window_offset.window_x_offset; - int32_t window_offset_y = window_regs->window_offset.window_y_offset; - - uint32_t dest_texel_size = uint32_t(GetTexelSize(copy_dest_format)); - - // Adjust the copy base offset to point to the beginning of the texture, so - // we don't run into hiccups down the road (e.g. resolving the last part going - // backwards). - int32_t dest_offset = - window_offset_y * copy_dest_pitch * int(dest_texel_size); - dest_offset += window_offset_x * 32 * int(dest_texel_size); - copy_dest_base += dest_offset; - - // HACK: vertices to use are always in vf0. 
- int copy_vertex_fetch_slot = 0; - int r = - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (copy_vertex_fetch_slot / 3) * 6; - const auto group = reinterpret_cast(®s.values[r]); - const xe_gpu_vertex_fetch_t* fetch = nullptr; - switch (copy_vertex_fetch_slot % 3) { - case 0: - fetch = &group->vertex_fetch_0; - break; - case 1: - fetch = &group->vertex_fetch_1; - break; - case 2: - fetch = &group->vertex_fetch_2; - break; - } - assert_true(fetch->type == xenos::FetchConstantType::kVertex); - assert_true(fetch->endian == xenos::Endian::k8in32); - assert_true(fetch->size == 6); - const uint8_t* vertex_addr = memory_->TranslatePhysical(fetch->address << 2); - trace_writer_.WriteMemoryRead(fetch->address << 2, fetch->size * 4); - - // Most vertices have a negative half pixel offset applied, which we reverse. - auto& vtx_cntl = *(reg::PA_SU_VTX_CNTL*)®s[XE_GPU_REG_PA_SU_VTX_CNTL].u32; - float vtx_offset = vtx_cntl.pix_center == 0 ? 0.5f : 0.f; - - float dest_points[6]; - for (int i = 0; i < 6; i++) { - dest_points[i] = - GpuSwap(xe::load(vertex_addr + i * 4), fetch->endian) + - vtx_offset; - } - - // Note: The xenos only supports rectangle copies (luckily) - int32_t dest_min_x = int32_t( - (std::min(std::min(dest_points[0], dest_points[2]), dest_points[4]))); - int32_t dest_max_x = int32_t( - (std::max(std::max(dest_points[0], dest_points[2]), dest_points[4]))); - - int32_t dest_min_y = int32_t( - (std::min(std::min(dest_points[1], dest_points[3]), dest_points[5]))); - int32_t dest_max_y = int32_t( - (std::max(std::max(dest_points[1], dest_points[3]), dest_points[5]))); - - VkOffset2D resolve_offset = {dest_min_x, dest_min_y}; - VkExtent2D resolve_extent = {uint32_t(dest_max_x - dest_min_x), - uint32_t(dest_max_y - dest_min_y)}; - - uint32_t color_edram_base = 0; - uint32_t depth_edram_base = 0; - xenos::ColorRenderTargetFormat color_format; - xenos::DepthRenderTargetFormat depth_format; - if (is_color_source) { - // Source from a color target. 
- reg::RB_COLOR_INFO color_info[4] = { - regs.Get(), - regs.Get(XE_GPU_REG_RB_COLOR1_INFO), - regs.Get(XE_GPU_REG_RB_COLOR2_INFO), - regs.Get(XE_GPU_REG_RB_COLOR3_INFO), - }; - color_edram_base = color_info[copy_src_select].color_base; - color_format = color_info[copy_src_select].color_format; - assert_true(color_info[copy_src_select].color_exp_bias == 0); - } - - if (!is_color_source || depth_clear_enabled) { - // Source from or clear a depth target. - reg::RB_DEPTH_INFO depth_info = {regs[XE_GPU_REG_RB_DEPTH_INFO].u32}; - depth_edram_base = depth_info.depth_base; - depth_format = depth_info.depth_format; - if (!is_color_source) { - copy_dest_format = DepthRenderTargetToTextureFormat(depth_format); - } - } - - xenos::Endian resolve_endian = xenos::Endian::k8in32; - if (copy_regs->copy_dest_info.copy_dest_endian <= xenos::Endian128::k16in32) { - resolve_endian = - static_cast(copy_regs->copy_dest_info.copy_dest_endian); - } - - // Demand a resolve texture from the texture cache. - TextureInfo texture_info; - TextureInfo::PrepareResolve( - copy_dest_base, copy_dest_format, resolve_endian, copy_dest_pitch, - dest_logical_width, std::max(1u, dest_logical_height), 1, &texture_info); - - auto texture = texture_cache_->DemandResolveTexture(texture_info); - if (!texture) { - // Out of memory. - XELOGD("Failed to demand resolve texture!"); - return false; - } - - if (!(texture->usage_flags & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT))) { - // Resolve image doesn't support drawing, and we don't support conversion. - return false; - } - - texture->in_flight_fence = current_batch_fence_; - - // For debugging purposes only (trace viewer) - last_copy_base_ = texture->texture_info.memory.base_address; - - if (!frame_open_) { - BeginFrame(); - } else if (current_render_state_) { - // Copy commands cannot be issued within a render pass. 
- render_cache_->EndRenderPass(); - current_render_state_ = nullptr; - } - auto command_buffer = current_command_buffer_; - - if (texture->image_layout == VK_IMAGE_LAYOUT_UNDEFINED) { - // Transition the image to a general layout. - VkImageMemoryBarrier image_barrier; - image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - image_barrier.pNext = nullptr; - image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.srcAccessMask = 0; - image_barrier.dstAccessMask = 0; - image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - image_barrier.image = texture->image; - image_barrier.subresourceRange = {0, 0, 1, 0, 1}; - image_barrier.subresourceRange.aspectMask = - is_color_source - ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - texture->image_layout = VK_IMAGE_LAYOUT_GENERAL; - - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr, 0, - nullptr, 1, &image_barrier); - } - - // Transition the image into a transfer destination layout, if needed. - // TODO: If blitting, layout should be color attachment. - VkImageMemoryBarrier image_barrier; - image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - image_barrier.pNext = nullptr; - image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.srcAccessMask = 0; - image_barrier.dstAccessMask = - is_color_source ? VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT - : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - image_barrier.oldLayout = texture->image_layout; - image_barrier.newLayout = - is_color_source ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL - : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - image_barrier.image = texture->image; - image_barrier.subresourceRange = {0, 0, 1, 0, 1}; - image_barrier.subresourceRange.aspectMask = - is_color_source ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, 0, nullptr, 0, - nullptr, 1, &image_barrier); - - // Ask the render cache to copy to the resolve texture. - auto edram_base = is_color_source ? color_edram_base : depth_edram_base; - uint32_t src_format = is_color_source ? static_cast(color_format) - : static_cast(depth_format); - VkFilter filter = is_color_source ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; - - XELOGGPU("Resolve RT {:08X} {:08X}({}) -> 0x{:08X} ({}x{}, format: {})", - edram_base, surface_pitch, surface_pitch, copy_dest_base, - copy_dest_pitch, copy_dest_height, texture_info.format_info()->name); - switch (copy_command) { - case CopyCommand::kRaw: - /* - render_cache_->RawCopyToImage(command_buffer, edram_base, - texture->image, texture->image_layout, is_color_source, resolve_offset, - resolve_extent); break; - */ - - case CopyCommand::kConvert: { - /* - if (!is_color_source && copy_regs->copy_dest_info.copy_dest_swap == 0) { - // Depth images are a bit more complicated. Try a blit! - render_cache_->BlitToImage( - command_buffer, edram_base, surface_pitch, resolve_extent.height, - surface_msaa, texture->image, texture->image_layout, - is_color_source, src_format, filter, - {resolve_offset.x, resolve_offset.y, 0}, - {resolve_extent.width, resolve_extent.height, 1}); - break; - } - */ - - // Blit with blitter. - auto view = render_cache_->FindTileView( - edram_base, surface_pitch, surface_msaa, is_color_source, src_format); - if (!view) { - XELOGGPU("Failed to find tile view!"); - break; - } - - // Convert the tile view to a sampled image. 
- // Put a barrier on the tile view. - VkImageMemoryBarrier tile_image_barrier; - tile_image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - tile_image_barrier.pNext = nullptr; - tile_image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - tile_image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - tile_image_barrier.srcAccessMask = - is_color_source ? VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT - : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - tile_image_barrier.dstAccessMask = - VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT; - tile_image_barrier.oldLayout = view->image_layout; - tile_image_barrier.newLayout = view->image_layout; - tile_image_barrier.image = view->image; - tile_image_barrier.subresourceRange = {0, 0, 1, 0, 1}; - tile_image_barrier.subresourceRange.aspectMask = - is_color_source - ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - 0, 0, nullptr, 0, nullptr, 1, &tile_image_barrier); - - auto render_pass = - blitter_->GetRenderPass(texture->format, is_color_source); - - // Create a framebuffer containing our image. - if (!texture->framebuffer) { - auto texture_view = texture_cache_->DemandView(texture, 0x688); - - VkFramebufferCreateInfo fb_create_info = { - VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - nullptr, - 0, - render_pass, - 1, - &texture_view->view, - texture->texture_info.width + 1, - texture->texture_info.height + 1, - 1, - }; - - VkResult res = vkCreateFramebuffer(*device_, &fb_create_info, nullptr, - &texture->framebuffer); - CheckResult(res, "vkCreateFramebuffer"); - } - - VkRect2D src_rect = { - {0, 0}, - resolve_extent, - }; - - VkRect2D dst_rect = { - {resolve_offset.x, resolve_offset.y}, - resolve_extent, - }; - - // If the destination rectangle lies outside the window, make it start - // inside. 
The Xenos does not copy pixel data at any offset in screen - // coordinates. - int32_t dst_adj_x = - std::max(dst_rect.offset.x, -window_offset_x) - dst_rect.offset.x; - int32_t dst_adj_y = - std::max(dst_rect.offset.y, -window_offset_y) - dst_rect.offset.y; - - if (uint32_t(dst_adj_x) > dst_rect.extent.width || - uint32_t(dst_adj_y) > dst_rect.extent.height) { - // No-op? - break; - } - - dst_rect.offset.x += dst_adj_x; - dst_rect.offset.y += dst_adj_y; - dst_rect.extent.width -= dst_adj_x; - dst_rect.extent.height -= dst_adj_y; - src_rect.extent.width -= dst_adj_x; - src_rect.extent.height -= dst_adj_y; - - VkViewport viewport = { - 0.f, 0.f, float(copy_dest_pitch), float(copy_dest_height), 0.f, 1.f, - }; - - uint32_t scissor_tl_x = window_regs->window_scissor_tl.tl_x; - uint32_t scissor_br_x = window_regs->window_scissor_br.br_x; - uint32_t scissor_tl_y = window_regs->window_scissor_tl.tl_y; - uint32_t scissor_br_y = window_regs->window_scissor_br.br_y; - - // Clamp the values to destination dimensions. - scissor_tl_x = std::min(scissor_tl_x, copy_dest_pitch); - scissor_br_x = std::min(scissor_br_x, copy_dest_pitch); - scissor_tl_y = std::min(scissor_tl_y, copy_dest_height); - scissor_br_y = std::min(scissor_br_y, copy_dest_height); - - VkRect2D scissor = { - {int32_t(scissor_tl_x), int32_t(scissor_tl_y)}, - {scissor_br_x - scissor_tl_x, scissor_br_y - scissor_tl_y}, - }; - - blitter_->BlitTexture2D( - command_buffer, current_batch_fence_, - is_color_source ? view->image_view : view->image_view_depth, src_rect, - view->GetSize(), texture->format, dst_rect, - {copy_dest_pitch, copy_dest_height}, texture->framebuffer, viewport, - scissor, filter, is_color_source, - copy_regs->copy_dest_info.copy_dest_swap != 0); - - // Pull the tile view back to a color/depth attachment. 
- std::swap(tile_image_barrier.srcAccessMask, - tile_image_barrier.dstAccessMask); - std::swap(tile_image_barrier.oldLayout, tile_image_barrier.newLayout); - vkCmdPipelineBarrier(command_buffer, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, 0, nullptr, 0, - nullptr, 1, &tile_image_barrier); - } break; - - case CopyCommand::kConstantOne: - case CopyCommand::kNull: - assert_always(); - break; - } - - // And pull it back from a transfer destination. - image_barrier.srcAccessMask = image_barrier.dstAccessMask; - image_barrier.dstAccessMask = - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT; - std::swap(image_barrier.newLayout, image_barrier.oldLayout); - vkCmdPipelineBarrier(command_buffer, - is_color_source - ? VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT - : VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, 0, nullptr, 0, nullptr, 1, &image_barrier); - - // Perform any requested clears. - uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32; - uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32; - uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32; - assert_true(copy_color_clear == copy_color_clear_low); - - if (color_clear_enabled) { - // If color clear is enabled, we can only clear a selected color target! - assert_true(is_color_source); - - // TODO(benvanik): verify color order. - float color[] = {((copy_color_clear >> 0) & 0xFF) / 255.0f, - ((copy_color_clear >> 8) & 0xFF) / 255.0f, - ((copy_color_clear >> 16) & 0xFF) / 255.0f, - ((copy_color_clear >> 24) & 0xFF) / 255.0f}; - - // TODO(DrChat): Do we know the surface height at this point? 
- render_cache_->ClearEDRAMColor(command_buffer, color_edram_base, - color_format, surface_pitch, - resolve_extent.height, surface_msaa, color); - } - - if (depth_clear_enabled) { - float depth = - (copy_depth_clear & 0xFFFFFF00) / static_cast(0xFFFFFF00); - uint8_t stencil = copy_depth_clear & 0xFF; - - // TODO(DrChat): Do we know the surface height at this point? - render_cache_->ClearEDRAMDepthStencil( - command_buffer, depth_edram_base, depth_format, surface_pitch, - resolve_extent.height, surface_msaa, depth, stencil); - } - - return true; -} +bool VulkanCommandProcessor::IssueCopy() { return true; } void VulkanCommandProcessor::InitializeTrace() {} diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 4f1621ba5..f841461e8 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -10,69 +10,29 @@ #ifndef XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_ #define XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xenia/base/threading.h" #include "xenia/gpu/command_processor.h" -#include "xenia/gpu/register_file.h" -#include "xenia/gpu/vulkan/buffer_cache.h" -#include "xenia/gpu/vulkan/pipeline_cache.h" -#include "xenia/gpu/vulkan/render_cache.h" -#include "xenia/gpu/vulkan/texture_cache.h" -#include "xenia/gpu/vulkan/vulkan_shader.h" +#include "xenia/gpu/vulkan/vulkan_graphics_system.h" #include "xenia/gpu/xenos.h" -#include "xenia/kernel/xthread.h" -#include "xenia/memory.h" -#include "xenia/ui/vulkan/blitter.h" -#include "xenia/ui/vulkan/fenced_pools.h" -#include "xenia/ui/vulkan/vulkan_context.h" -#include "xenia/ui/vulkan/vulkan_device.h" -#include "xenia/ui/vulkan/vulkan_util.h" +#include "xenia/kernel/kernel_state.h" namespace xe { namespace gpu { namespace vulkan { -class VulkanGraphicsSystem; -class TextureCache; - class VulkanCommandProcessor 
: public CommandProcessor { public: VulkanCommandProcessor(VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state); - ~VulkanCommandProcessor() override; + ~VulkanCommandProcessor(); - void RequestFrameTrace(const std::filesystem::path& root_path) override; void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override; - void RestoreEdramSnapshot(const void* snapshot) override; - void ClearCaches() override; - RenderCache* render_cache() { return render_cache_.get(); } + void RestoreEdramSnapshot(const void* snapshot) override; private: bool SetupContext() override; void ShutdownContext() override; - void MakeCoherent() override; - void PrepareForWait() override; - void ReturnFromWait() override; - - void WriteRegister(uint32_t index, uint32_t value) override; - - void BeginFrame(); - void EndFrame(); - - void CreateSwapImage(VkCommandBuffer setup_buffer, VkExtent2D extents); - void DestroySwapImage(); - void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, uint32_t frontbuffer_height) override; @@ -80,69 +40,13 @@ class VulkanCommandProcessor : public CommandProcessor { const uint32_t* host_address, uint32_t dword_count) override; - bool IssueDraw(xenos::PrimitiveType primitive_type, uint32_t index_count, + bool IssueDraw(xenos::PrimitiveType prim_type, uint32_t index_count, IndexBufferInfo* index_buffer_info, bool major_mode_explicit) override; - bool PopulateConstants(VkCommandBuffer command_buffer, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader); - bool PopulateIndexBuffer(VkCommandBuffer command_buffer, - IndexBufferInfo* index_buffer_info); - bool PopulateVertexBuffers(VkCommandBuffer command_buffer, - VkCommandBuffer setup_buffer, - VulkanShader* vertex_shader); - bool PopulateSamplers(VkCommandBuffer command_buffer, - VkCommandBuffer setup_buffer, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader); bool IssueCopy() override; void InitializeTrace() override; void FinalizeTrace() override; 
- - xe::ui::vulkan::VulkanDevice* device_ = nullptr; - - // front buffer / back buffer memory - VkDeviceMemory fb_memory_ = nullptr; - VkImageView fb_image_view_ = nullptr; - VkFramebuffer fb_framebuffer_ = nullptr; - - uint64_t dirty_float_constants_ = 0; // Dirty float constants in blocks of 4 - uint8_t dirty_bool_constants_ = 0; - uint32_t dirty_loop_constants_ = 0; - uint8_t dirty_gamma_constants_ = 0; - - uint32_t coher_base_vc_ = 0; - uint32_t coher_size_vc_ = 0; - - // TODO(benvanik): abstract behind context? - // Queue used to submit work. This may be a dedicated queue for the command - // processor and no locking will be required for use. If a dedicated queue - // was not available this will be the device primary_queue and the - // queue_mutex must be used to synchronize access to it. - VkQueue queue_ = nullptr; - std::mutex* queue_mutex_ = nullptr; - - // Last copy base address, for debugging only. - uint32_t last_copy_base_ = 0; - - bool capturing_ = false; - bool trace_requested_ = false; - bool cache_clear_requested_ = false; - - std::unique_ptr buffer_cache_; - std::unique_ptr pipeline_cache_; - std::unique_ptr render_cache_; - std::unique_ptr texture_cache_; - - std::unique_ptr blitter_; - std::unique_ptr command_buffer_pool_; - - bool frame_open_ = false; - const RenderState* current_render_state_ = nullptr; - VkCommandBuffer current_command_buffer_ = nullptr; - VkCommandBuffer current_setup_buffer_ = nullptr; - VkFence current_batch_fence_; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc b/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc deleted file mode 100644 index 1e27a4e9a..000000000 --- a/src/xenia/gpu/vulkan/vulkan_gpu_flags.cc +++ /dev/null @@ -1,16 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. 
All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" - -DEFINE_bool(vulkan_renderdoc_capture_all, false, - "Capture everything with RenderDoc.", "Vulkan"); -DEFINE_bool(vulkan_native_msaa, false, "Use native MSAA", "Vulkan"); -DEFINE_bool(vulkan_dump_disasm, false, - "Dump shader disassembly. NVIDIA only supported.", "Vulkan"); diff --git a/src/xenia/gpu/vulkan/vulkan_gpu_flags.h b/src/xenia/gpu/vulkan/vulkan_gpu_flags.h deleted file mode 100644 index 153bc9bc5..000000000 --- a/src/xenia/gpu/vulkan/vulkan_gpu_flags.h +++ /dev/null @@ -1,20 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#ifndef XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_ -#define XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_ - -#define FINE_GRAINED_DRAW_SCOPES 1 -#include "xenia/base/cvar.h" - -DECLARE_bool(vulkan_renderdoc_capture_all); -DECLARE_bool(vulkan_native_msaa); -DECLARE_bool(vulkan_dump_disasm); - -#endif // XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_graphics_system.cc b/src/xenia/gpu/vulkan/vulkan_graphics_system.cc index 57c79912b..36e0f3899 100644 --- a/src/xenia/gpu/vulkan/vulkan_graphics_system.cc +++ b/src/xenia/gpu/vulkan/vulkan_graphics_system.cc @@ -2,261 +2,39 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * + * Copyright 2020 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ #include "xenia/gpu/vulkan/vulkan_graphics_system.h" -#include -#include - -#include "xenia/base/logging.h" -#include "xenia/base/profiling.h" -#include "xenia/cpu/processor.h" -#include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/vulkan/vulkan_command_processor.h" -#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" #include "xenia/ui/vulkan/vulkan_provider.h" -#include "xenia/ui/vulkan/vulkan_swap_chain.h" -#include "xenia/ui/vulkan/vulkan_util.h" -#include "xenia/ui/window.h" +#include "xenia/xbox.h" namespace xe { namespace gpu { namespace vulkan { -using xe::ui::RawImage; -using xe::ui::vulkan::CheckResult; - VulkanGraphicsSystem::VulkanGraphicsSystem() {} -VulkanGraphicsSystem::~VulkanGraphicsSystem() = default; + +VulkanGraphicsSystem::~VulkanGraphicsSystem() {} X_STATUS VulkanGraphicsSystem::Setup(cpu::Processor* processor, kernel::KernelState* kernel_state, ui::Window* target_window) { - // Must create the provider so we can create contexts. - auto provider = xe::ui::vulkan::VulkanProvider::Create(target_window); - device_ = provider->device(); - provider_ = std::move(provider); + provider_ = xe::ui::vulkan::VulkanProvider::Create(target_window); - auto result = GraphicsSystem::Setup(processor, kernel_state, target_window); - if (result) { - return result; - } - - if (target_window) { - display_context_ = reinterpret_cast( - target_window->context()); - } - - // Create our own command pool we can use for captures. 
- VkCommandPoolCreateInfo create_info = { - VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - nullptr, - VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | - VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, - device_->queue_family_index(), - }; - auto status = - vkCreateCommandPool(*device_, &create_info, nullptr, &command_pool_); - CheckResult(status, "vkCreateCommandPool"); - - return X_STATUS_SUCCESS; + return GraphicsSystem::Setup(processor, kernel_state, target_window); } -void VulkanGraphicsSystem::Shutdown() { - GraphicsSystem::Shutdown(); - - vkDestroyCommandPool(*device_, command_pool_, nullptr); -} - -std::unique_ptr VulkanGraphicsSystem::Capture() { - auto& swap_state = command_processor_->swap_state(); - std::lock_guard lock(swap_state.mutex); - if (!swap_state.front_buffer_texture) { - return nullptr; - } - - VkResult status = VK_SUCCESS; - - VkCommandBufferAllocateInfo alloc_info = { - VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - nullptr, - command_pool_, - VK_COMMAND_BUFFER_LEVEL_PRIMARY, - 1, - }; - - VkCommandBuffer cmd = nullptr; - status = vkAllocateCommandBuffers(*device_, &alloc_info, &cmd); - CheckResult(status, "vkAllocateCommandBuffers"); - if (status != VK_SUCCESS) { - return nullptr; - } - - VkCommandBufferBeginInfo begin_info = { - VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - nullptr, - VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - nullptr, - }; - vkBeginCommandBuffer(cmd, &begin_info); - - auto front_buffer = - reinterpret_cast(swap_state.front_buffer_texture); - - status = CreateCaptureBuffer(cmd, {swap_state.width, swap_state.height}); - if (status != VK_SUCCESS) { - vkFreeCommandBuffers(*device_, command_pool_, 1, &cmd); - return nullptr; - } - - VkImageMemoryBarrier barrier; - std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier)); - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - barrier.oldLayout = 
VK_IMAGE_LAYOUT_GENERAL; - barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.image = front_buffer; - barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, - nullptr, 1, &barrier); - - // Copy front buffer into capture image. - VkBufferImageCopy region = { - 0, 0, - 0, {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}, - {0, 0, 0}, {swap_state.width, swap_state.height, 1}, - }; - - vkCmdCopyImageToBuffer(cmd, front_buffer, VK_IMAGE_LAYOUT_GENERAL, - capture_buffer_, 1, ®ion); - - VkBufferMemoryBarrier memory_barrier = { - VK_STRUCTURE_TYPE_MEMORY_BARRIER, - nullptr, - VK_ACCESS_TRANSFER_WRITE_BIT, - VK_ACCESS_HOST_READ_BIT | VK_ACCESS_MEMORY_READ_BIT, - VK_QUEUE_FAMILY_IGNORED, - VK_QUEUE_FAMILY_IGNORED, - capture_buffer_, - 0, - VK_WHOLE_SIZE, - }; - vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr, 1, - &memory_barrier, 0, nullptr); - - status = vkEndCommandBuffer(cmd); - - // Submit commands and wait. 
- if (status == VK_SUCCESS) { - std::lock_guard(device_->primary_queue_mutex()); - VkSubmitInfo submit_info = { - VK_STRUCTURE_TYPE_SUBMIT_INFO, - nullptr, - 0, - nullptr, - nullptr, - 1, - &cmd, - 0, - nullptr, - }; - status = vkQueueSubmit(device_->primary_queue(), 1, &submit_info, nullptr); - CheckResult(status, "vkQueueSubmit"); - - if (status == VK_SUCCESS) { - status = vkQueueWaitIdle(device_->primary_queue()); - CheckResult(status, "vkQueueWaitIdle"); - } - } - - vkFreeCommandBuffers(*device_, command_pool_, 1, &cmd); - - void* data; - if (status == VK_SUCCESS) { - status = vkMapMemory(*device_, capture_buffer_memory_, 0, VK_WHOLE_SIZE, 0, - &data); - CheckResult(status, "vkMapMemory"); - } - - if (status == VK_SUCCESS) { - std::unique_ptr raw_image(new RawImage()); - raw_image->width = swap_state.width; - raw_image->height = swap_state.height; - raw_image->stride = swap_state.width * 4; - raw_image->data.resize(raw_image->stride * raw_image->height); - - std::memcpy(raw_image->data.data(), data, - raw_image->stride * raw_image->height); - - vkUnmapMemory(*device_, capture_buffer_memory_); - DestroyCaptureBuffer(); - return raw_image; - } - - DestroyCaptureBuffer(); - return nullptr; -} - -VkResult VulkanGraphicsSystem::CreateCaptureBuffer(VkCommandBuffer cmd, - VkExtent2D extents) { - VkResult status = VK_SUCCESS; - - VkBufferCreateInfo buffer_info = { - VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - nullptr, - 0, - extents.width * extents.height * 4, - VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, - VK_SHARING_MODE_EXCLUSIVE, - 0, - nullptr, - }; - status = vkCreateBuffer(*device_, &buffer_info, nullptr, &capture_buffer_); - if (status != VK_SUCCESS) { - return status; - } - - capture_buffer_size_ = extents.width * extents.height * 4; - - // Bind memory to buffer. 
- VkMemoryRequirements mem_requirements; - vkGetBufferMemoryRequirements(*device_, capture_buffer_, &mem_requirements); - capture_buffer_memory_ = device_->AllocateMemory( - mem_requirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); - assert_not_null(capture_buffer_memory_); - - status = - vkBindBufferMemory(*device_, capture_buffer_, capture_buffer_memory_, 0); - CheckResult(status, "vkBindImageMemory"); - if (status != VK_SUCCESS) { - vkDestroyBuffer(*device_, capture_buffer_, nullptr); - return status; - } - - return status; -} - -void VulkanGraphicsSystem::DestroyCaptureBuffer() { - vkDestroyBuffer(*device_, capture_buffer_, nullptr); - vkFreeMemory(*device_, capture_buffer_memory_, nullptr); - capture_buffer_ = nullptr; - capture_buffer_memory_ = nullptr; - capture_buffer_size_ = 0; -} +void VulkanGraphicsSystem::Shutdown() { GraphicsSystem::Shutdown(); } std::unique_ptr VulkanGraphicsSystem::CreateCommandProcessor() { - return std::make_unique(this, kernel_state_); + return std::unique_ptr( + new VulkanCommandProcessor(this, kernel_state_)); } void VulkanGraphicsSystem::Swap(xe::ui::UIEvent* e) { @@ -264,63 +42,9 @@ void VulkanGraphicsSystem::Swap(xe::ui::UIEvent* e) { return; } - // Check for pending swap. auto& swap_state = command_processor_->swap_state(); - if (display_context_->WasLost()) { - // We're crashing. Cheese it. - swap_state.pending = false; - return; - } - - { - std::lock_guard lock(swap_state.mutex); - if (!swap_state.pending) { - // return; - } - - swap_state.pending = false; - } - - if (!swap_state.front_buffer_texture) { - // Not yet ready. 
- return; - } - - auto swap_chain = display_context_->swap_chain(); - auto copy_cmd_buffer = swap_chain->copy_cmd_buffer(); - auto front_buffer = - reinterpret_cast(swap_state.front_buffer_texture); - - VkImageMemoryBarrier barrier; - std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier)); - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; - barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.image = front_buffer; - barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - vkCmdPipelineBarrier(copy_cmd_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, - nullptr, 1, &barrier); - - VkImageBlit region; - region.srcSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; - region.srcOffsets[0] = {0, 0, 0}; - region.srcOffsets[1] = {static_cast(swap_state.width), - static_cast(swap_state.height), 1}; - - region.dstSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; - region.dstOffsets[0] = {0, 0, 0}; - region.dstOffsets[1] = {static_cast(swap_chain->surface_width()), - static_cast(swap_chain->surface_height()), - 1}; - vkCmdBlitImage(copy_cmd_buffer, front_buffer, VK_IMAGE_LAYOUT_GENERAL, - swap_chain->surface_image(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion, - VK_FILTER_LINEAR); + std::lock_guard lock(swap_state.mutex); + swap_state.pending = false; } } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_graphics_system.h b/src/xenia/gpu/vulkan/vulkan_graphics_system.h index 74a17aa78..eb04d2b71 100644 --- a/src/xenia/gpu/vulkan/vulkan_graphics_system.h +++ b/src/xenia/gpu/vulkan/vulkan_graphics_system.h @@ -12,8 +12,8 @@ #include +#include "xenia/gpu/command_processor.h" #include "xenia/gpu/graphics_system.h" -#include 
"xenia/ui/vulkan/vulkan_context.h" namespace xe { namespace gpu { @@ -26,29 +26,16 @@ class VulkanGraphicsSystem : public GraphicsSystem { static bool IsAvailable() { return true; } - std::string name() const override { return "Vulkan - obsolete"; } + std::string name() const override { return "Vulkan Prototype"; } X_STATUS Setup(cpu::Processor* processor, kernel::KernelState* kernel_state, ui::Window* target_window) override; void Shutdown() override; - std::unique_ptr Capture() override; - private: - VkResult CreateCaptureBuffer(VkCommandBuffer cmd, VkExtent2D extents); - void DestroyCaptureBuffer(); - std::unique_ptr CreateCommandProcessor() override; + void Swap(xe::ui::UIEvent* e) override; - - xe::ui::vulkan::VulkanDevice* device_ = nullptr; - xe::ui::vulkan::VulkanContext* display_context_ = nullptr; - - VkCommandPool command_pool_ = nullptr; - - VkBuffer capture_buffer_ = nullptr; - VkDeviceMemory capture_buffer_memory_ = nullptr; - VkDeviceSize capture_buffer_size_ = 0; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_shader.cc b/src/xenia/gpu/vulkan/vulkan_shader.cc deleted file mode 100644 index 659ad9326..000000000 --- a/src/xenia/gpu/vulkan/vulkan_shader.cc +++ /dev/null @@ -1,64 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#include "xenia/gpu/vulkan/vulkan_shader.h" - -#include "third_party/fmt/include/fmt/format.h" -#include "xenia/base/assert.h" -#include "xenia/base/logging.h" -#include "xenia/base/math.h" -#include "xenia/ui/vulkan/vulkan_device.h" -#include "xenia/ui/vulkan/vulkan_util.h" - -namespace xe { -namespace gpu { -namespace vulkan { - -using xe::ui::vulkan::CheckResult; - -VulkanShader::VulkanShader(ui::vulkan::VulkanDevice* device, - xenos::ShaderType shader_type, uint64_t data_hash, - const uint32_t* dword_ptr, uint32_t dword_count) - : Shader(shader_type, data_hash, dword_ptr, dword_count), device_(device) {} - -VulkanShader::~VulkanShader() { - if (shader_module_) { - vkDestroyShaderModule(*device_, shader_module_, nullptr); - shader_module_ = nullptr; - } -} - -bool VulkanShader::Prepare() { - assert_null(shader_module_); - assert_true(is_valid()); - - // Create the shader module. - VkShaderModuleCreateInfo shader_info; - shader_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - shader_info.pNext = nullptr; - shader_info.flags = 0; - shader_info.codeSize = translated_binary_.size(); - shader_info.pCode = - reinterpret_cast(translated_binary_.data()); - auto status = - vkCreateShaderModule(*device_, &shader_info, nullptr, &shader_module_); - CheckResult(status, "vkCreateShaderModule"); - - char typeChar = shader_type_ == xenos::ShaderType::kPixel - ? 'p' - : shader_type_ == xenos::ShaderType::kVertex ? 
'v' : 'u'; - device_->DbgSetObjectName( - uint64_t(shader_module_), VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT, - fmt::format("S({}): {:016X}", typeChar, ucode_data_hash())); - return status == VK_SUCCESS; -} - -} // namespace vulkan -} // namespace gpu -} // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_shader.h b/src/xenia/gpu/vulkan/vulkan_shader.h deleted file mode 100644 index 9dd64a22c..000000000 --- a/src/xenia/gpu/vulkan/vulkan_shader.h +++ /dev/null @@ -1,43 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_VULKAN_VULKAN_SHADER_H_ -#define XENIA_GPU_VULKAN_VULKAN_SHADER_H_ - -#include - -#include "xenia/gpu/shader.h" -#include "xenia/ui/vulkan/vulkan_context.h" - -namespace xe { -namespace gpu { -namespace vulkan { - -class VulkanShader : public Shader { - public: - VulkanShader(ui::vulkan::VulkanDevice* device, xenos::ShaderType shader_type, - uint64_t data_hash, const uint32_t* dword_ptr, - uint32_t dword_count); - ~VulkanShader() override; - - // Available only if the shader is_valid and has been prepared. 
- VkShaderModule shader_module() const { return shader_module_; } - - bool Prepare(); - - private: - ui::vulkan::VulkanDevice* device_ = nullptr; - VkShaderModule shader_module_ = nullptr; -}; - -} // namespace vulkan -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_VULKAN_VULKAN_SHADER_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_trace_dump_main.cc b/src/xenia/gpu/vulkan/vulkan_trace_dump_main.cc deleted file mode 100644 index 16c1f34b8..000000000 --- a/src/xenia/gpu/vulkan/vulkan_trace_dump_main.cc +++ /dev/null @@ -1,60 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/base/logging.h" -#include "xenia/base/main.h" -#include "xenia/gpu/trace_dump.h" -#include "xenia/gpu/vulkan/vulkan_command_processor.h" -#include "xenia/gpu/vulkan/vulkan_graphics_system.h" -#include "xenia/ui/vulkan/vulkan_device.h" -#include "xenia/ui/vulkan/vulkan_provider.h" - -namespace xe { -namespace gpu { -namespace vulkan { - -using namespace xe::gpu::xenos; - -class VulkanTraceDump : public TraceDump { - public: - std::unique_ptr CreateGraphicsSystem() override { - return std::unique_ptr(new VulkanGraphicsSystem()); - } - - void BeginHostCapture() override { - auto device = static_cast( - graphics_system_->provider()) - ->device(); - if (device->is_renderdoc_attached()) { - device->BeginRenderDocFrameCapture(); - } - } - - void EndHostCapture() override { - auto device = static_cast( - graphics_system_->provider()) - ->device(); - if (device->is_renderdoc_attached()) { - device->EndRenderDocFrameCapture(); - } - } -}; - -int trace_dump_main(const std::vector& args) { - 
VulkanTraceDump trace_dump; - return trace_dump.Main(args); -} - -} // namespace vulkan -} // namespace gpu -} // namespace xe - -DEFINE_ENTRY_POINT("xenia-gpu-vulkan-trace-dump", - xe::gpu::vulkan::trace_dump_main, "some.trace", - "target_trace_file"); diff --git a/src/xenia/gpu/vulkan/vulkan_trace_viewer_main.cc b/src/xenia/gpu/vulkan/vulkan_trace_viewer_main.cc deleted file mode 100644 index 769a1b8c8..000000000 --- a/src/xenia/gpu/vulkan/vulkan_trace_viewer_main.cc +++ /dev/null @@ -1,76 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/base/logging.h" -#include "xenia/base/main.h" -#include "xenia/gpu/trace_viewer.h" -#include "xenia/gpu/vulkan/vulkan_command_processor.h" -#include "xenia/gpu/vulkan/vulkan_graphics_system.h" - -namespace xe { -namespace gpu { -namespace vulkan { - -using namespace xe::gpu::xenos; - -class VulkanTraceViewer : public TraceViewer { - public: - std::unique_ptr CreateGraphicsSystem() override { - return std::unique_ptr(new VulkanGraphicsSystem()); - } - - uintptr_t GetColorRenderTarget( - uint32_t pitch, xenos::MsaaSamples samples, uint32_t base, - xenos::ColorRenderTargetFormat format) override { - auto command_processor = static_cast( - graphics_system_->command_processor()); - // return command_processor->GetColorRenderTarget(pitch, samples, base, - // format); - return 0; - } - - uintptr_t GetDepthRenderTarget( - uint32_t pitch, xenos::MsaaSamples samples, uint32_t base, - xenos::DepthRenderTargetFormat format) override { - auto command_processor = static_cast( - graphics_system_->command_processor()); - // return 
command_processor->GetDepthRenderTarget(pitch, samples, base, - // format); - return 0; - } - - uintptr_t GetTextureEntry(const TextureInfo& texture_info, - const SamplerInfo& sampler_info) override { - auto command_processor = static_cast( - graphics_system_->command_processor()); - - // auto entry_view = - // command_processor->texture_cache()->Demand(texture_info, - // sampler_info); - // if (!entry_view) { - // return 0; - //} - // auto texture = entry_view->texture; - // return static_cast(texture->handle); - return 0; - } -}; - -int trace_viewer_main(const std::vector& args) { - VulkanTraceViewer trace_viewer; - return trace_viewer.Main(args); -} - -} // namespace vulkan -} // namespace gpu -} // namespace xe - -DEFINE_ENTRY_POINT("xenia-gpu-vulkan-trace-viewer", - xe::gpu::vulkan::trace_viewer_main, "some.trace", - "target_trace_file"); diff --git a/src/xenia/ui/d3d12/d3d12_context.h b/src/xenia/ui/d3d12/d3d12_context.h index 4cf13d87e..2651adae9 100644 --- a/src/xenia/ui/d3d12/d3d12_context.h +++ b/src/xenia/ui/d3d12/d3d12_context.h @@ -69,11 +69,10 @@ class D3D12Context : public GraphicsContext { private: friend class D3D12Provider; - explicit D3D12Context(D3D12Provider* provider, Window* target_window); + bool Initialize(); private: - bool Initialize(); bool InitializeSwapChainBuffers(); void Shutdown(); diff --git a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc index f91c79677..6bc92e8c0 100644 --- a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc +++ b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc @@ -22,8 +22,8 @@ namespace ui { namespace d3d12 { // Generated with `xb buildhlsl`. 
-#include "xenia/ui/d3d12/shaders/dxbc/immediate_ps.h" -#include "xenia/ui/d3d12/shaders/dxbc/immediate_vs.h" +#include "xenia/ui/shaders/bytecode/d3d12_5_1/immediate_ps.h" +#include "xenia/ui/shaders/bytecode/d3d12_5_1/immediate_vs.h" class D3D12ImmediateTexture : public ImmediateTexture { public: diff --git a/src/xenia/ui/d3d12/premake5.lua b/src/xenia/ui/d3d12/premake5.lua index f301a94d2..1615ee611 100644 --- a/src/xenia/ui/d3d12/premake5.lua +++ b/src/xenia/ui/d3d12/premake5.lua @@ -12,7 +12,7 @@ project("xenia-ui-d3d12") }) local_platform_files() files({ - "shaders/bin/*.h", + "../shaders/bytecode/d3d12_5_1/*.h", }) group("demos") diff --git a/src/xenia/ui/d3d12/shaders/dxbc/immediate_ps.cso b/src/xenia/ui/shaders/bytecode/d3d12_5_1/immediate_ps.cso similarity index 100% rename from src/xenia/ui/d3d12/shaders/dxbc/immediate_ps.cso rename to src/xenia/ui/shaders/bytecode/d3d12_5_1/immediate_ps.cso diff --git a/src/xenia/ui/d3d12/shaders/dxbc/immediate_ps.h b/src/xenia/ui/shaders/bytecode/d3d12_5_1/immediate_ps.h similarity index 100% rename from src/xenia/ui/d3d12/shaders/dxbc/immediate_ps.h rename to src/xenia/ui/shaders/bytecode/d3d12_5_1/immediate_ps.h diff --git a/src/xenia/ui/d3d12/shaders/dxbc/immediate_ps.txt b/src/xenia/ui/shaders/bytecode/d3d12_5_1/immediate_ps.txt similarity index 100% rename from src/xenia/ui/d3d12/shaders/dxbc/immediate_ps.txt rename to src/xenia/ui/shaders/bytecode/d3d12_5_1/immediate_ps.txt diff --git a/src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.cso b/src/xenia/ui/shaders/bytecode/d3d12_5_1/immediate_vs.cso similarity index 100% rename from src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.cso rename to src/xenia/ui/shaders/bytecode/d3d12_5_1/immediate_vs.cso diff --git a/src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.h b/src/xenia/ui/shaders/bytecode/d3d12_5_1/immediate_vs.h similarity index 100% rename from src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.h rename to src/xenia/ui/shaders/bytecode/d3d12_5_1/immediate_vs.h diff --git 
a/src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.txt b/src/xenia/ui/shaders/bytecode/d3d12_5_1/immediate_vs.txt similarity index 100% rename from src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.txt rename to src/xenia/ui/shaders/bytecode/d3d12_5_1/immediate_vs.txt diff --git a/src/xenia/ui/d3d12/shaders/immediate.ps.hlsl b/src/xenia/ui/shaders/immediate.ps.hlsl similarity index 100% rename from src/xenia/ui/d3d12/shaders/immediate.ps.hlsl rename to src/xenia/ui/shaders/immediate.ps.hlsl diff --git a/src/xenia/ui/d3d12/shaders/immediate.vs.hlsl b/src/xenia/ui/shaders/immediate.vs.hlsl similarity index 100% rename from src/xenia/ui/d3d12/shaders/immediate.vs.hlsl rename to src/xenia/ui/shaders/immediate.vs.hlsl diff --git a/src/xenia/ui/spirv/premake5.lua b/src/xenia/ui/spirv/premake5.lua deleted file mode 100644 index 9988a051a..000000000 --- a/src/xenia/ui/spirv/premake5.lua +++ /dev/null @@ -1,19 +0,0 @@ -project_root = "../../../.." -include(project_root.."/tools/build") - -group("src") -project("xenia-ui-spirv") - uuid("2323a069-5b29-44a3-b524-f35451a81978") - kind("StaticLib") - language("C++") - links({ - "glslang-spirv", - "spirv-tools", - "xenia-base", - }) - defines({ - }) - includedirs({ - project_root.."/third_party/spirv-tools/external/include", - }) - local_platform_files() diff --git a/src/xenia/ui/spirv/spirv_assembler.cc b/src/xenia/ui/spirv/spirv_assembler.cc deleted file mode 100644 index b7fc5c901..000000000 --- a/src/xenia/ui/spirv/spirv_assembler.cc +++ /dev/null @@ -1,78 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#include "xenia/ui/spirv/spirv_assembler.h" - -#include "third_party/spirv-tools/include/spirv-tools/libspirv.h" -#include "xenia/base/logging.h" - -namespace xe { -namespace ui { -namespace spirv { - -SpirvAssembler::Result::Result(spv_binary binary, spv_diagnostic diagnostic) - : binary_(binary), diagnostic_(diagnostic) {} - -SpirvAssembler::Result::~Result() { - if (binary_) { - spvBinaryDestroy(binary_); - } - if (diagnostic_) { - spvDiagnosticDestroy(diagnostic_); - } -} - -bool SpirvAssembler::Result::has_error() const { return !!diagnostic_; } - -size_t SpirvAssembler::Result::error_source_line() const { - return diagnostic_ ? diagnostic_->position.line : 0; -} - -size_t SpirvAssembler::Result::error_source_column() const { - return diagnostic_ ? diagnostic_->position.column : 0; -} - -const char* SpirvAssembler::Result::error_string() const { - return diagnostic_ ? diagnostic_->error : ""; -} - -const uint32_t* SpirvAssembler::Result::words() const { - return binary_ ? binary_->code : nullptr; -} - -size_t SpirvAssembler::Result::word_count() const { - return binary_ ? 
binary_->wordCount : 0; -} - -SpirvAssembler::SpirvAssembler() - : spv_context_(spvContextCreate(SPV_ENV_VULKAN_1_0)) {} - -SpirvAssembler::~SpirvAssembler() { spvContextDestroy(spv_context_); } - -std::unique_ptr SpirvAssembler::Assemble( - const char* source_text, size_t source_text_length) { - spv_binary binary = nullptr; - spv_diagnostic diagnostic = nullptr; - auto result_code = spvTextToBinary(spv_context_, source_text, - source_text_length, &binary, &diagnostic); - std::unique_ptr result(new Result(binary, diagnostic)); - if (result_code) { - XELOGE("Failed to assemble spv: {}", result_code); - if (result->has_error()) { - return result; - } else { - return nullptr; - } - } - return result; -} - -} // namespace spirv -} // namespace ui -} // namespace xe diff --git a/src/xenia/ui/spirv/spirv_assembler.h b/src/xenia/ui/spirv/spirv_assembler.h deleted file mode 100644 index 3fabc5d61..000000000 --- a/src/xenia/ui/spirv/spirv_assembler.h +++ /dev/null @@ -1,69 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_UI_SPIRV_SPIRV_ASSEMBLER_H_ -#define XENIA_UI_SPIRV_SPIRV_ASSEMBLER_H_ - -#include -#include - -#include "xenia/ui/spirv/spirv_util.h" - -namespace xe { -namespace ui { -namespace spirv { - -class SpirvAssembler { - public: - class Result { - public: - Result(spv_binary binary, spv_diagnostic diagnostic); - ~Result(); - - // True if the result has an error associated with it. - bool has_error() const; - // Line of the error in the provided source text. - size_t error_source_line() const; - // Column of the error in the provided source text. 
- size_t error_source_column() const; - // Human-readable description of the error. - const char* error_string() const; - - // Assembled SPIRV binary. - // Returned pointer lifetime is tied to this Result instance. - const uint32_t* words() const; - // Size of the SPIRV binary, in words. - size_t word_count() const; - - private: - spv_binary binary_ = nullptr; - spv_diagnostic diagnostic_ = nullptr; - }; - - SpirvAssembler(); - ~SpirvAssembler(); - - // Assembles the given source text into a SPIRV binary. - // The return will be nullptr if assembly fails due to a library error. - // The return may have an error set on it if the source text is malformed. - std::unique_ptr Assemble(const char* source_text, - size_t source_text_length); - std::unique_ptr Assemble(const std::string_view source_text) { - return Assemble(source_text.data(), source_text.size()); - } - - private: - spv_context spv_context_ = nullptr; -}; - -} // namespace spirv -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_SPIRV_SPIRV_ASSEMBLER_H_ diff --git a/src/xenia/ui/spirv/spirv_disassembler.cc b/src/xenia/ui/spirv/spirv_disassembler.cc deleted file mode 100644 index a8401c8ce..000000000 --- a/src/xenia/ui/spirv/spirv_disassembler.cc +++ /dev/null @@ -1,82 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#include "xenia/ui/spirv/spirv_disassembler.h" - -#include "third_party/spirv-tools/include/spirv-tools/libspirv.h" -#include "xenia/base/logging.h" - -namespace xe { -namespace ui { -namespace spirv { - -SpirvDisassembler::Result::Result(spv_text text, spv_diagnostic diagnostic) - : text_(text), diagnostic_(diagnostic) {} - -SpirvDisassembler::Result::~Result() { - if (text_) { - spvTextDestroy(text_); - } - if (diagnostic_) { - spvDiagnosticDestroy(diagnostic_); - } -} - -bool SpirvDisassembler::Result::has_error() const { return !!diagnostic_; } - -size_t SpirvDisassembler::Result::error_word_index() const { - return diagnostic_ ? diagnostic_->position.index : 0; -} - -const char* SpirvDisassembler::Result::error_string() const { - return diagnostic_ ? diagnostic_->error : ""; -} - -const char* SpirvDisassembler::Result::text() const { - return text_ ? text_->str : ""; -} - -std::string SpirvDisassembler::Result::to_string() const { - return text_ ? 
std::string(text_->str, text_->length) : ""; -} - -void SpirvDisassembler::Result::AppendText(StringBuffer* target_buffer) const { - if (text_) { - target_buffer->AppendBytes(reinterpret_cast(text_->str), - text_->length); - } -} - -SpirvDisassembler::SpirvDisassembler() - : spv_context_(spvContextCreate(SPV_ENV_VULKAN_1_0)) {} - -SpirvDisassembler::~SpirvDisassembler() { spvContextDestroy(spv_context_); } - -std::unique_ptr SpirvDisassembler::Disassemble( - const uint32_t* words, size_t word_count) { - spv_text text = nullptr; - spv_diagnostic diagnostic = nullptr; - auto result_code = - spvBinaryToText(spv_context_, words, word_count, - SPV_BINARY_TO_TEXT_OPTION_INDENT, &text, &diagnostic); - std::unique_ptr result(new Result(text, diagnostic)); - if (result_code) { - XELOGE("Failed to disassemble spv: {}", result_code); - if (result->has_error()) { - return result; - } else { - return nullptr; - } - } - return result; -} - -} // namespace spirv -} // namespace ui -} // namespace xe diff --git a/src/xenia/ui/spirv/spirv_disassembler.h b/src/xenia/ui/spirv/spirv_disassembler.h deleted file mode 100644 index b779b9d75..000000000 --- a/src/xenia/ui/spirv/spirv_disassembler.h +++ /dev/null @@ -1,66 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#ifndef XENIA_UI_SPIRV_SPIRV_DISASSEMBLER_H_ -#define XENIA_UI_SPIRV_SPIRV_DISASSEMBLER_H_ - -#include -#include - -#include "xenia/base/string_buffer.h" -#include "xenia/ui/spirv/spirv_util.h" - -namespace xe { -namespace ui { -namespace spirv { - -class SpirvDisassembler { - public: - class Result { - public: - Result(spv_text text, spv_diagnostic diagnostic); - ~Result(); - - // True if the result has an error associated with it. - bool has_error() const; - // Index of the error in the provided binary word data. - size_t error_word_index() const; - // Human-readable description of the error. - const char* error_string() const; - - // Disassembled source text. - // Returned pointer lifetime is tied to this Result instance. - const char* text() const; - // Converts the disassembled source text to a string. - std::string to_string() const; - // Appends the disassembled source text to the given buffer. - void AppendText(StringBuffer* target_buffer) const; - - private: - spv_text text_ = nullptr; - spv_diagnostic diagnostic_ = nullptr; - }; - - SpirvDisassembler(); - ~SpirvDisassembler(); - - // Disassembles the given SPIRV binary. - // The return will be nullptr if disassembly fails due to a library error. - // The return may have an error set on it if the SPIRV binary is malformed. 
- std::unique_ptr Disassemble(const uint32_t* words, size_t word_count); - - private: - spv_context spv_context_ = nullptr; -}; - -} // namespace spirv -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_SPIRV_SPIRV_DISASSEMBLER_H_ diff --git a/src/xenia/ui/spirv/spirv_util.cc b/src/xenia/ui/spirv/spirv_util.cc deleted file mode 100644 index a5a5da7a3..000000000 --- a/src/xenia/ui/spirv/spirv_util.cc +++ /dev/null @@ -1,20 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/ui/spirv/spirv_util.h" - -namespace xe { -namespace ui { -namespace spirv { - -// - -} // namespace spirv -} // namespace ui -} // namespace xe diff --git a/src/xenia/ui/spirv/spirv_util.h b/src/xenia/ui/spirv/spirv_util.h deleted file mode 100644 index b0555d7fa..000000000 --- a/src/xenia/ui/spirv/spirv_util.h +++ /dev/null @@ -1,36 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_UI_SPIRV_SPIRV_UTIL_H_ -#define XENIA_UI_SPIRV_SPIRV_UTIL_H_ - -#include "third_party/spirv-headers/include/spirv/1.1/spirv.hpp11" -#include "third_party/spirv/GLSL.std.450.hpp11" - -// Forward declarations from SPIRV-Tools so we don't pollute /so/ much. 
-struct spv_binary_t; -typedef spv_binary_t* spv_binary; -struct spv_context_t; -typedef spv_context_t* spv_context; -struct spv_diagnostic_t; -typedef spv_diagnostic_t* spv_diagnostic; -struct spv_text_t; -typedef spv_text_t* spv_text; - -namespace xe { -namespace ui { -namespace spirv { - -// - -} // namespace spirv -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_SPIRV_SPIRV_UTIL_H_ diff --git a/src/xenia/ui/spirv/spirv_validator.cc b/src/xenia/ui/spirv/spirv_validator.cc deleted file mode 100644 index 3d586d0ba..000000000 --- a/src/xenia/ui/spirv/spirv_validator.cc +++ /dev/null @@ -1,80 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/ui/spirv/spirv_validator.h" - -#include "third_party/spirv-tools/include/spirv-tools/libspirv.h" -#include "xenia/base/logging.h" - -namespace xe { -namespace ui { -namespace spirv { - -SpirvValidator::Result::Result(spv_text text, spv_diagnostic diagnostic) - : text_(text), diagnostic_(diagnostic) {} - -SpirvValidator::Result::~Result() { - if (text_) { - spvTextDestroy(text_); - } - if (diagnostic_) { - spvDiagnosticDestroy(diagnostic_); - } -} - -bool SpirvValidator::Result::has_error() const { return !!diagnostic_; } - -size_t SpirvValidator::Result::error_word_index() const { - return diagnostic_ ? diagnostic_->position.index : 0; -} - -const char* SpirvValidator::Result::error_string() const { - return diagnostic_ ? diagnostic_->error : ""; -} - -const char* SpirvValidator::Result::text() const { - return text_ ? text_->str : ""; -} - -std::string SpirvValidator::Result::to_string() const { - return text_ ? 
std::string(text_->str, text_->length) : ""; -} - -void SpirvValidator::Result::AppendText(StringBuffer* target_buffer) const { - if (text_) { - target_buffer->AppendBytes(reinterpret_cast(text_->str), - text_->length); - } -} - -SpirvValidator::SpirvValidator() - : spv_context_(spvContextCreate(SPV_ENV_UNIVERSAL_1_1)) {} -SpirvValidator::~SpirvValidator() { spvContextDestroy(spv_context_); } - -std::unique_ptr SpirvValidator::Validate( - const uint32_t* words, size_t word_count) { - spv_text text = nullptr; - spv_diagnostic diagnostic = nullptr; - spv_const_binary_t binary = {words, word_count}; - auto result_code = spvValidate(spv_context_, &binary, &diagnostic); - std::unique_ptr result(new Result(text, diagnostic)); - if (result_code) { - XELOGE("Failed to validate spv: {}", result_code); - if (result->has_error()) { - return result; - } else { - return nullptr; - } - } - return result; -} - -} // namespace spirv -} // namespace ui -} // namespace xe \ No newline at end of file diff --git a/src/xenia/ui/spirv/spirv_validator.h b/src/xenia/ui/spirv/spirv_validator.h deleted file mode 100644 index 890843f27..000000000 --- a/src/xenia/ui/spirv/spirv_validator.h +++ /dev/null @@ -1,66 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#ifndef XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_ -#define XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_ - -#include -#include - -#include "xenia/base/string_buffer.h" -#include "xenia/ui/spirv/spirv_util.h" - -namespace xe { -namespace ui { -namespace spirv { - -class SpirvValidator { - public: - class Result { - public: - Result(spv_text text, spv_diagnostic diagnostic); - ~Result(); - - // True if the result has an error associated with it. - bool has_error() const; - // Index of the error in the provided binary word data. - size_t error_word_index() const; - // Human-readable description of the error. - const char* error_string() const; - - // Disassembled source text. - // Returned pointer lifetime is tied to this Result instance. - const char* text() const; - // Converts the disassembled source text to a string. - std::string to_string() const; - // Appends the disassembled source text to the given buffer. - void AppendText(StringBuffer* target_buffer) const; - - private: - spv_text text_ = nullptr; - spv_diagnostic diagnostic_ = nullptr; - }; - - SpirvValidator(); - ~SpirvValidator(); - - // Validates the given SPIRV binary. - // The return will be nullptr if validation fails due to a library error. - // The return may have an error set on it if the SPIRV binary is malformed. 
- std::unique_ptr Validate(const uint32_t* words, size_t word_count); - - private: - spv_context spv_context_ = nullptr; -}; - -} // namespace spirv -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_ diff --git a/src/xenia/ui/vulkan/blitter.cc b/src/xenia/ui/vulkan/blitter.cc deleted file mode 100644 index e4394eef7..000000000 --- a/src/xenia/ui/vulkan/blitter.cc +++ /dev/null @@ -1,588 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/ui/vulkan/blitter.h" -#include "xenia/base/math.h" -#include "xenia/ui/vulkan/fenced_pools.h" - -namespace xe { -namespace ui { -namespace vulkan { - -// Generated with `xenia-build genspirv`. 
-#include "xenia/ui/vulkan/shaders/bin/blit_color_frag.h" -#include "xenia/ui/vulkan/shaders/bin/blit_depth_frag.h" -#include "xenia/ui/vulkan/shaders/bin/blit_vert.h" - -Blitter::Blitter() {} -Blitter::~Blitter() { Shutdown(); } - -VkResult Blitter::Initialize(VulkanDevice* device) { - device_ = device; - - VkResult status = VK_SUCCESS; - - // Shaders - VkShaderModuleCreateInfo shader_create_info; - std::memset(&shader_create_info, 0, sizeof(shader_create_info)); - shader_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - shader_create_info.codeSize = sizeof(blit_vert); - shader_create_info.pCode = reinterpret_cast(blit_vert); - status = vkCreateShaderModule(*device_, &shader_create_info, nullptr, - &blit_vertex_); - CheckResult(status, "vkCreateShaderModule"); - if (status != VK_SUCCESS) { - return status; - } - device_->DbgSetObjectName(uint64_t(blit_vertex_), - VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT, - "S(B): Vertex"); - - shader_create_info.codeSize = sizeof(blit_color_frag); - shader_create_info.pCode = reinterpret_cast(blit_color_frag); - status = vkCreateShaderModule(*device_, &shader_create_info, nullptr, - &blit_color_); - CheckResult(status, "vkCreateShaderModule"); - if (status != VK_SUCCESS) { - return status; - } - device_->DbgSetObjectName(uint64_t(blit_color_), - VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT, - "S(B): Color"); - - shader_create_info.codeSize = sizeof(blit_depth_frag); - shader_create_info.pCode = reinterpret_cast(blit_depth_frag); - status = vkCreateShaderModule(*device_, &shader_create_info, nullptr, - &blit_depth_); - CheckResult(status, "vkCreateShaderModule"); - if (status != VK_SUCCESS) { - return status; - } - device_->DbgSetObjectName(uint64_t(blit_depth_), - VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT, - "S(B): Depth"); - - // Create the descriptor set layout used for our texture sampler. - // As it changes almost every draw we cache it per texture. 
- VkDescriptorSetLayoutCreateInfo texture_set_layout_info; - texture_set_layout_info.sType = - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - texture_set_layout_info.pNext = nullptr; - texture_set_layout_info.flags = 0; - texture_set_layout_info.bindingCount = 1; - VkDescriptorSetLayoutBinding texture_binding; - texture_binding.binding = 0; - texture_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - texture_binding.descriptorCount = 1; - texture_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - texture_binding.pImmutableSamplers = nullptr; - texture_set_layout_info.pBindings = &texture_binding; - status = vkCreateDescriptorSetLayout(*device_, &texture_set_layout_info, - nullptr, &descriptor_set_layout_); - CheckResult(status, "vkCreateDescriptorSetLayout"); - if (status != VK_SUCCESS) { - return status; - } - - // Create a descriptor pool - VkDescriptorPoolSize pool_sizes[1]; - pool_sizes[0].descriptorCount = 4096; - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - descriptor_pool_ = std::make_unique( - *device_, 4096, - std::vector(pool_sizes, std::end(pool_sizes))); - - // Create the pipeline layout used for our pipeline. 
- VkPipelineLayoutCreateInfo pipeline_layout_info; - pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - pipeline_layout_info.pNext = nullptr; - pipeline_layout_info.flags = 0; - VkDescriptorSetLayout set_layouts[] = {descriptor_set_layout_}; - pipeline_layout_info.setLayoutCount = - static_cast(xe::countof(set_layouts)); - pipeline_layout_info.pSetLayouts = set_layouts; - VkPushConstantRange push_constant_ranges[2]; - - push_constant_ranges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - push_constant_ranges[0].offset = 0; - push_constant_ranges[0].size = sizeof(VtxPushConstants); - push_constant_ranges[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - push_constant_ranges[1].offset = sizeof(VtxPushConstants); - push_constant_ranges[1].size = sizeof(PixPushConstants); - - pipeline_layout_info.pushConstantRangeCount = - static_cast(xe::countof(push_constant_ranges)); - pipeline_layout_info.pPushConstantRanges = push_constant_ranges; - status = vkCreatePipelineLayout(*device_, &pipeline_layout_info, nullptr, - &pipeline_layout_); - CheckResult(status, "vkCreatePipelineLayout"); - if (status != VK_SUCCESS) { - return status; - } - - // Create two samplers. 
- VkSamplerCreateInfo sampler_create_info = { - VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - nullptr, - 0, - VK_FILTER_NEAREST, - VK_FILTER_NEAREST, - VK_SAMPLER_MIPMAP_MODE_NEAREST, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - 0.f, - VK_FALSE, - 1.f, - VK_FALSE, - VK_COMPARE_OP_NEVER, - 0.f, - 0.f, - VK_BORDER_COLOR_INT_TRANSPARENT_BLACK, - VK_FALSE, - }; - status = - vkCreateSampler(*device_, &sampler_create_info, nullptr, &samp_nearest_); - CheckResult(status, "vkCreateSampler"); - if (status != VK_SUCCESS) { - return status; - } - - sampler_create_info.minFilter = VK_FILTER_LINEAR; - sampler_create_info.magFilter = VK_FILTER_LINEAR; - sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; - status = - vkCreateSampler(*device_, &sampler_create_info, nullptr, &samp_linear_); - CheckResult(status, "vkCreateSampler"); - if (status != VK_SUCCESS) { - return status; - } - - return VK_SUCCESS; -} - -void Blitter::Shutdown() { - if (samp_nearest_) { - vkDestroySampler(*device_, samp_nearest_, nullptr); - samp_nearest_ = nullptr; - } - if (samp_linear_) { - vkDestroySampler(*device_, samp_linear_, nullptr); - samp_linear_ = nullptr; - } - if (blit_vertex_) { - vkDestroyShaderModule(*device_, blit_vertex_, nullptr); - blit_vertex_ = nullptr; - } - if (blit_color_) { - vkDestroyShaderModule(*device_, blit_color_, nullptr); - blit_color_ = nullptr; - } - if (blit_depth_) { - vkDestroyShaderModule(*device_, blit_depth_, nullptr); - blit_depth_ = nullptr; - } - if (pipeline_color_) { - vkDestroyPipeline(*device_, pipeline_color_, nullptr); - pipeline_color_ = nullptr; - } - if (pipeline_depth_) { - vkDestroyPipeline(*device_, pipeline_depth_, nullptr); - pipeline_depth_ = nullptr; - } - if (pipeline_layout_) { - vkDestroyPipelineLayout(*device_, pipeline_layout_, nullptr); - pipeline_layout_ = nullptr; - } - if (descriptor_set_layout_) { - vkDestroyDescriptorSetLayout(*device_, 
descriptor_set_layout_, nullptr); - descriptor_set_layout_ = nullptr; - } - for (auto& pipeline : pipelines_) { - vkDestroyPipeline(*device_, pipeline.second, nullptr); - } - pipelines_.clear(); - - for (auto& pass : render_passes_) { - vkDestroyRenderPass(*device_, pass.second, nullptr); - } - render_passes_.clear(); -} - -void Blitter::Scavenge() { - if (descriptor_pool_->has_open_batch()) { - descriptor_pool_->EndBatch(); - } - - descriptor_pool_->Scavenge(); -} - -void Blitter::BlitTexture2D(VkCommandBuffer command_buffer, VkFence fence, - VkImageView src_image_view, VkRect2D src_rect, - VkExtent2D src_extents, VkFormat dst_image_format, - VkRect2D dst_rect, VkExtent2D dst_extents, - VkFramebuffer dst_framebuffer, VkViewport viewport, - VkRect2D scissor, VkFilter filter, - bool color_or_depth, bool swap_channels) { - // Do we need a full draw, or can we cheap out with a blit command? - bool full_draw = swap_channels || true; - if (full_draw) { - if (!descriptor_pool_->has_open_batch()) { - descriptor_pool_->BeginBatch(fence); - } - - // Acquire a render pass. - auto render_pass = GetRenderPass(dst_image_format, color_or_depth); - VkRenderPassBeginInfo render_pass_info = { - VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - nullptr, - render_pass, - dst_framebuffer, - {{0, 0}, dst_extents}, - 0, - nullptr, - }; - - vkCmdBeginRenderPass(command_buffer, &render_pass_info, - VK_SUBPASS_CONTENTS_INLINE); - - vkCmdSetViewport(command_buffer, 0, 1, &viewport); - vkCmdSetScissor(command_buffer, 0, 1, &scissor); - - // Acquire a pipeline. - auto pipeline = - GetPipeline(render_pass, color_or_depth ? blit_color_ : blit_depth_, - color_or_depth); - vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline); - - // Acquire and update a descriptor set for this image. 
- auto set = descriptor_pool_->AcquireEntry(descriptor_set_layout_); - if (!set) { - assert_always(); - descriptor_pool_->CancelBatch(); - return; - } - - VkWriteDescriptorSet write; - write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - write.pNext = nullptr; - write.dstSet = set; - write.dstBinding = 0; - write.dstArrayElement = 0; - write.descriptorCount = 1; - write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - - VkDescriptorImageInfo image; - image.sampler = filter == VK_FILTER_NEAREST ? samp_nearest_ : samp_linear_; - image.imageView = src_image_view; - image.imageLayout = VK_IMAGE_LAYOUT_GENERAL; - - write.pImageInfo = ℑ - write.pBufferInfo = nullptr; - write.pTexelBufferView = nullptr; - vkUpdateDescriptorSets(*device_, 1, &write, 0, nullptr); - - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_layout_, 0, 1, &set, 0, nullptr); - - VtxPushConstants vtx_constants = { - { - float(src_rect.offset.x) / src_extents.width, - float(src_rect.offset.y) / src_extents.height, - float(src_rect.extent.width) / src_extents.width, - float(src_rect.extent.height) / src_extents.height, - }, - { - float(dst_rect.offset.x) / dst_extents.width, - float(dst_rect.offset.y) / dst_extents.height, - float(dst_rect.extent.width) / dst_extents.width, - float(dst_rect.extent.height) / dst_extents.height, - }, - }; - vkCmdPushConstants(command_buffer, pipeline_layout_, - VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(VtxPushConstants), - &vtx_constants); - - PixPushConstants pix_constants = { - 0, - 0, - 0, - swap_channels ? 
1 : 0, - }; - vkCmdPushConstants(command_buffer, pipeline_layout_, - VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(VtxPushConstants), - sizeof(PixPushConstants), &pix_constants); - - vkCmdDraw(command_buffer, 4, 1, 0, 0); - vkCmdEndRenderPass(command_buffer); - } -} - -void Blitter::CopyColorTexture2D(VkCommandBuffer command_buffer, VkFence fence, - VkImage src_image, VkImageView src_image_view, - VkOffset2D src_offset, VkImage dst_image, - VkImageView dst_image_view, VkExtent2D extents, - VkFilter filter, bool swap_channels) {} - -void Blitter::CopyDepthTexture(VkCommandBuffer command_buffer, VkFence fence, - VkImage src_image, VkImageView src_image_view, - VkOffset2D src_offset, VkImage dst_image, - VkImageView dst_image_view, VkExtent2D extents) { -} - -VkRenderPass Blitter::GetRenderPass(VkFormat format, bool color_or_depth) { - auto pass = render_passes_.find(format); - if (pass != render_passes_.end()) { - return pass->second; - } - - // Create and cache the render pass. - VkRenderPass render_pass = CreateRenderPass(format, color_or_depth); - if (render_pass) { - render_passes_[format] = render_pass; - } - - return render_pass; -} - -VkPipeline Blitter::GetPipeline(VkRenderPass render_pass, - VkShaderModule frag_shader, - bool color_or_depth) { - auto it = pipelines_.find(std::make_pair(render_pass, frag_shader)); - if (it != pipelines_.end()) { - return it->second; - } - - // Create and cache the pipeline. 
- VkPipeline pipeline = - CreatePipeline(render_pass, frag_shader, color_or_depth); - if (pipeline) { - pipelines_[std::make_pair(render_pass, frag_shader)] = pipeline; - } - - return pipeline; -} - -VkRenderPass Blitter::CreateRenderPass(VkFormat output_format, - bool color_or_depth) { - VkAttachmentDescription attachments[1]; - std::memset(attachments, 0, sizeof(attachments)); - - // Output attachment - attachments[0].flags = 0; - attachments[0].format = output_format; - attachments[0].samples = VK_SAMPLE_COUNT_1_BIT; - attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE; - attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; - attachments[0].initialLayout = - color_or_depth ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL - : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - attachments[0].finalLayout = attachments[0].initialLayout; - - VkAttachmentReference attach_refs[1]; - attach_refs[0].attachment = 0; - attach_refs[0].layout = - color_or_depth ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL - : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - - VkSubpassDescription subpass = { - 0, VK_PIPELINE_BIND_POINT_GRAPHICS, - 0, nullptr, - 0, nullptr, - nullptr, nullptr, - 0, nullptr, - }; - - if (color_or_depth) { - subpass.colorAttachmentCount = 1; - subpass.pColorAttachments = attach_refs; - } else { - subpass.pDepthStencilAttachment = attach_refs; - } - - VkRenderPassCreateInfo renderpass_info = { - VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - nullptr, - 0, - 1, - attachments, - 1, - &subpass, - 0, - nullptr, - }; - VkRenderPass renderpass = nullptr; - VkResult result = - vkCreateRenderPass(*device_, &renderpass_info, nullptr, &renderpass); - CheckResult(result, "vkCreateRenderPass"); - - return renderpass; -} - -VkPipeline Blitter::CreatePipeline(VkRenderPass render_pass, - VkShaderModule frag_shader, - bool color_or_depth) { - VkResult result = VK_SUCCESS; - - // Pipeline - VkGraphicsPipelineCreateInfo pipeline_info; - std::memset(&pipeline_info, 0, sizeof(VkGraphicsPipelineCreateInfo)); - pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; - - // Shaders - pipeline_info.stageCount = 2; - VkPipelineShaderStageCreateInfo stages[2]; - std::memset(stages, 0, sizeof(stages)); - stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; - stages[0].module = blit_vertex_; - stages[0].pName = "main"; - stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; - stages[1].module = frag_shader; - stages[1].pName = "main"; - - pipeline_info.pStages = stages; - - // Vertex input - VkPipelineVertexInputStateCreateInfo vtx_state; - std::memset(&vtx_state, 0, sizeof(vtx_state)); - vtx_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - vtx_state.flags = 0; - vtx_state.vertexAttributeDescriptionCount = 0; - vtx_state.pVertexAttributeDescriptions = nullptr; - 
vtx_state.vertexBindingDescriptionCount = 0; - vtx_state.pVertexBindingDescriptions = nullptr; - - pipeline_info.pVertexInputState = &vtx_state; - - // Input Assembly - VkPipelineInputAssemblyStateCreateInfo input_info; - input_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - input_info.pNext = nullptr; - input_info.flags = 0; - input_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; - input_info.primitiveRestartEnable = VK_FALSE; - pipeline_info.pInputAssemblyState = &input_info; - pipeline_info.pTessellationState = nullptr; - VkPipelineViewportStateCreateInfo viewport_state_info; - viewport_state_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - viewport_state_info.pNext = nullptr; - viewport_state_info.flags = 0; - viewport_state_info.viewportCount = 1; - viewport_state_info.pViewports = nullptr; - viewport_state_info.scissorCount = 1; - viewport_state_info.pScissors = nullptr; - pipeline_info.pViewportState = &viewport_state_info; - VkPipelineRasterizationStateCreateInfo rasterization_info; - rasterization_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - rasterization_info.pNext = nullptr; - rasterization_info.flags = 0; - rasterization_info.depthClampEnable = VK_FALSE; - rasterization_info.rasterizerDiscardEnable = VK_FALSE; - rasterization_info.polygonMode = VK_POLYGON_MODE_FILL; - rasterization_info.cullMode = VK_CULL_MODE_NONE; - rasterization_info.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; - rasterization_info.depthBiasEnable = VK_FALSE; - rasterization_info.depthBiasConstantFactor = 0; - rasterization_info.depthBiasClamp = 0; - rasterization_info.depthBiasSlopeFactor = 0; - rasterization_info.lineWidth = 1.0f; - pipeline_info.pRasterizationState = &rasterization_info; - VkPipelineMultisampleStateCreateInfo multisample_info; - multisample_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - multisample_info.pNext = nullptr; - multisample_info.flags = 
0; - multisample_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - multisample_info.sampleShadingEnable = VK_FALSE; - multisample_info.minSampleShading = 0; - multisample_info.pSampleMask = nullptr; - multisample_info.alphaToCoverageEnable = VK_FALSE; - multisample_info.alphaToOneEnable = VK_FALSE; - pipeline_info.pMultisampleState = &multisample_info; - VkPipelineDepthStencilStateCreateInfo depth_info = { - VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - nullptr, - 0, - VK_TRUE, - VK_TRUE, - VK_COMPARE_OP_ALWAYS, - VK_FALSE, - VK_FALSE, - {}, - {}, - 0.f, - 1.f, - }; - pipeline_info.pDepthStencilState = &depth_info; - VkPipelineColorBlendStateCreateInfo blend_info; - blend_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - blend_info.pNext = nullptr; - blend_info.flags = 0; - blend_info.logicOpEnable = VK_FALSE; - blend_info.logicOp = VK_LOGIC_OP_NO_OP; - - VkPipelineColorBlendAttachmentState blend_attachments[1]; - if (color_or_depth) { - blend_attachments[0].blendEnable = VK_FALSE; - blend_attachments[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; - blend_attachments[0].dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; - blend_attachments[0].colorBlendOp = VK_BLEND_OP_ADD; - blend_attachments[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; - blend_attachments[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; - blend_attachments[0].alphaBlendOp = VK_BLEND_OP_ADD; - blend_attachments[0].colorWriteMask = 0xF; - - blend_info.attachmentCount = - static_cast(xe::countof(blend_attachments)); - blend_info.pAttachments = blend_attachments; - } else { - blend_info.attachmentCount = 0; - blend_info.pAttachments = nullptr; - } - - std::memset(blend_info.blendConstants, 0, sizeof(blend_info.blendConstants)); - pipeline_info.pColorBlendState = &blend_info; - VkPipelineDynamicStateCreateInfo dynamic_state_info; - dynamic_state_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; - dynamic_state_info.pNext = nullptr; - 
dynamic_state_info.flags = 0; - VkDynamicState dynamic_states[] = { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }; - dynamic_state_info.dynamicStateCount = - static_cast(xe::countof(dynamic_states)); - dynamic_state_info.pDynamicStates = dynamic_states; - pipeline_info.pDynamicState = &dynamic_state_info; - pipeline_info.layout = pipeline_layout_; - pipeline_info.renderPass = render_pass; - pipeline_info.subpass = 0; - pipeline_info.basePipelineHandle = nullptr; - pipeline_info.basePipelineIndex = -1; - - VkPipeline pipeline = nullptr; - result = vkCreateGraphicsPipelines(*device_, nullptr, 1, &pipeline_info, - nullptr, &pipeline); - CheckResult(result, "vkCreateGraphicsPipelines"); - - return pipeline; -} - -} // namespace vulkan -} // namespace ui -} // namespace xe \ No newline at end of file diff --git a/src/xenia/ui/vulkan/blitter.h b/src/xenia/ui/vulkan/blitter.h deleted file mode 100644 index 2de0997d1..000000000 --- a/src/xenia/ui/vulkan/blitter.h +++ /dev/null @@ -1,101 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_UI_VULKAN_BLITTER_H_ -#define XENIA_UI_VULKAN_BLITTER_H_ - -#include -#include - -#include "xenia/ui/vulkan/vulkan.h" -#include "xenia/ui/vulkan/vulkan_device.h" - -namespace xe { -namespace ui { -namespace vulkan { - -class DescriptorPool; - -class Blitter { - public: - Blitter(); - ~Blitter(); - - VkResult Initialize(VulkanDevice* device); - void Scavenge(); - void Shutdown(); - - // Queues commands to blit a texture to another texture. 
- // - // src_rect is the rectangle of pixels to copy from the source - // src_extents is the actual size of the source image - // dst_rect is the rectangle of pixels that are replaced with the source - // dst_extents is the actual size of the destination image - // dst_framebuffer must only have one attachment, the target texture. - // viewport is the viewport rect (set to {0, 0, dst_w, dst_h} if unsure) - // scissor is the scissor rect for the dest (set to dst size if unsure) - void BlitTexture2D(VkCommandBuffer command_buffer, VkFence fence, - VkImageView src_image_view, VkRect2D src_rect, - VkExtent2D src_extents, VkFormat dst_image_format, - VkRect2D dst_rect, VkExtent2D dst_extents, - VkFramebuffer dst_framebuffer, VkViewport viewport, - VkRect2D scissor, VkFilter filter, bool color_or_depth, - bool swap_channels); - - void CopyColorTexture2D(VkCommandBuffer command_buffer, VkFence fence, - VkImage src_image, VkImageView src_image_view, - VkOffset2D src_offset, VkImage dst_image, - VkImageView dst_image_view, VkExtent2D extents, - VkFilter filter, bool swap_channels); - void CopyDepthTexture(VkCommandBuffer command_buffer, VkFence fence, - VkImage src_image, VkImageView src_image_view, - VkOffset2D src_offset, VkImage dst_image, - VkImageView dst_image_view, VkExtent2D extents); - - // For framebuffer creation. 
- VkRenderPass GetRenderPass(VkFormat format, bool color_or_depth); - - private: - struct VtxPushConstants { - float src_uv[4]; // 0x00 - float dst_uv[4]; // 0x10 - }; - - struct PixPushConstants { - int _pad[3]; // 0x20 - int swap; // 0x2C - }; - - VkPipeline GetPipeline(VkRenderPass render_pass, VkShaderModule frag_shader, - bool color_or_depth); - VkRenderPass CreateRenderPass(VkFormat output_format, bool color_or_depth); - VkPipeline CreatePipeline(VkRenderPass render_pass, - VkShaderModule frag_shader, bool color_or_depth); - - std::unique_ptr descriptor_pool_ = nullptr; - VulkanDevice* device_ = nullptr; - VkPipeline pipeline_color_ = nullptr; - VkPipeline pipeline_depth_ = nullptr; - VkPipelineLayout pipeline_layout_ = nullptr; - VkShaderModule blit_vertex_ = nullptr; - VkShaderModule blit_color_ = nullptr; - VkShaderModule blit_depth_ = nullptr; - VkSampler samp_linear_ = nullptr; - VkSampler samp_nearest_ = nullptr; - VkDescriptorSetLayout descriptor_set_layout_ = nullptr; - - std::map render_passes_; - std::map, VkPipeline> pipelines_; -}; - -} // namespace vulkan -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_VULKAN_BLITTER_H_ diff --git a/src/xenia/ui/vulkan/circular_buffer.cc b/src/xenia/ui/vulkan/circular_buffer.cc deleted file mode 100644 index 06cb68aa7..000000000 --- a/src/xenia/ui/vulkan/circular_buffer.cc +++ /dev/null @@ -1,281 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#include - -#include "xenia/base/assert.h" -#include "xenia/base/logging.h" -#include "xenia/base/math.h" - -#include "xenia/ui/vulkan/circular_buffer.h" - -namespace xe { -namespace ui { -namespace vulkan { - -CircularBuffer::CircularBuffer(VulkanDevice* device, VkBufferUsageFlags usage, - VkDeviceSize capacity, VkDeviceSize alignment) - : device_(device), capacity_(capacity) { - VkResult status = VK_SUCCESS; - - // Create our internal buffer. - VkBufferCreateInfo buffer_info; - buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - buffer_info.pNext = nullptr; - buffer_info.flags = 0; - buffer_info.size = capacity; - buffer_info.usage = usage; - buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - buffer_info.queueFamilyIndexCount = 0; - buffer_info.pQueueFamilyIndices = nullptr; - status = vkCreateBuffer(*device_, &buffer_info, nullptr, &gpu_buffer_); - CheckResult(status, "vkCreateBuffer"); - if (status != VK_SUCCESS) { - assert_always(); - } - - VkMemoryRequirements reqs; - vkGetBufferMemoryRequirements(*device_, gpu_buffer_, &reqs); - alignment_ = xe::round_up(alignment, reqs.alignment); -} -CircularBuffer::~CircularBuffer() { Shutdown(); } - -VkResult CircularBuffer::Initialize(VkDeviceMemory memory, - VkDeviceSize offset) { - assert_true(offset % alignment_ == 0); - gpu_memory_ = memory; - gpu_base_ = offset; - - VkResult status = VK_SUCCESS; - - // Bind the buffer to its backing memory. - status = vkBindBufferMemory(*device_, gpu_buffer_, gpu_memory_, gpu_base_); - CheckResult(status, "vkBindBufferMemory"); - if (status != VK_SUCCESS) { - XELOGE("CircularBuffer::Initialize - Failed to bind memory!"); - Shutdown(); - return status; - } - - // Map the memory so we can access it. 
- status = vkMapMemory(*device_, gpu_memory_, gpu_base_, capacity_, 0, - reinterpret_cast(&host_base_)); - CheckResult(status, "vkMapMemory"); - if (status != VK_SUCCESS) { - XELOGE("CircularBuffer::Initialize - Failed to map memory!"); - Shutdown(); - return status; - } - - return VK_SUCCESS; -} - -VkResult CircularBuffer::Initialize() { - VkResult status = VK_SUCCESS; - - VkMemoryRequirements reqs; - vkGetBufferMemoryRequirements(*device_, gpu_buffer_, &reqs); - - // Allocate memory from the device to back the buffer. - owns_gpu_memory_ = true; - gpu_memory_ = device_->AllocateMemory(reqs); - if (!gpu_memory_) { - XELOGE("CircularBuffer::Initialize - Failed to allocate memory!"); - Shutdown(); - return VK_ERROR_INITIALIZATION_FAILED; - } - - capacity_ = reqs.size; - gpu_base_ = 0; - - // Bind the buffer to its backing memory. - status = vkBindBufferMemory(*device_, gpu_buffer_, gpu_memory_, gpu_base_); - CheckResult(status, "vkBindBufferMemory"); - if (status != VK_SUCCESS) { - XELOGE("CircularBuffer::Initialize - Failed to bind memory!"); - Shutdown(); - return status; - } - - // Map the memory so we can access it. 
- status = vkMapMemory(*device_, gpu_memory_, gpu_base_, capacity_, 0, - reinterpret_cast(&host_base_)); - CheckResult(status, "vkMapMemory"); - if (status != VK_SUCCESS) { - XELOGE("CircularBuffer::Initialize - Failed to map memory!"); - Shutdown(); - return status; - } - - return VK_SUCCESS; -} - -void CircularBuffer::Shutdown() { - Clear(); - if (host_base_) { - vkUnmapMemory(*device_, gpu_memory_); - host_base_ = nullptr; - } - if (gpu_buffer_) { - vkDestroyBuffer(*device_, gpu_buffer_, nullptr); - gpu_buffer_ = nullptr; - } - if (gpu_memory_ && owns_gpu_memory_) { - vkFreeMemory(*device_, gpu_memory_, nullptr); - gpu_memory_ = nullptr; - } -} - -void CircularBuffer::GetBufferMemoryRequirements(VkMemoryRequirements* reqs) { - vkGetBufferMemoryRequirements(*device_, gpu_buffer_, reqs); -} - -bool CircularBuffer::CanAcquire(VkDeviceSize length) { - // Make sure the length is aligned. - length = xe::round_up(length, alignment_); - if (allocations_.empty()) { - // Read head has caught up to write head (entire buffer available for write) - assert_true(read_head_ == write_head_); - return capacity_ >= length; - } else if (write_head_ < read_head_) { - // Write head wrapped around and is behind read head. - // | write |---- read ----| - return (read_head_ - write_head_) >= length; - } else if (write_head_ > read_head_) { - // Read head behind write head. - // 1. Check if there's enough room from write -> capacity - // | |---- read ----| write | - if ((capacity_ - write_head_) >= length) { - return true; - } - - // 2. 
Check if there's enough room from 0 -> read - // | write |---- read ----| | - if ((read_head_ - 0) >= length) { - return true; - } - } - - return false; -} - -CircularBuffer::Allocation* CircularBuffer::Acquire(VkDeviceSize length, - VkFence fence) { - VkDeviceSize aligned_length = xe::round_up(length, alignment_); - if (!CanAcquire(aligned_length)) { - return nullptr; - } - - assert_true(write_head_ % alignment_ == 0); - if (write_head_ < read_head_) { - // Write head behind read head. - assert_true(read_head_ - write_head_ >= aligned_length); - - Allocation alloc; - alloc.host_ptr = host_base_ + write_head_; - alloc.gpu_memory = gpu_memory_; - alloc.offset = gpu_base_ + write_head_; - alloc.length = length; - alloc.aligned_length = aligned_length; - alloc.fence = fence; - write_head_ += aligned_length; - allocations_.push(alloc); - - return &allocations_.back(); - } else { - // Write head equal to/after read head - if (capacity_ - write_head_ >= aligned_length) { - // Free space from write -> capacity - Allocation alloc; - alloc.host_ptr = host_base_ + write_head_; - alloc.gpu_memory = gpu_memory_; - alloc.offset = gpu_base_ + write_head_; - alloc.length = length; - alloc.aligned_length = aligned_length; - alloc.fence = fence; - write_head_ += aligned_length; - allocations_.push(alloc); - - return &allocations_.back(); - } else if ((read_head_ - 0) >= aligned_length) { - // Not enough space from write -> capacity, but there is enough free space - // from begin -> read - Allocation alloc; - alloc.host_ptr = host_base_ + 0; - alloc.gpu_memory = gpu_memory_; - alloc.offset = gpu_base_ + 0; - alloc.length = length; - alloc.aligned_length = aligned_length; - alloc.fence = fence; - write_head_ = aligned_length; - allocations_.push(alloc); - - return &allocations_.back(); - } - } - - return nullptr; -} - -void CircularBuffer::Flush(Allocation* allocation) { - VkMappedMemoryRange range; - range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - range.pNext = nullptr; - 
range.memory = gpu_memory_; - range.offset = gpu_base_ + allocation->offset; - range.size = allocation->length; - vkFlushMappedMemoryRanges(*device_, 1, &range); -} - -void CircularBuffer::Flush(VkDeviceSize offset, VkDeviceSize length) { - VkMappedMemoryRange range; - range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - range.pNext = nullptr; - range.memory = gpu_memory_; - range.offset = gpu_base_ + offset; - range.size = length; - vkFlushMappedMemoryRanges(*device_, 1, &range); -} - -void CircularBuffer::Clear() { - allocations_ = std::queue{}; - write_head_ = read_head_ = 0; -} - -void CircularBuffer::Scavenge() { - // Stash the last signalled fence - VkFence fence = nullptr; - while (!allocations_.empty()) { - Allocation& alloc = allocations_.front(); - if (fence != alloc.fence && - vkGetFenceStatus(*device_, alloc.fence) != VK_SUCCESS) { - // Don't bother freeing following allocations to ensure proper ordering. - break; - } - - fence = alloc.fence; - if (capacity_ - read_head_ < alloc.aligned_length) { - // This allocation is stored at the beginning of the buffer. - read_head_ = alloc.aligned_length; - } else { - read_head_ += alloc.aligned_length; - } - - allocations_.pop(); - } - - if (allocations_.empty()) { - // Reset R/W heads to work around fragmentation issues. - read_head_ = write_head_ = 0; - } -} - -} // namespace vulkan -} // namespace ui -} // namespace xe \ No newline at end of file diff --git a/src/xenia/ui/vulkan/circular_buffer.h b/src/xenia/ui/vulkan/circular_buffer.h deleted file mode 100644 index 85b069aa0..000000000 --- a/src/xenia/ui/vulkan/circular_buffer.h +++ /dev/null @@ -1,93 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#ifndef XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_ -#define XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_ - -#include - -#include "xenia/ui/vulkan/vulkan.h" -#include "xenia/ui/vulkan/vulkan_device.h" - -namespace xe { -namespace ui { -namespace vulkan { - -// A circular buffer, intended to hold (fairly) temporary memory that will be -// released when a fence is signaled. Best used when allocations are taken -// in-order with command buffer submission. -// -// Allocations loop around the buffer in circles (but are not fragmented at the -// ends of the buffer), where trailing older allocations are freed after use. -class CircularBuffer { - public: - CircularBuffer(VulkanDevice* device, VkBufferUsageFlags usage, - VkDeviceSize capacity, VkDeviceSize alignment = 256); - ~CircularBuffer(); - - struct Allocation { - void* host_ptr; - VkDeviceMemory gpu_memory; - VkDeviceSize offset; - VkDeviceSize length; - VkDeviceSize aligned_length; - - // Allocation usage fence. This allocation will be deleted when the fence - // becomes signaled. - VkFence fence; - }; - - VkResult Initialize(VkDeviceMemory memory, VkDeviceSize offset); - VkResult Initialize(); - void Shutdown(); - - void GetBufferMemoryRequirements(VkMemoryRequirements* reqs); - - VkDeviceSize alignment() const { return alignment_; } - VkDeviceSize capacity() const { return capacity_; } - VkBuffer gpu_buffer() const { return gpu_buffer_; } - VkDeviceMemory gpu_memory() const { return gpu_memory_; } - uint8_t* host_base() const { return host_base_; } - - bool CanAcquire(VkDeviceSize length); - - // Acquires space to hold memory. This allocation is only freed when the fence - // reaches the signaled state. - Allocation* Acquire(VkDeviceSize length, VkFence fence); - void Flush(Allocation* allocation); - void Flush(VkDeviceSize offset, VkDeviceSize length); - - // Clears all allocations, regardless of whether they've been consumed or not. 
- void Clear(); - - // Frees any allocations whose fences have been signaled. - void Scavenge(); - - private: - // All of these variables are relative to gpu_base - VkDeviceSize capacity_ = 0; - VkDeviceSize alignment_ = 0; - VkDeviceSize write_head_ = 0; - VkDeviceSize read_head_ = 0; - - VulkanDevice* device_; - bool owns_gpu_memory_ = false; - VkBuffer gpu_buffer_ = nullptr; - VkDeviceMemory gpu_memory_ = nullptr; - VkDeviceSize gpu_base_ = 0; - uint8_t* host_base_ = nullptr; - - std::queue allocations_; -}; - -} // namespace vulkan -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_GL_CIRCULAR_BUFFER_H_ diff --git a/src/xenia/ui/vulkan/fenced_pools.cc b/src/xenia/ui/vulkan/fenced_pools.cc deleted file mode 100644 index f7aeffff3..000000000 --- a/src/xenia/ui/vulkan/fenced_pools.cc +++ /dev/null @@ -1,124 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/ui/vulkan/fenced_pools.h" - -#include "xenia/base/assert.h" -#include "xenia/base/math.h" -#include "xenia/ui/vulkan/vulkan_util.h" - -namespace xe { -namespace ui { -namespace vulkan { - -using xe::ui::vulkan::CheckResult; - -CommandBufferPool::CommandBufferPool(VkDevice device, - uint32_t queue_family_index) - : BaseFencedPool(device) { - // Create the pool used for allocating buffers. - // They are marked as transient (short-lived) and cycled frequently. 
- VkCommandPoolCreateInfo cmd_pool_info; - cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - cmd_pool_info.pNext = nullptr; - cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | - VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - cmd_pool_info.queueFamilyIndex = queue_family_index; - auto err = - vkCreateCommandPool(device_, &cmd_pool_info, nullptr, &command_pool_); - CheckResult(err, "vkCreateCommandPool"); - - // Allocate a bunch of command buffers to start. - constexpr uint32_t kDefaultCount = 32; - VkCommandBufferAllocateInfo command_buffer_info; - command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - command_buffer_info.pNext = nullptr; - command_buffer_info.commandPool = command_pool_; - command_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - command_buffer_info.commandBufferCount = kDefaultCount; - VkCommandBuffer command_buffers[kDefaultCount]; - err = - vkAllocateCommandBuffers(device_, &command_buffer_info, command_buffers); - CheckResult(err, "vkCreateCommandBuffer"); - for (size_t i = 0; i < xe::countof(command_buffers); ++i) { - PushEntry(command_buffers[i], nullptr); - } -} - -CommandBufferPool::~CommandBufferPool() { - FreeAllEntries(); - vkDestroyCommandPool(device_, command_pool_, nullptr); - command_pool_ = nullptr; -} - -VkCommandBuffer CommandBufferPool::AllocateEntry(void* data) { - // TODO(benvanik): allocate a bunch at once? 
- VkCommandBufferAllocateInfo command_buffer_info; - command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - command_buffer_info.pNext = nullptr; - command_buffer_info.commandPool = command_pool_; - command_buffer_info.level = - VkCommandBufferLevel(reinterpret_cast(data)); - command_buffer_info.commandBufferCount = 1; - VkCommandBuffer command_buffer; - auto err = - vkAllocateCommandBuffers(device_, &command_buffer_info, &command_buffer); - CheckResult(err, "vkCreateCommandBuffer"); - return command_buffer; -} - -void CommandBufferPool::FreeEntry(VkCommandBuffer handle) { - vkFreeCommandBuffers(device_, command_pool_, 1, &handle); -} - -DescriptorPool::DescriptorPool(VkDevice device, uint32_t max_count, - std::vector pool_sizes) - : BaseFencedPool(device) { - VkDescriptorPoolCreateInfo descriptor_pool_info; - descriptor_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - descriptor_pool_info.pNext = nullptr; - descriptor_pool_info.flags = - VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - descriptor_pool_info.maxSets = max_count; - descriptor_pool_info.poolSizeCount = uint32_t(pool_sizes.size()); - descriptor_pool_info.pPoolSizes = pool_sizes.data(); - auto err = vkCreateDescriptorPool(device, &descriptor_pool_info, nullptr, - &descriptor_pool_); - CheckResult(err, "vkCreateDescriptorPool"); -} -DescriptorPool::~DescriptorPool() { - FreeAllEntries(); - vkDestroyDescriptorPool(device_, descriptor_pool_, nullptr); - descriptor_pool_ = nullptr; -} - -VkDescriptorSet DescriptorPool::AllocateEntry(void* data) { - VkDescriptorSetLayout layout = reinterpret_cast(data); - - VkDescriptorSet descriptor_set = nullptr; - VkDescriptorSetAllocateInfo set_alloc_info; - set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - set_alloc_info.pNext = nullptr; - set_alloc_info.descriptorPool = descriptor_pool_; - set_alloc_info.descriptorSetCount = 1; - set_alloc_info.pSetLayouts = &layout; - auto err = - 
vkAllocateDescriptorSets(device_, &set_alloc_info, &descriptor_set); - CheckResult(err, "vkAllocateDescriptorSets"); - - return descriptor_set; -} - -void DescriptorPool::FreeEntry(VkDescriptorSet handle) { - vkFreeDescriptorSets(device_, descriptor_pool_, 1, &handle); -} - -} // namespace vulkan -} // namespace ui -} // namespace xe diff --git a/src/xenia/ui/vulkan/fenced_pools.h b/src/xenia/ui/vulkan/fenced_pools.h deleted file mode 100644 index d64bcd6ac..000000000 --- a/src/xenia/ui/vulkan/fenced_pools.h +++ /dev/null @@ -1,334 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_UI_VULKAN_FENCED_POOLS_H_ -#define XENIA_UI_VULKAN_FENCED_POOLS_H_ - -#include - -#include "xenia/base/assert.h" -#include "xenia/ui/vulkan/vulkan.h" -#include "xenia/ui/vulkan/vulkan_util.h" - -namespace xe { -namespace ui { -namespace vulkan { - -// Simple pool for Vulkan homogenous objects that cannot be reused while -// in-flight. -// It batches pooled objects into groups and uses a vkQueueSubmit fence to -// indicate their availability. If no objects are free when one is requested -// the caller is expected to create them. -template -class BaseFencedPool { - public: - BaseFencedPool(VkDevice device) : device_(device) {} - - virtual ~BaseFencedPool() { - // TODO(benvanik): wait on fence until done. - assert_null(pending_batch_list_head_); - - // Subclasses must call FreeAllEntries() to properly clean up things. - assert_null(free_batch_list_head_); - assert_null(free_entry_list_head_); - } - - // True if one or more batches are still pending on the GPU. 
- bool has_pending() const { return pending_batch_list_head_ != nullptr; } - // True if a batch is open. - bool has_open_batch() const { return open_batch_ != nullptr; } - - // Checks all pending batches for completion and scavenges their entries. - // This should be called as frequently as reasonable. - void Scavenge() { - while (pending_batch_list_head_) { - auto batch = pending_batch_list_head_; - assert_not_null(batch->fence); - - VkResult status = vkGetFenceStatus(device_, batch->fence); - if (status == VK_SUCCESS || status == VK_ERROR_DEVICE_LOST) { - // Batch has completed. Reclaim. - pending_batch_list_head_ = batch->next; - if (batch == pending_batch_list_tail_) { - pending_batch_list_tail_ = nullptr; - } - batch->next = free_batch_list_head_; - free_batch_list_head_ = batch; - batch->entry_list_tail->next = free_entry_list_head_; - free_entry_list_head_ = batch->entry_list_head; - batch->entry_list_head = nullptr; - batch->entry_list_tail = nullptr; - } else { - // Batch is still in-flight. Since batches are executed in order we know - // no others after it could have completed, so early-exit. - return; - } - } - } - - // Begins a new batch. - // All entries acquired within this batch will be marked as in-use until - // the fence returned is signalled. - // Pass in a fence to use an external fence. This assumes the fence has been - // reset. - VkFence BeginBatch(VkFence fence = nullptr) { - assert_null(open_batch_); - Batch* batch = nullptr; - if (free_batch_list_head_) { - // Reuse a batch. - batch = free_batch_list_head_; - free_batch_list_head_ = batch->next; - batch->next = nullptr; - - if (batch->flags & kBatchOwnsFence && !fence) { - // Reset owned fence. 
- vkResetFences(device_, 1, &batch->fence); - } else if ((batch->flags & kBatchOwnsFence) && fence) { - // Transfer owned -> external - vkDestroyFence(device_, batch->fence, nullptr); - batch->fence = fence; - batch->flags &= ~kBatchOwnsFence; - } else if (!(batch->flags & kBatchOwnsFence) && !fence) { - // external -> owned - VkFenceCreateInfo info; - info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - info.pNext = nullptr; - info.flags = 0; - VkResult res = vkCreateFence(device_, &info, nullptr, &batch->fence); - if (res != VK_SUCCESS) { - assert_always(); - } - - batch->flags |= kBatchOwnsFence; - } else { - // external -> external - batch->fence = fence; - } - } else { - // Allocate new batch. - batch = new Batch(); - batch->next = nullptr; - batch->flags = 0; - - if (!fence) { - VkFenceCreateInfo info; - info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - info.pNext = nullptr; - info.flags = 0; - VkResult res = vkCreateFence(device_, &info, nullptr, &batch->fence); - if (res != VK_SUCCESS) { - assert_always(); - } - - batch->flags |= kBatchOwnsFence; - } else { - batch->fence = fence; - } - } - batch->entry_list_head = nullptr; - batch->entry_list_tail = nullptr; - open_batch_ = batch; - - return batch->fence; - } - - // Cancels an open batch, and releases all entries acquired within. - void CancelBatch() { - assert_not_null(open_batch_); - - auto batch = open_batch_; - open_batch_ = nullptr; - - // Relink the batch back into the free batch list. - batch->next = free_batch_list_head_; - free_batch_list_head_ = batch; - - // Relink entries back into free entries list. - batch->entry_list_tail->next = free_entry_list_head_; - free_entry_list_head_ = batch->entry_list_head; - batch->entry_list_head = nullptr; - batch->entry_list_tail = nullptr; - } - - // Ends the current batch. - void EndBatch() { - assert_not_null(open_batch_); - - // Close and see if we have anything. 
- auto batch = open_batch_; - open_batch_ = nullptr; - if (!batch->entry_list_head) { - // Nothing to do. - batch->next = free_batch_list_head_; - free_batch_list_head_ = batch; - return; - } - - // Append to the end of the batch list. - batch->next = nullptr; - if (!pending_batch_list_head_) { - pending_batch_list_head_ = batch; - } - if (pending_batch_list_tail_) { - pending_batch_list_tail_->next = batch; - pending_batch_list_tail_ = batch; - } else { - pending_batch_list_tail_ = batch; - } - } - - protected: - // Attempts to acquire an entry from the pool in the current batch. - // If none are available a new one will be allocated. - HANDLE AcquireEntry(void* data) { - Entry* entry = nullptr; - if (free_entry_list_head_) { - // Slice off an entry from the free list. - Entry* prev = nullptr; - Entry* cur = free_entry_list_head_; - while (cur != nullptr) { - if (cur->data == data) { - if (prev) { - prev->next = cur->next; - } else { - free_entry_list_head_ = cur->next; - } - - entry = cur; - break; - } - - prev = cur; - cur = cur->next; - } - } - - if (!entry) { - // No entry available; allocate new. - entry = new Entry(); - entry->data = data; - entry->handle = static_cast(this)->AllocateEntry(data); - if (!entry->handle) { - delete entry; - return nullptr; - } - } - entry->next = nullptr; - if (!open_batch_->entry_list_head) { - open_batch_->entry_list_head = entry; - } - if (open_batch_->entry_list_tail) { - open_batch_->entry_list_tail->next = entry; - } - open_batch_->entry_list_tail = entry; - return entry->handle; - } - - void PushEntry(HANDLE handle, void* data) { - auto entry = new Entry(); - entry->next = free_entry_list_head_; - entry->data = data; - entry->handle = handle; - free_entry_list_head_ = entry; - } - - void FreeAllEntries() { - // Run down free lists. 
- while (free_batch_list_head_) { - auto batch = free_batch_list_head_; - free_batch_list_head_ = batch->next; - - if (batch->flags & kBatchOwnsFence) { - vkDestroyFence(device_, batch->fence, nullptr); - batch->fence = nullptr; - } - delete batch; - } - while (free_entry_list_head_) { - auto entry = free_entry_list_head_; - free_entry_list_head_ = entry->next; - static_cast(this)->FreeEntry(entry->handle); - delete entry; - } - } - - VkDevice device_ = nullptr; - - private: - struct Entry { - Entry* next; - void* data; - HANDLE handle; - }; - struct Batch { - Batch* next; - Entry* entry_list_head; - Entry* entry_list_tail; - uint32_t flags; - VkFence fence; - }; - - static const uint32_t kBatchOwnsFence = 1; - - Batch* free_batch_list_head_ = nullptr; - Entry* free_entry_list_head_ = nullptr; - Batch* pending_batch_list_head_ = nullptr; - Batch* pending_batch_list_tail_ = nullptr; - Batch* open_batch_ = nullptr; -}; - -class CommandBufferPool - : public BaseFencedPool { - public: - typedef BaseFencedPool Base; - - CommandBufferPool(VkDevice device, uint32_t queue_family_index); - ~CommandBufferPool() override; - - VkCommandBuffer AcquireEntry( - VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) { - return Base::AcquireEntry(reinterpret_cast(level)); - } - - protected: - friend class BaseFencedPool; - VkCommandBuffer AllocateEntry(void* data); - void FreeEntry(VkCommandBuffer handle); - - VkCommandPool command_pool_ = nullptr; -}; - -class DescriptorPool : public BaseFencedPool { - public: - typedef BaseFencedPool Base; - - DescriptorPool(VkDevice device, uint32_t max_count, - std::vector pool_sizes); - ~DescriptorPool() override; - - VkDescriptorSet AcquireEntry(VkDescriptorSetLayout layout) { - return Base::AcquireEntry(layout); - } - - // WARNING: Allocating sets from the vulkan pool will not be tracked! 
- VkDescriptorPool descriptor_pool() { return descriptor_pool_; } - - protected: - friend class BaseFencedPool; - VkDescriptorSet AllocateEntry(void* data); - void FreeEntry(VkDescriptorSet handle); - - VkDescriptorPool descriptor_pool_ = nullptr; -}; - -} // namespace vulkan -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_VULKAN_FENCED_POOLS_H_ diff --git a/src/xenia/ui/vulkan/premake5.lua b/src/xenia/ui/vulkan/premake5.lua index d93f98af6..e657b4af3 100644 --- a/src/xenia/ui/vulkan/premake5.lua +++ b/src/xenia/ui/vulkan/premake5.lua @@ -7,55 +7,10 @@ project("xenia-ui-vulkan") kind("StaticLib") language("C++") links({ - "fmt", "xenia-base", "xenia-ui", - "xenia-ui-spirv", - }) - defines({ - }) - includedirs({ - project_root.."/third_party/vulkan/", }) local_platform_files() files({ - "shaders/bin/*.h", + "../shaders/bytecode/vulkan_spirv/*.h", }) - removefiles({"*_demo.cc"}) - -group("demos") -project("xenia-ui-window-vulkan-demo") - uuid("97598f13-3177-454c-8e58-c59e2b6ede27") - kind("WindowedApp") - language("C++") - links({ - "fmt", - "imgui", - "volk", - "xenia-base", - "xenia-ui", - "xenia-ui-spirv", - "xenia-ui-vulkan", - }) - defines({ - }) - includedirs({ - project_root.."/third_party/vulkan/", - }) - files({ - "../window_demo.cc", - "vulkan_window_demo.cc", - project_root.."/src/xenia/base/main_"..platform_suffix..".cc", - }) - resincludedirs({ - project_root, - }) - - filter("platforms:Linux") - links({ - "X11", - "xcb", - "X11-xcb", - "GL", - "vulkan", - }) diff --git a/src/xenia/ui/vulkan/shaders/bin/blit_color_frag.h b/src/xenia/ui/vulkan/shaders/bin/blit_color_frag.h deleted file mode 100644 index e91b12124..000000000 --- a/src/xenia/ui/vulkan/shaders/bin/blit_color_frag.h +++ /dev/null @@ -1,88 +0,0 @@ -// generated from `xb genspirv` -// source: blit_color.frag -const uint8_t blit_color_frag[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x06, 0x00, 0x08, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 
0x02, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, - 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6F, 0x43, 0x00, 0x00, - 0x05, 0x00, 0x05, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x73, 0x72, 0x63, 0x5F, - 0x74, 0x65, 0x78, 0x74, 0x75, 0x72, 0x65, 0x00, 0x05, 0x00, 0x04, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x76, 0x74, 0x78, 0x5F, 0x75, 0x76, 0x00, 0x00, - 0x05, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x50, 0x75, 0x73, 0x68, - 0x43, 0x6F, 0x6E, 0x73, 0x74, 0x61, 0x6E, 0x74, 0x73, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x5F, 0x70, 0x61, 0x64, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x05, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x73, 0x77, 0x61, 0x70, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x70, 0x75, 0x73, 0x68, 0x5F, 0x63, 0x6F, 0x6E, 0x73, 0x74, 0x61, 0x6E, - 0x74, 0x73, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 
0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x03, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x09, 0x00, 
0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x1A, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x02, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x1A, 0x00, 0x00, 0x00, - 0x1B, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, - 0x1B, 0x00, 0x00, 0x00, 0xAB, 0x00, 0x05, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0xF7, 0x00, 0x03, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xFA, 0x00, 0x04, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x09, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x23, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xFD, 0x00, 
0x01, 0x00, - 0x38, 0x00, 0x01, 0x00, -}; diff --git a/src/xenia/ui/vulkan/shaders/bin/blit_color_frag.spv b/src/xenia/ui/vulkan/shaders/bin/blit_color_frag.spv deleted file mode 100644 index 52ffa72dcd18596da1c9aa6215d28de0f51c69e4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1000 zcmYk4%Sr=55Jg*K;@cSCZzsl`A}&+}Q4kk#At<VLKq60p=+hk4-&#CsIhu+eX$cKcRPFgyD}z1&d{k9!(bR- ztv}Tt8{)%w37>=~&H=1)bPeHyhI!Clex90pp3ki1!#L~s<+hgGza_1g`wriJ?c}Z2 zYRf5vIr6Jk{BZW%h?9CFiOCC8#I;(perL=u=ZIhHO)>W`5S#3;-TpVIqDmcWk}z|% zY;>~!yxfzkE90N{oL>Qt@C@^Ov2DD{yz2BmruiDnog3<6bF2$s5k8Lw@6X&ji}(%^ zeF5>KG*`FSA`^@d<$GqYW%)A9Q3~?xH}T%Wn&%gZ_EV?)Nus^P<(qpCV)AE* z^3^j-K1+;n>ucoBE~kqRu&4Xah}+NH_i?7JbpIl2_j~?QT62F^UuCV$pwjm^;4Rea zooQnZljp29|d_jmr@~p@VwFPN}P58CR#;?4h0&T>Kr5I@VK6 weV_Cw6SW`s)*@U?ZLaO(*&Dd$UGg=rq?ovK_2HUxW!mI?Q_cDre_@nv{=b^@bif#7#-)zx&yKgMsL z;0^hf{J>-II}H(5o;P$3`Vz$-4M@dUu1{T!+2lRDE5?)C{5!siDqd>TPK)O(=8HHh za<669JT9`?98U1LUpLaNu4rk>dvbE>kK|aD56aYYdR2Sio;Or^SBlX*aP{Wrp3Ze) zefOI2PPPMfsd{oWfbH6vzVeQ}E?brFiMg%|czxBQ9Rs5uId8yrl(!U_g)aDQMS6k5 zC+AII@V6D=vv2tOif99ePtKdc&b3YY!o|RTg`|pCLeaD-M-MqOJ96ediAB$@91i${ UFlO^Fz_Dv}Wj|nlQh6c&2e_ex?V`ckoZcF-RLCiLYP@Xe9{>In7_&w z6TfeE8i-8}U3KbIb#?VXcWI(0gsxBtr^9;atie!%385F(y|%u&UQM%l^~uZw75$;x zEsqV5=Q;(xZYM465%37CfNp+D?7t~gMs-qGroP(G&8?4({j72Lrd4kosw;=f@QLpB zr)C3N?M?QLcKT&?x0Pnwtt`c_5RrGK`#W3hgJaI#>uHvA=1=19k-xQbTtMDS(Zfb# zztaXTY=_r2wwoqdvfF~~$@SLG`J|O?Cf{uFDCT{Mtu~XrwZu+$8b>iFXRO_99>vPc z$a%S(k#jr94)eZi_|B*o+y$>s+;;-s6*+s7I}d8qbT0N{JbyTB>$oCkj+}es%#pL# z$Z5;@e%7|0I{UNVG2j}4mA{V09PJUT3i~)1Mb8J9`^Luc{etAgF#g$`=ND_L@Z;In z@Xx`kl{0SbKH!?>uEzR-oQRqM>=3>$MvZflQxmzt!k3TS1#JBe*OQ+=gzp*FFh`BO zUt}K#&eOXM7qf=1tu$_nTIFFau^6@UOu;s-6cd5a77d27;7CY)O^IqdU-{G61#(q}V<^H02 G3jP6cY;kM= diff --git a/src/xenia/ui/vulkan/shaders/bin/blit_vert.txt b/src/xenia/ui/vulkan/shaders/bin/blit_vert.txt deleted file mode 100644 index 7f423f63d..000000000 --- a/src/xenia/ui/vulkan/shaders/bin/blit_vert.txt +++ /dev/null @@ -1,99 +0,0 @@ -; 
SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 6 -; Bound: 76 -; Schema: 0 - OpCapability Shader - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Vertex %main "main" %gl_VertexIndex %_ %vtx_uv - OpSource GLSL 450 - OpName %main "main" - OpName %gl_VertexIndex "gl_VertexIndex" - OpName %indexable "indexable" - OpName %PushConstants "PushConstants" - OpMemberName %PushConstants 0 "src_uv" - OpMemberName %PushConstants 1 "dst_uv" - OpName %push_constants "push_constants" - OpName %gl_PerVertex "gl_PerVertex" - OpMemberName %gl_PerVertex 0 "gl_Position" - OpMemberName %gl_PerVertex 1 "gl_PointSize" - OpMemberName %gl_PerVertex 2 "gl_ClipDistance" - OpMemberName %gl_PerVertex 3 "gl_CullDistance" - OpName %_ "" - OpName %vtx_uv "vtx_uv" - OpDecorate %gl_VertexIndex BuiltIn VertexIndex - OpMemberDecorate %PushConstants 0 Offset 0 - OpMemberDecorate %PushConstants 1 Offset 16 - OpDecorate %PushConstants Block - OpMemberDecorate %gl_PerVertex 0 BuiltIn Position - OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize - OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance - OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance - OpDecorate %gl_PerVertex Block - OpDecorate %vtx_uv Location 0 - %void = OpTypeVoid - %3 = OpTypeFunction %void - %float = OpTypeFloat 32 - %v2float = OpTypeVector %float 2 -%_ptr_Function_v2float = OpTypePointer Function %v2float - %uint = OpTypeInt 32 0 - %uint_4 = OpConstant %uint 4 -%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4 - %float_0 = OpConstant %float 0 - %14 = OpConstantComposite %v2float %float_0 %float_0 - %float_1 = OpConstant %float 1 - %16 = OpConstantComposite %v2float %float_1 %float_0 - %17 = OpConstantComposite %v2float %float_0 %float_1 - %18 = OpConstantComposite %v2float %float_1 %float_1 - %19 = OpConstantComposite %_arr_v2float_uint_4 %14 %16 %17 %18 - %int = OpTypeInt 32 1 -%_ptr_Input_int = OpTypePointer Input %int -%gl_VertexIndex = OpVariable 
%_ptr_Input_int Input -%_ptr_Function__arr_v2float_uint_4 = OpTypePointer Function %_arr_v2float_uint_4 - %float_2 = OpConstant %float 2 - %v4float = OpTypeVector %float 4 -%PushConstants = OpTypeStruct %v4float %v4float -%_ptr_PushConstant_PushConstants = OpTypePointer PushConstant %PushConstants -%push_constants = OpVariable %_ptr_PushConstant_PushConstants PushConstant - %int_1 = OpConstant %int 1 -%_ptr_PushConstant_v4float = OpTypePointer PushConstant %v4float - %44 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 - %uint_1 = OpConstant %uint 1 -%_arr_float_uint_1 = OpTypeArray %float %uint_1 -%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 -%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex - %_ = OpVariable %_ptr_Output_gl_PerVertex Output - %int_0 = OpConstant %int 0 -%_ptr_Output_v4float = OpTypePointer Output %v4float -%_ptr_Output_v2float = OpTypePointer Output %v2float - %vtx_uv = OpVariable %_ptr_Output_v2float Output - %main = OpFunction %void None %3 - %5 = OpLabel - %indexable = OpVariable %_ptr_Function__arr_v2float_uint_4 Function - %23 = OpLoad %int %gl_VertexIndex - OpStore %indexable %19 - %26 = OpAccessChain %_ptr_Function_v2float %indexable %23 - %27 = OpLoad %v2float %26 - %42 = OpAccessChain %_ptr_PushConstant_v4float %push_constants %int_1 - %43 = OpLoad %v4float %42 - %45 = OpFMul %v4float %43 %44 - %53 = OpVectorShuffle %v2float %45 %45 0 1 - %54 = OpFSub %v2float %53 %18 - %57 = OpVectorShuffle %v2float %45 %45 2 3 - %58 = OpFMul %v2float %27 %57 - %59 = OpFAdd %v2float %54 %58 - %60 = OpCompositeExtract %float %59 0 - %61 = OpCompositeExtract %float %59 1 - %62 = OpCompositeConstruct %v4float %60 %61 %float_0 %float_1 - %64 = OpAccessChain %_ptr_Output_v4float %_ %int_0 - OpStore %64 %62 - %68 = OpAccessChain %_ptr_PushConstant_v4float %push_constants %int_0 - %69 = OpLoad %v4float %68 - %70 = OpVectorShuffle %v2float %69 %69 2 3 - %71 = OpFMul %v2float %27 %70 - %74 = 
OpVectorShuffle %v2float %69 %69 0 1 - %75 = OpFAdd %v2float %71 %74 - OpStore %vtx_uv %75 - OpReturn - OpFunctionEnd diff --git a/src/xenia/ui/vulkan/shaders/bin/immediate_frag.h b/src/xenia/ui/vulkan/shaders/bin/immediate_frag.h deleted file mode 100644 index fe463a1a4..000000000 --- a/src/xenia/ui/vulkan/shaders/bin/immediate_frag.h +++ /dev/null @@ -1,109 +0,0 @@ -// generated from `xb genspirv` -// source: immediate.frag -const uint8_t immediate_frag[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x06, 0x00, 0x08, 0x00, - 0x33, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4C, 0x53, 0x4C, - 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, - 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x08, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, - 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6F, 0x75, 0x74, 0x5F, - 0x63, 0x6F, 0x6C, 0x6F, 0x72, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x76, 0x74, 0x78, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, - 0x72, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x50, 0x75, 0x73, 0x68, 0x43, 0x6F, 0x6E, 0x73, 0x74, 0x61, 0x6E, 0x74, - 0x73, 0x00, 0x00, 0x00, 0x06, 0x00, 0x0A, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x72, 0x65, 0x73, 0x74, 0x72, 0x69, 0x63, 0x74, - 0x5F, 0x74, 0x65, 0x78, 0x74, 0x75, 0x72, 0x65, 0x5F, 0x73, 0x61, 0x6D, - 0x70, 0x6C, 0x65, 0x73, 0x00, 0x00, 0x00, 0x00, 
0x05, 0x00, 0x06, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x70, 0x75, 0x73, 0x68, 0x5F, 0x63, 0x6F, 0x6E, - 0x73, 0x74, 0x61, 0x6E, 0x74, 0x73, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x76, 0x74, 0x78, 0x5F, 0x75, 0x76, 0x00, 0x00, - 0x05, 0x00, 0x06, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x74, 0x65, 0x78, 0x74, - 0x75, 0x72, 0x65, 0x5F, 0x73, 0x61, 0x6D, 0x70, 0x6C, 0x65, 0x72, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, - 0x40, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x2C, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x03, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 
0x20, 0x00, 0x04, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x04, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x1B, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x1B, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x04, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x80, 0x3F, 0x19, 0x00, 0x09, 0x00, 0x29, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x03, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x29, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x2B, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 
0x11, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x05, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, - 0x12, 0x00, 0x00, 0x00, 0xA8, 0x00, 0x04, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0xF7, 0x00, 0x03, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x18, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xBC, 0x00, 0x05, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x19, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x19, 0x00, 0x00, 0x00, 0xF5, 0x00, 0x07, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0xF7, 0x00, 0x03, 0x00, 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xFA, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x26, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, - 0x2C, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x1A, 0x00, 0x00, 0x00, - 0x2E, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, - 0x2E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x31, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x85, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x2F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 
0x09, 0x00, 0x00, 0x00, - 0x32, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x26, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x26, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, - 0x38, 0x00, 0x01, 0x00, -}; diff --git a/src/xenia/ui/vulkan/shaders/bin/immediate_frag.spv b/src/xenia/ui/vulkan/shaders/bin/immediate_frag.spv deleted file mode 100644 index bfb164d1c7a262709e102618dfee6fdf27a08f00..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1252 zcmY+COKTHh6oyZmCe~J4wKr?EPOJ(Vjdu~m8!qCai-HT6A=H3{HYqcc(xn^!i})K{ z`JcoK2tH3{M&bu=&UfB({oYA!dh0~QR5W5X9z=1@#xzWXwuPN1s^8guyWP(6_w9#| z?wYv}HO)A-Nc{|VanRcv$e#rB;37Cft}%vjtI;{)8%^u5lQnf1>xQGeyF1(;rrhQ1 zbCe%XaE(}m+ZkmaUJnOZ-W%i@ek10_Jl9D3S)T6g=H0x1oR89eH|rf7?)OcjV}tia z98yoye+}HneVsKrD)MgO|KB2w4tegm2|W7lsIOLUnBab%E$|X_c(d zz17>Ju0LVw*k`cndJ3rNbE3Xm{}mbPung2neau&r6(IkG9Q9ub!kpKVy diff --git a/src/xenia/ui/vulkan/shaders/bin/immediate_frag.txt b/src/xenia/ui/vulkan/shaders/bin/immediate_frag.txt deleted file mode 100644 index 68aeadb07..000000000 --- a/src/xenia/ui/vulkan/shaders/bin/immediate_frag.txt +++ /dev/null @@ -1,83 +0,0 @@ -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 6 -; Bound: 51 -; Schema: 0 - OpCapability Shader - OpCapability Sampled1D - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Fragment %main "main" %out_color %vtx_color %vtx_uv - OpExecutionMode %main OriginUpperLeft - OpSource GLSL 450 - OpName %main "main" - OpName %out_color "out_color" - OpName %vtx_color "vtx_color" - OpName %PushConstants "PushConstants" - OpMemberName %PushConstants 0 "restrict_texture_samples" - OpName %push_constants "push_constants" - OpName %vtx_uv "vtx_uv" - OpName %texture_sampler "texture_sampler" - OpDecorate %out_color Location 0 - OpDecorate %vtx_color Location 1 - OpMemberDecorate %PushConstants 0 Offset 64 - OpDecorate %PushConstants Block - OpDecorate %vtx_uv Location 0 - OpDecorate %texture_sampler DescriptorSet 0 - 
OpDecorate %texture_sampler Binding 0 - %void = OpTypeVoid - %3 = OpTypeFunction %void - %float = OpTypeFloat 32 - %v4float = OpTypeVector %float 4 -%_ptr_Output_v4float = OpTypePointer Output %v4float - %out_color = OpVariable %_ptr_Output_v4float Output -%_ptr_Input_v4float = OpTypePointer Input %v4float - %vtx_color = OpVariable %_ptr_Input_v4float Input - %bool = OpTypeBool - %int = OpTypeInt 32 1 -%PushConstants = OpTypeStruct %int -%_ptr_PushConstant_PushConstants = OpTypePointer PushConstant %PushConstants -%push_constants = OpVariable %_ptr_PushConstant_PushConstants PushConstant - %int_0 = OpConstant %int 0 -%_ptr_PushConstant_int = OpTypePointer PushConstant %int - %v2float = OpTypeVector %float 2 -%_ptr_Input_v2float = OpTypePointer Input %v2float - %vtx_uv = OpVariable %_ptr_Input_v2float Input - %uint = OpTypeInt 32 0 - %uint_0 = OpConstant %uint 0 -%_ptr_Input_float = OpTypePointer Input %float - %float_1 = OpConstant %float 1 - %41 = OpTypeImage %float 2D 0 0 0 1 Unknown - %42 = OpTypeSampledImage %41 -%_ptr_UniformConstant_42 = OpTypePointer UniformConstant %42 -%texture_sampler = OpVariable %_ptr_UniformConstant_42 UniformConstant - %main = OpFunction %void None %3 - %5 = OpLabel - %12 = OpLoad %v4float %vtx_color - OpStore %out_color %12 - %20 = OpAccessChain %_ptr_PushConstant_int %push_constants %int_0 - %21 = OpLoad %int %20 - %22 = OpIEqual %bool %21 %int_0 - %23 = OpLogicalNot %bool %22 - OpSelectionMerge %25 None - OpBranchConditional %23 %24 %25 - %24 = OpLabel - %32 = OpAccessChain %_ptr_Input_float %vtx_uv %uint_0 - %33 = OpLoad %float %32 - %35 = OpFOrdLessThanEqual %bool %33 %float_1 - OpBranch %25 - %25 = OpLabel - %36 = OpPhi %bool %22 %5 %35 %24 - OpSelectionMerge %38 None - OpBranchConditional %36 %37 %38 - %37 = OpLabel - %45 = OpLoad %42 %texture_sampler - %46 = OpLoad %v2float %vtx_uv - %47 = OpImageSampleImplicitLod %v4float %45 %46 - %49 = OpLoad %v4float %out_color - %50 = OpFMul %v4float %49 %47 - OpStore %out_color %50 - 
OpBranch %38 - %38 = OpLabel - OpReturn - OpFunctionEnd diff --git a/src/xenia/ui/vulkan/shaders/bin/immediate_vert.h b/src/xenia/ui/vulkan/shaders/bin/immediate_vert.h deleted file mode 100644 index 7a2941e27..000000000 --- a/src/xenia/ui/vulkan/shaders/bin/immediate_vert.h +++ /dev/null @@ -1,128 +0,0 @@ -// generated from `xb genspirv` -// source: immediate.vert -const uint8_t immediate_vert[] = { - 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x06, 0x00, 0x08, 0x00, - 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, - 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, - 0x2A, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, - 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, - 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, - 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x07, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, - 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, - 0x06, 0x00, 0x07, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x67, 0x6C, 0x5F, 0x43, 0x75, 0x6C, 
0x6C, 0x44, 0x69, 0x73, 0x74, 0x61, - 0x6E, 0x63, 0x65, 0x00, 0x05, 0x00, 0x03, 0x00, 0x0D, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x50, 0x75, 0x73, 0x68, 0x43, 0x6F, 0x6E, 0x73, 0x74, 0x61, 0x6E, 0x74, - 0x73, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x70, 0x72, 0x6F, 0x6A, 0x65, 0x63, 0x74, 0x69, - 0x6F, 0x6E, 0x5F, 0x6D, 0x61, 0x74, 0x72, 0x69, 0x78, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x06, 0x00, 0x13, 0x00, 0x00, 0x00, 0x70, 0x75, 0x73, 0x68, - 0x5F, 0x63, 0x6F, 0x6E, 0x73, 0x74, 0x61, 0x6E, 0x74, 0x73, 0x00, 0x00, - 0x05, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x5F, 0x70, - 0x6F, 0x73, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, - 0x76, 0x74, 0x78, 0x5F, 0x75, 0x76, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, - 0x2A, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x5F, 0x75, 0x76, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x05, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x76, 0x74, 0x78, 0x5F, - 0x63, 0x6F, 0x6C, 0x6F, 0x72, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, - 0x2E, 0x00, 0x00, 0x00, 0x69, 0x6E, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x72, - 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x03, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x19, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x2C, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x06, 0x00, 0x0B, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x0A, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x15, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x1E, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 
0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x11, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3F, 0x20, 0x00, 0x04, 0x00, - 0x21, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x28, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x2C, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x2D, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x3B, 0x00, 0x04, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x1A, 0x00, 
0x00, 0x00, 0x19, 0x00, 0x00, 0x00, - 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, - 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x1B, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, - 0x1F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, - 0x22, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, - 0x17, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, - 0x3E, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, - 0x2E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x2C, 0x00, 0x00, 0x00, - 0x2F, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, -}; diff --git a/src/xenia/ui/vulkan/shaders/bin/immediate_vert.spv b/src/xenia/ui/vulkan/shaders/bin/immediate_vert.spv deleted file mode 100644 index a8f67216478dd1c38799e3a7ba3e9c2d6f00bdb5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1488 zcmYk4T~8B16o!YE1w;h_`B1U8Rt;ZPFVvVAH3XA#!9@)Tx0|MHV9c`H>~0C+2L1wn 
z$zSD-iSM&JQ#+fpXWsXG%sFSKUTe=9Gi7GXoOy4mwPb2yjJYos-90)!>g2^~=flU+ zWx>=-M{JAoVoggf`Zi(zQ1V2wE!mOmNjj3czMAxJ1qOB4SHVKER5`me|WpNdb zE-TEeWCzL~lyk#ANaHkf9CMV9r_7=JsvF?Gs^vLm1Bdn;Gl8Rj&#{N&KDx57$L~;# z?=>iKFve|R7CF3+9+G(^AqN~ex~}hr_AD@cPh$E8Qzv%b%Os{}F!@&WN4Inab+cd7 zZbrH(Syw!Dfazyd|2gTVgthDNJ?%MfQ+Qq)f4GKr^zUQ zepO!10WV5e8^X+nywufD6f=S&_p&tS!;u@zJA%Xh*zNJB$8~9Hrhn`k(o+(6;=#N( zIBLQ7nS|x>W{GaU6W`LFKBy1ec9=Numcu?jc-wLQJnoV6z~ti|$w~ZMX>8=j4(49r zKIsk72XkQ&^G6JCZeM?TUzIQq=J{2g%wbIe52miC5_ -#include - -#include "xenia/base/assert.h" -#include "xenia/base/logging.h" -#include "xenia/base/math.h" -#include "xenia/base/profiling.h" -#include "xenia/ui/vulkan/vulkan.h" -#include "xenia/ui/vulkan/vulkan_device.h" #include "xenia/ui/vulkan/vulkan_immediate_drawer.h" -#include "xenia/ui/vulkan/vulkan_instance.h" #include "xenia/ui/vulkan/vulkan_provider.h" -#include "xenia/ui/vulkan/vulkan_swap_chain.h" -#include "xenia/ui/vulkan/vulkan_util.h" -#include "xenia/ui/window.h" - -#if XE_PLATFORM_LINUX -#include "xenia/ui/window_gtk.h" - -#include -#endif namespace xe { namespace ui { @@ -38,164 +19,18 @@ namespace vulkan { VulkanContext::VulkanContext(VulkanProvider* provider, Window* target_window) : GraphicsContext(provider, target_window) {} -VulkanContext::~VulkanContext() { - VkResult status; - auto provider = static_cast(provider_); - auto device = provider->device(); - { - std::lock_guard queue_lock(device->primary_queue_mutex()); - status = vkQueueWaitIdle(device->primary_queue()); - } - immediate_drawer_.reset(); - swap_chain_.reset(); -} - -bool VulkanContext::Initialize() { - auto provider = static_cast(provider_); - auto device = provider->device(); - - if (target_window_) { - // Create swap chain used to present to the window. 
- VkResult status = VK_ERROR_FEATURE_NOT_PRESENT; - VkSurfaceKHR surface = nullptr; -#if XE_PLATFORM_WIN32 - VkWin32SurfaceCreateInfoKHR create_info; - create_info.sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR; - create_info.pNext = nullptr; - create_info.flags = 0; - create_info.hinstance = - static_cast(target_window_->native_platform_handle()); - create_info.hwnd = static_cast(target_window_->native_handle()); - status = vkCreateWin32SurfaceKHR(*provider->instance(), &create_info, - nullptr, &surface); - CheckResult(status, "vkCreateWin32SurfaceKHR"); -#elif XE_PLATFORM_LINUX -#ifdef GDK_WINDOWING_X11 - GtkWidget* window_handle = - static_cast(target_window_->native_handle()); - GdkDisplay* gdk_display = gtk_widget_get_display(window_handle); - assert(GDK_IS_X11_DISPLAY(gdk_display)); - xcb_connection_t* connection = - XGetXCBConnection(gdk_x11_display_get_xdisplay(gdk_display)); - xcb_window_t window = - gdk_x11_window_get_xid(gtk_widget_get_window(window_handle)); - VkXcbSurfaceCreateInfoKHR create_info; - create_info.sType = VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR; - create_info.pNext = nullptr; - create_info.flags = 0; - create_info.connection = static_cast( - target_window_->native_platform_handle()); - create_info.window = static_cast(window); - status = vkCreateXcbSurfaceKHR(*provider->instance(), &create_info, nullptr, - &surface); - CheckResult(status, "vkCreateXcbSurfaceKHR"); -#else -#error Unsupported GDK Backend on Linux. -#endif // GDK_WINDOWING_X11 -#else -#error Platform not yet implemented. -#endif // XE_PLATFORM_WIN32 - if (status != VK_SUCCESS) { - XELOGE("Failed to create presentation surface"); - return false; - } - - swap_chain_ = std::make_unique(provider->instance(), - provider->device()); - if (swap_chain_->Initialize(surface) != VK_SUCCESS) { - XELOGE("Unable to initialize swap chain"); - return false; - } - - // Only initialize immediate mode drawer if we are not an offscreen context. 
- immediate_drawer_ = std::make_unique(this); - status = immediate_drawer_->Initialize(); - if (status != VK_SUCCESS) { - XELOGE("Failed to initialize the immediate mode drawer"); - immediate_drawer_.reset(); - return false; - } - } - - return true; -} +bool VulkanContext::Initialize() { return false; } ImmediateDrawer* VulkanContext::immediate_drawer() { return immediate_drawer_.get(); } -VulkanInstance* VulkanContext::instance() const { - return static_cast(provider_)->instance(); -} +void VulkanContext::BeginSwap() {} -VulkanDevice* VulkanContext::device() const { - return static_cast(provider_)->device(); -} - -bool VulkanContext::is_current() { return false; } - -bool VulkanContext::MakeCurrent() { - SCOPE_profile_cpu_f("gpu"); - return true; -} - -void VulkanContext::ClearCurrent() {} - -void VulkanContext::BeginSwap() { - SCOPE_profile_cpu_f("gpu"); - auto provider = static_cast(provider_); - auto device = provider->device(); - - VkResult status; - - // If we have a window see if it's been resized since we last swapped. - // If it has been, we'll need to reinitialize the swap chain before we - // start touching it. - if (target_window_) { - if (target_window_->scaled_width() != swap_chain_->surface_width() || - target_window_->scaled_height() != swap_chain_->surface_height()) { - // Resized! - swap_chain_->Reinitialize(); - } - } - - if (!context_lost_) { - // Acquire the next image and set it up for use. - status = swap_chain_->Begin(); - if (status == VK_ERROR_DEVICE_LOST) { - context_lost_ = true; - } - } - - // TODO(benvanik): use a fence instead? May not be possible with target image. - std::lock_guard queue_lock(device->primary_queue_mutex()); - status = vkQueueWaitIdle(device->primary_queue()); -} - -void VulkanContext::EndSwap() { - SCOPE_profile_cpu_f("gpu"); - auto provider = static_cast(provider_); - auto device = provider->device(); - - VkResult status; - - if (!context_lost_) { - // Notify the presentation engine the image is ready. 
- // The contents must be in a coherent state. - status = swap_chain_->End(); - if (status == VK_ERROR_DEVICE_LOST) { - context_lost_ = true; - } - } - - // Wait until the queue is idle. - // TODO(benvanik): is this required? - std::lock_guard queue_lock(device->primary_queue_mutex()); - status = vkQueueWaitIdle(device->primary_queue()); -} +void VulkanContext::EndSwap() {} std::unique_ptr VulkanContext::Capture() { - // TODO(benvanik): read back swap chain front buffer. + // TODO(Triang3l): Read back swap chain front buffer. return nullptr; } diff --git a/src/xenia/ui/vulkan/vulkan_context.h b/src/xenia/ui/vulkan/vulkan_context.h index 3665ffd78..880e99561 100644 --- a/src/xenia/ui/vulkan/vulkan_context.h +++ b/src/xenia/ui/vulkan/vulkan_context.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * + * Copyright 2020 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ @@ -13,51 +13,39 @@ #include #include "xenia/ui/graphics_context.h" -#include "xenia/ui/vulkan/vulkan.h" +#include "xenia/ui/vulkan/vulkan_immediate_drawer.h" +#include "xenia/ui/vulkan/vulkan_provider.h" namespace xe { namespace ui { namespace vulkan { -class VulkanDevice; -class VulkanImmediateDrawer; -class VulkanInstance; -class VulkanProvider; -class VulkanSwapChain; - class VulkanContext : public GraphicsContext { public: - ~VulkanContext() override; - ImmediateDrawer* immediate_drawer() override; - VulkanSwapChain* swap_chain() const { return swap_chain_.get(); } - VulkanInstance* instance() const; - VulkanDevice* device() const; - bool is_current() override; - bool MakeCurrent() override; - void ClearCurrent() override; - - bool WasLost() override { return context_lost_; } + // Returns true if the OS took away our context because we caused a TDR or + // some other outstanding error. When this happens, this context, as well as + // any other shared contexts are junk. + // This context must be made current in order for this call to work properly. 
+ bool WasLost() override { return false; } void BeginSwap() override; void EndSwap() override; std::unique_ptr Capture() override; - protected: - bool context_lost_ = false; + VulkanProvider* GetVulkanProvider() const { + return static_cast(provider_); + } private: friend class VulkanProvider; - explicit VulkanContext(VulkanProvider* provider, Window* target_window); - - private: bool Initialize(); - std::unique_ptr swap_chain_; - std::unique_ptr immediate_drawer_; + private: + std::unique_ptr immediate_drawer_ = nullptr; }; } // namespace vulkan diff --git a/src/xenia/ui/vulkan/vulkan_device.cc b/src/xenia/ui/vulkan/vulkan_device.cc deleted file mode 100644 index 275a45070..000000000 --- a/src/xenia/ui/vulkan/vulkan_device.cc +++ /dev/null @@ -1,417 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2017 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#include "xenia/ui/vulkan/vulkan_device.h" - -#include -#include -#include -#include - -#include "third_party/renderdoc/renderdoc_app.h" - -#include "xenia/base/assert.h" -#include "xenia/base/logging.h" -#include "xenia/base/math.h" -#include "xenia/base/profiling.h" -#include "xenia/ui/vulkan/vulkan.h" -#include "xenia/ui/vulkan/vulkan_immediate_drawer.h" -#include "xenia/ui/vulkan/vulkan_instance.h" -#include "xenia/ui/vulkan/vulkan_util.h" -#include "xenia/ui/window.h" - -namespace xe { -namespace ui { -namespace vulkan { - -VulkanDevice::VulkanDevice(VulkanInstance* instance) : instance_(instance) { - if (cvars::vulkan_validation) { - DeclareRequiredLayer("VK_LAYER_LUNARG_standard_validation", - Version::Make(0, 0, 0), true); - // DeclareRequiredLayer("VK_LAYER_GOOGLE_unique_objects", Version::Make(0, - // 0, 0), true); - /* - DeclareRequiredLayer("VK_LAYER_GOOGLE_threading", Version::Make(0, 0, 0), - true); - DeclareRequiredLayer("VK_LAYER_LUNARG_core_validation", - Version::Make(0, 0, 0), true); - DeclareRequiredLayer("VK_LAYER_LUNARG_object_tracker", - Version::Make(0, 0, 0), true); - DeclareRequiredLayer("VK_LAYER_LUNARG_draw_state", Version::Make(0, 0, 0), - true); - DeclareRequiredLayer("VK_LAYER_LUNARG_parameter_validation", - Version::Make(0, 0, 0), true); - DeclareRequiredLayer("VK_LAYER_LUNARG_swapchain", Version::Make(0, 0, 0), - true); - DeclareRequiredLayer("VK_LAYER_LUNARG_device_limits", - Version::Make(0, 0, 0), true); - DeclareRequiredLayer("VK_LAYER_LUNARG_image", Version::Make(0, 0, 0), true); - */ - } - - // AMD shader info (optional) - DeclareRequiredExtension(VK_AMD_SHADER_INFO_EXTENSION_NAME, - Version::Make(0, 0, 0), true); - // Debug markers (optional) - DeclareRequiredExtension(VK_EXT_DEBUG_MARKER_EXTENSION_NAME, - Version::Make(0, 0, 0), true); - - DeclareRequiredExtension(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, - Version::Make(0, 
0, 0), false); -} - -VulkanDevice::~VulkanDevice() { - if (handle) { - vkDestroyDevice(handle, nullptr); - handle = nullptr; - } -} - -bool VulkanDevice::Initialize(DeviceInfo device_info) { - // Gather list of enabled layer names. - auto layers_result = CheckRequirements(required_layers_, device_info.layers); - auto& enabled_layers = layers_result.second; - - // Gather list of enabled extension names. - auto extensions_result = - CheckRequirements(required_extensions_, device_info.extensions); - enabled_extensions_ = extensions_result.second; - - // We wait until both extensions and layers are checked before failing out so - // that the user gets a complete list of what they have/don't. - if (!extensions_result.first || !layers_result.first) { - FatalVulkanError( - "Layer and extension verification failed; aborting initialization"); - return false; - } - - // Query supported features so we can make sure we have what we need. - VkPhysicalDeviceFeatures supported_features; - vkGetPhysicalDeviceFeatures(device_info.handle, &supported_features); - VkPhysicalDeviceFeatures enabled_features = {0}; - bool any_features_missing = false; -#define ENABLE_AND_EXPECT(name) \ - if (!supported_features.name) { \ - any_features_missing = true; \ - FatalVulkanError("Vulkan device is missing feature " #name); \ - } else { \ - enabled_features.name = VK_TRUE; \ - } - ENABLE_AND_EXPECT(shaderClipDistance); - ENABLE_AND_EXPECT(shaderCullDistance); - ENABLE_AND_EXPECT(shaderStorageImageExtendedFormats); - ENABLE_AND_EXPECT(shaderTessellationAndGeometryPointSize); - ENABLE_AND_EXPECT(samplerAnisotropy); - ENABLE_AND_EXPECT(geometryShader); - ENABLE_AND_EXPECT(depthClamp); - ENABLE_AND_EXPECT(multiViewport); - ENABLE_AND_EXPECT(independentBlend); - ENABLE_AND_EXPECT(textureCompressionBC); - // TODO(benvanik): add other features. 
- if (any_features_missing) { - XELOGE( - "One or more required device features are missing; aborting " - "initialization"); - return false; - } - - // Pick a queue. - // Any queue we use must support both graphics and presentation. - // TODO(benvanik): use multiple queues (DMA-only, compute-only, etc). - if (device_info.queue_family_properties.empty()) { - FatalVulkanError("No queue families available"); - return false; - } - uint32_t ideal_queue_family_index = UINT_MAX; - uint32_t queue_count = 1; - for (size_t i = 0; i < device_info.queue_family_properties.size(); ++i) { - auto queue_flags = device_info.queue_family_properties[i].queueFlags; - if (queue_flags & VK_QUEUE_GRAPHICS_BIT && - queue_flags & VK_QUEUE_TRANSFER_BIT) { - // Can do graphics and transfer - good! - ideal_queue_family_index = static_cast(i); - // Grab all the queues we can. - queue_count = device_info.queue_family_properties[i].queueCount; - break; - } - } - if (ideal_queue_family_index == UINT_MAX) { - FatalVulkanError( - "No queue families available that can both do graphics and transfer"); - return false; - } - - // Some tools *cough* renderdoc *cough* can't handle multiple queues. - if (cvars::vulkan_primary_queue_only) { - queue_count = 1; - } - - std::vector queue_infos; - std::vector> queue_priorities; - queue_infos.resize(device_info.queue_family_properties.size()); - queue_priorities.resize(queue_infos.size()); - for (int i = 0; i < queue_infos.size(); i++) { - VkDeviceQueueCreateInfo& queue_info = queue_infos[i]; - VkQueueFamilyProperties& family_props = - device_info.queue_family_properties[i]; - - queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; - queue_info.pNext = nullptr; - queue_info.flags = 0; - queue_info.queueFamilyIndex = i; - queue_info.queueCount = family_props.queueCount; - - queue_priorities[i].resize(family_props.queueCount, 0.f); - if (i == ideal_queue_family_index) { - // Prioritize the first queue on the primary queue family. 
- queue_priorities[i][0] = 1.0f; - } - - queue_info.pQueuePriorities = queue_priorities[i].data(); - } - - VkDeviceCreateInfo create_info; - create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; - create_info.pNext = nullptr; - create_info.flags = 0; - create_info.queueCreateInfoCount = static_cast(queue_infos.size()); - create_info.pQueueCreateInfos = queue_infos.data(); - create_info.enabledLayerCount = static_cast(enabled_layers.size()); - create_info.ppEnabledLayerNames = enabled_layers.data(); - create_info.enabledExtensionCount = - static_cast(enabled_extensions_.size()); - create_info.ppEnabledExtensionNames = enabled_extensions_.data(); - create_info.pEnabledFeatures = &enabled_features; - - auto err = vkCreateDevice(device_info.handle, &create_info, nullptr, &handle); - switch (err) { - case VK_SUCCESS: - // Ok! - break; - case VK_ERROR_INITIALIZATION_FAILED: - FatalVulkanError("Device initialization failed; generic"); - return false; - case VK_ERROR_EXTENSION_NOT_PRESENT: - FatalVulkanError( - "Device initialization failed; requested extension not present"); - return false; - case VK_ERROR_LAYER_NOT_PRESENT: - FatalVulkanError( - "Device initialization failed; requested layer not present"); - return false; - default: - FatalVulkanError(std::string("Device initialization failed; unknown: ") + - to_string(err)); - return false; - } - - // Set flags so we can track enabled extensions easily. 
- for (auto& ext : enabled_extensions_) { - if (!std::strcmp(ext, VK_EXT_DEBUG_MARKER_EXTENSION_NAME)) { - debug_marker_ena_ = true; - pfn_vkDebugMarkerSetObjectNameEXT_ = - (PFN_vkDebugMarkerSetObjectNameEXT)vkGetDeviceProcAddr( - *this, "vkDebugMarkerSetObjectNameEXT"); - pfn_vkCmdDebugMarkerBeginEXT_ = - (PFN_vkCmdDebugMarkerBeginEXT)vkGetDeviceProcAddr( - *this, "vkCmdDebugMarkerBeginEXT"); - pfn_vkCmdDebugMarkerEndEXT_ = - (PFN_vkCmdDebugMarkerEndEXT)vkGetDeviceProcAddr( - *this, "vkCmdDebugMarkerEndEXT"); - pfn_vkCmdDebugMarkerInsertEXT_ = - (PFN_vkCmdDebugMarkerInsertEXT)vkGetDeviceProcAddr( - *this, "vkCmdDebugMarkerInsertEXT"); - } - } - - device_info_ = std::move(device_info); - queue_family_index_ = ideal_queue_family_index; - - // Get the primary queue used for most submissions/etc. - vkGetDeviceQueue(handle, queue_family_index_, 0, &primary_queue_); - if (!primary_queue_) { - XELOGE("vkGetDeviceQueue returned nullptr!"); - return false; - } - - // Get all additional queues, if we got any. - free_queues_.resize(device_info_.queue_family_properties.size()); - for (uint32_t i = 0; i < device_info_.queue_family_properties.size(); i++) { - VkQueueFamilyProperties& family_props = - device_info_.queue_family_properties[i]; - - for (uint32_t j = 0; j < family_props.queueCount; j++) { - VkQueue queue = nullptr; - if (i == queue_family_index_ && j == 0) { - // Already retrieved the primary queue index. 
- continue; - } - - vkGetDeviceQueue(handle, i, j, &queue); - if (queue) { - free_queues_[i].push_back(queue); - } - } - } - - XELOGVK("Device initialized successfully!"); - return true; -} - -bool VulkanDevice::HasEnabledExtension(const char* name) { - for (auto extension : enabled_extensions_) { - if (!std::strcmp(extension, name)) { - return true; - } - } - - return false; -} - -VkQueue VulkanDevice::AcquireQueue(uint32_t queue_family_index) { - std::lock_guard lock(queue_mutex_); - if (free_queues_[queue_family_index].empty()) { - return nullptr; - } - - auto queue = free_queues_[queue_family_index].back(); - free_queues_[queue_family_index].pop_back(); - return queue; -} - -void VulkanDevice::ReleaseQueue(VkQueue queue, uint32_t queue_family_index) { - std::lock_guard lock(queue_mutex_); - free_queues_[queue_family_index].push_back(queue); -} - -void VulkanDevice::DbgSetObjectName(uint64_t object, - VkDebugReportObjectTypeEXT object_type, - std::string name) { - if (!debug_marker_ena_ || pfn_vkDebugMarkerSetObjectNameEXT_ == nullptr) { - // Extension disabled. - return; - } - - VkDebugMarkerObjectNameInfoEXT info; - info.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_OBJECT_NAME_INFO_EXT; - info.pNext = nullptr; - info.objectType = object_type; - info.object = object; - info.pObjectName = name.c_str(); - pfn_vkDebugMarkerSetObjectNameEXT_(*this, &info); -} - -void VulkanDevice::DbgMarkerBegin(VkCommandBuffer command_buffer, - std::string name, float r, float g, float b, - float a) { - if (!debug_marker_ena_ || pfn_vkCmdDebugMarkerBeginEXT_ == nullptr) { - // Extension disabled. 
- return; - } - - VkDebugMarkerMarkerInfoEXT info; - info.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT; - info.pNext = nullptr; - info.pMarkerName = name.c_str(); - info.color[0] = r; - info.color[1] = g; - info.color[2] = b; - info.color[3] = a; - pfn_vkCmdDebugMarkerBeginEXT_(command_buffer, &info); -} - -void VulkanDevice::DbgMarkerEnd(VkCommandBuffer command_buffer) { - if (!debug_marker_ena_ || pfn_vkCmdDebugMarkerEndEXT_ == nullptr) { - // Extension disabled. - return; - } - - pfn_vkCmdDebugMarkerEndEXT_(command_buffer); -} - -void VulkanDevice::DbgMarkerInsert(VkCommandBuffer command_buffer, - std::string name, float r, float g, float b, - float a) { - if (!debug_marker_ena_ || pfn_vkCmdDebugMarkerInsertEXT_ == nullptr) { - // Extension disabled. - return; - } - - VkDebugMarkerMarkerInfoEXT info; - info.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT; - info.pNext = nullptr; - info.pMarkerName = name.c_str(); - info.color[0] = r; - info.color[1] = g; - info.color[2] = g; - info.color[3] = b; - pfn_vkCmdDebugMarkerInsertEXT_(command_buffer, &info); -} - -bool VulkanDevice::is_renderdoc_attached() const { - return instance_->is_renderdoc_attached(); -} - -void VulkanDevice::BeginRenderDocFrameCapture() { - auto api = reinterpret_cast(instance_->renderdoc_api()); - if (!api) { - return; - } - assert_true(api->IsFrameCapturing() == 0); - - api->StartFrameCapture(nullptr, nullptr); -} - -void VulkanDevice::EndRenderDocFrameCapture() { - auto api = reinterpret_cast(instance_->renderdoc_api()); - if (!api) { - return; - } - assert_true(api->IsFrameCapturing() == 1); - - api->EndFrameCapture(nullptr, nullptr); -} - -VkDeviceMemory VulkanDevice::AllocateMemory( - const VkMemoryRequirements& requirements, VkFlags required_properties) { - // Search memory types to find one matching our requirements and our - // properties. 
- uint32_t type_index = UINT_MAX; - for (uint32_t i = 0; i < device_info_.memory_properties.memoryTypeCount; - ++i) { - const auto& memory_type = device_info_.memory_properties.memoryTypes[i]; - if (((requirements.memoryTypeBits >> i) & 1) == 1) { - // Type is available for use; check for a match on properties. - if ((memory_type.propertyFlags & required_properties) == - required_properties) { - type_index = i; - break; - } - } - } - if (type_index == UINT_MAX) { - XELOGE("Unable to find a matching memory type"); - return nullptr; - } - - // Allocate the memory. - VkMemoryAllocateInfo memory_info; - memory_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - memory_info.pNext = nullptr; - memory_info.allocationSize = requirements.size; - memory_info.memoryTypeIndex = type_index; - VkDeviceMemory memory = nullptr; - auto err = vkAllocateMemory(handle, &memory_info, nullptr, &memory); - CheckResult(err, "vkAllocateMemory"); - return memory; -} - -} // namespace vulkan -} // namespace ui -} // namespace xe diff --git a/src/xenia/ui/vulkan/vulkan_device.h b/src/xenia/ui/vulkan/vulkan_device.h deleted file mode 100644 index 498e6da28..000000000 --- a/src/xenia/ui/vulkan/vulkan_device.h +++ /dev/null @@ -1,131 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_UI_VULKAN_VULKAN_DEVICE_H_ -#define XENIA_UI_VULKAN_VULKAN_DEVICE_H_ - -#include -#include -#include -#include - -#include "xenia/ui/vulkan/vulkan.h" -#include "xenia/ui/vulkan/vulkan_util.h" - -namespace xe { -namespace ui { -namespace vulkan { - -class VulkanInstance; - -// Wrapper and utilities for VkDevice. 
-// Prefer passing this around over a VkDevice and casting as needed to call -// APIs. -class VulkanDevice { - public: - VulkanDevice(VulkanInstance* instance); - ~VulkanDevice(); - - VkDevice handle = nullptr; - - operator VkDevice() const { return handle; } - operator VkPhysicalDevice() const { return device_info_.handle; } - - // Declares a layer to verify and enable upon initialization. - // Must be called before Initialize. - void DeclareRequiredLayer(std::string name, uint32_t min_version, - bool is_optional) { - required_layers_.push_back({name, min_version, is_optional}); - } - - // Declares an extension to verify and enable upon initialization. - // Must be called before Initialize. - void DeclareRequiredExtension(std::string name, uint32_t min_version, - bool is_optional) { - required_extensions_.push_back({name, min_version, is_optional}); - } - - // Initializes the device, querying and enabling extensions and layers and - // preparing the device for general use. - // If initialization succeeds it's likely that no more failures beyond runtime - // issues will occur. - bool Initialize(DeviceInfo device_info); - - bool HasEnabledExtension(const char* name); - - uint32_t queue_family_index() const { return queue_family_index_; } - std::mutex& primary_queue_mutex() { return queue_mutex_; } - // Access to the primary queue must be synchronized with primary_queue_mutex. - VkQueue primary_queue() const { return primary_queue_; } - const DeviceInfo& device_info() const { return device_info_; } - - // Acquires a queue for exclusive use by the caller. - // The queue will not be touched by any other code until it's returned with - // ReleaseQueue. - // Not all devices support queues or only support a limited number. If this - // returns null the primary_queue should be used with the - // primary_queue_mutex. - // This method is thread safe. - VkQueue AcquireQueue(uint32_t queue_family_index); - // Releases a queue back to the device pool. 
- // This method is thread safe. - void ReleaseQueue(VkQueue queue, uint32_t queue_family_index); - - void DbgSetObjectName(uint64_t object, VkDebugReportObjectTypeEXT object_type, - std::string name); - - void DbgMarkerBegin(VkCommandBuffer command_buffer, std::string name, - float r = 0.0f, float g = 0.0f, float b = 0.0f, - float a = 0.0f); - void DbgMarkerEnd(VkCommandBuffer command_buffer); - - void DbgMarkerInsert(VkCommandBuffer command_buffer, std::string name, - float r = 0.0f, float g = 0.0f, float b = 0.0f, - float a = 0.0f); - - // True if RenderDoc is attached and available for use. - bool is_renderdoc_attached() const; - // Begins capturing the current frame in RenderDoc, if it is attached. - // Must be paired with EndRenderDocCapture. Multiple frames cannot be - // captured at the same time. - void BeginRenderDocFrameCapture(); - // Ends a capture. - void EndRenderDocFrameCapture(); - - // Allocates memory of the given size matching the required properties. - VkDeviceMemory AllocateMemory( - const VkMemoryRequirements& requirements, - VkFlags required_properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); - - private: - VulkanInstance* instance_ = nullptr; - - std::vector required_layers_; - std::vector required_extensions_; - std::vector enabled_extensions_; - - bool debug_marker_ena_ = false; - PFN_vkDebugMarkerSetObjectNameEXT pfn_vkDebugMarkerSetObjectNameEXT_ = - nullptr; - PFN_vkCmdDebugMarkerBeginEXT pfn_vkCmdDebugMarkerBeginEXT_ = nullptr; - PFN_vkCmdDebugMarkerEndEXT pfn_vkCmdDebugMarkerEndEXT_ = nullptr; - PFN_vkCmdDebugMarkerInsertEXT pfn_vkCmdDebugMarkerInsertEXT_ = nullptr; - - DeviceInfo device_info_; - uint32_t queue_family_index_ = 0; - std::mutex queue_mutex_; - VkQueue primary_queue_ = nullptr; - std::vector> free_queues_; -}; - -} // namespace vulkan -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_VULKAN_VULKAN_DEVICE_H_ diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc 
b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index a911ac332..3fc06ebd5 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -2,904 +2,36 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * + * Copyright 2020 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ #include "xenia/ui/vulkan/vulkan_immediate_drawer.h" -#include "xenia/base/assert.h" -#include "xenia/base/logging.h" -#include "xenia/base/math.h" -#include "xenia/ui/graphics_context.h" -#include "xenia/ui/vulkan/vulkan_context.h" -#include "xenia/ui/vulkan/vulkan_device.h" -#include "xenia/ui/vulkan/vulkan_swap_chain.h" - namespace xe { namespace ui { namespace vulkan { -// Generated with `xenia-build genspirv`. -#include "xenia/ui/vulkan/shaders/bin/immediate_frag.h" -#include "xenia/ui/vulkan/shaders/bin/immediate_vert.h" - -constexpr uint32_t kCircularBufferCapacity = 2 * 1024 * 1024; - -class LightweightCircularBuffer { - public: - LightweightCircularBuffer(VulkanDevice* device) : device_(*device) { - buffer_capacity_ = xe::round_up(kCircularBufferCapacity, 4096); - - // Index buffer. 
- VkBufferCreateInfo index_buffer_info; - index_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - index_buffer_info.pNext = nullptr; - index_buffer_info.flags = 0; - index_buffer_info.size = buffer_capacity_; - index_buffer_info.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT; - index_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - index_buffer_info.queueFamilyIndexCount = 0; - index_buffer_info.pQueueFamilyIndices = nullptr; - auto status = - vkCreateBuffer(device_, &index_buffer_info, nullptr, &index_buffer_); - CheckResult(status, "vkCreateBuffer"); - - // Vertex buffer. - VkBufferCreateInfo vertex_buffer_info; - vertex_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - vertex_buffer_info.pNext = nullptr; - vertex_buffer_info.flags = 0; - vertex_buffer_info.size = buffer_capacity_; - vertex_buffer_info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; - vertex_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - vertex_buffer_info.queueFamilyIndexCount = 0; - vertex_buffer_info.pQueueFamilyIndices = nullptr; - status = - vkCreateBuffer(*device, &vertex_buffer_info, nullptr, &vertex_buffer_); - CheckResult(status, "vkCreateBuffer"); - - // Allocate underlying buffer. - // We alias it for both vertices and indices. - VkMemoryRequirements buffer_requirements; - vkGetBufferMemoryRequirements(device_, index_buffer_, &buffer_requirements); - buffer_memory_ = device->AllocateMemory( - buffer_requirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); - vkBindBufferMemory(*device, index_buffer_, buffer_memory_, 0); - vkBindBufferMemory(*device, vertex_buffer_, buffer_memory_, 0); - - // Persistent mapping. 
- status = vkMapMemory(device_, buffer_memory_, 0, VK_WHOLE_SIZE, 0, - &buffer_data_); - CheckResult(status, "vkMapMemory"); - } - - ~LightweightCircularBuffer() { - if (buffer_memory_) { - vkUnmapMemory(device_, buffer_memory_); - buffer_memory_ = nullptr; - } - - VK_SAFE_DESTROY(vkDestroyBuffer, device_, index_buffer_, nullptr); - VK_SAFE_DESTROY(vkDestroyBuffer, device_, vertex_buffer_, nullptr); - VK_SAFE_DESTROY(vkFreeMemory, device_, buffer_memory_, nullptr); - } - - VkBuffer vertex_buffer() const { return vertex_buffer_; } - VkBuffer index_buffer() const { return index_buffer_; } - - // Allocates space for data and copies it into the buffer. - // Returns the offset in the buffer of the data or VK_WHOLE_SIZE if the buffer - // is full. - VkDeviceSize Emplace(const void* source_data, size_t source_length) { - // TODO(benvanik): query actual alignment. - source_length = xe::round_up(source_length, 256); - - // Run down old fences to free up space. - - // Check to see if we have space. - // return VK_WHOLE_SIZE; - - // Compute new range and mark as in use. - if (current_offset_ + source_length > buffer_capacity_) { - // Wraps around. - current_offset_ = 0; - } - VkDeviceSize offset = current_offset_; - current_offset_ += source_length; - - // Copy data. - auto dest_ptr = reinterpret_cast(buffer_data_) + offset; - std::memcpy(dest_ptr, source_data, source_length); - - // Insert fence. - // TODO(benvanik): coarse-grained fences, these may be too fine. - - // Flush memory. - // TODO(benvanik): do only in large batches? can barrier it. 
- VkMappedMemoryRange dirty_range; - dirty_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - dirty_range.pNext = nullptr; - dirty_range.memory = buffer_memory_; - dirty_range.offset = offset; - dirty_range.size = source_length; - vkFlushMappedMemoryRanges(device_, 1, &dirty_range); - return offset; - } - - private: - VkDevice device_ = nullptr; - - VkBuffer index_buffer_ = nullptr; - VkBuffer vertex_buffer_ = nullptr; - VkDeviceMemory buffer_memory_ = nullptr; - void* buffer_data_ = nullptr; - size_t buffer_capacity_ = 0; - size_t current_offset_ = 0; -}; - -class VulkanImmediateTexture : public ImmediateTexture { - public: - VulkanImmediateTexture(VulkanDevice* device, VkDescriptorPool descriptor_pool, - VkSampler sampler, uint32_t width, uint32_t height) - : ImmediateTexture(width, height), - device_(device), - descriptor_pool_(descriptor_pool), - sampler_(sampler) {} - - ~VulkanImmediateTexture() override { Shutdown(); } - - VkResult Initialize(VkDescriptorSetLayout descriptor_set_layout, - VkImageView image_view) { - handle = reinterpret_cast(this); - image_view_ = image_view; - VkResult status; - - // Create descriptor set used just for this texture. - // It never changes, so we can reuse it and not worry with updates. - VkDescriptorSetAllocateInfo set_alloc_info; - set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - set_alloc_info.pNext = nullptr; - set_alloc_info.descriptorPool = descriptor_pool_; - set_alloc_info.descriptorSetCount = 1; - set_alloc_info.pSetLayouts = &descriptor_set_layout; - status = - vkAllocateDescriptorSets(*device_, &set_alloc_info, &descriptor_set_); - CheckResult(status, "vkAllocateDescriptorSets"); - if (status != VK_SUCCESS) { - return status; - } - - // Initialize descriptor with our texture. 
- VkDescriptorImageInfo texture_info; - texture_info.sampler = sampler_; - texture_info.imageView = image_view_; - texture_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; - VkWriteDescriptorSet descriptor_write; - descriptor_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - descriptor_write.pNext = nullptr; - descriptor_write.dstSet = descriptor_set_; - descriptor_write.dstBinding = 0; - descriptor_write.dstArrayElement = 0; - descriptor_write.descriptorCount = 1; - descriptor_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - descriptor_write.pImageInfo = &texture_info; - vkUpdateDescriptorSets(*device_, 1, &descriptor_write, 0, nullptr); - - return VK_SUCCESS; - } - - VkResult Initialize(VkDescriptorSetLayout descriptor_set_layout) { - handle = reinterpret_cast(this); - VkResult status; - - // Create image object. - VkImageCreateInfo image_info; - image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - image_info.pNext = nullptr; - image_info.flags = 0; - image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.format = VK_FORMAT_R8G8B8A8_UNORM; - image_info.extent = {width, height, 1}; - image_info.mipLevels = 1; - image_info.arrayLayers = 1; - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - image_info.tiling = VK_IMAGE_TILING_LINEAR; - image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; - image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - image_info.queueFamilyIndexCount = 0; - image_info.pQueueFamilyIndices = nullptr; - image_info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; - status = vkCreateImage(*device_, &image_info, nullptr, &image_); - CheckResult(status, "vkCreateImage"); - if (status != VK_SUCCESS) { - return status; - } - - // Allocate memory for the image. 
- VkMemoryRequirements memory_requirements; - vkGetImageMemoryRequirements(*device_, image_, &memory_requirements); - device_memory_ = device_->AllocateMemory( - memory_requirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); - if (!device_memory_) { - return VK_ERROR_INITIALIZATION_FAILED; - } - - // Bind memory and the image together. - status = vkBindImageMemory(*device_, image_, device_memory_, 0); - CheckResult(status, "vkBindImageMemory"); - if (status != VK_SUCCESS) { - return status; - } - - // Create image view used by the shader. - VkImageViewCreateInfo view_info; - view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - view_info.pNext = nullptr; - view_info.flags = 0; - view_info.image = image_; - view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; - view_info.format = VK_FORMAT_R8G8B8A8_UNORM; - view_info.components = { - VK_COMPONENT_SWIZZLE_R, - VK_COMPONENT_SWIZZLE_G, - VK_COMPONENT_SWIZZLE_B, - VK_COMPONENT_SWIZZLE_A, - }; - view_info.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - status = vkCreateImageView(*device_, &view_info, nullptr, &image_view_); - CheckResult(status, "vkCreateImageView"); - if (status != VK_SUCCESS) { - return status; - } - - // Create descriptor set used just for this texture. - // It never changes, so we can reuse it and not worry with updates. - VkDescriptorSetAllocateInfo set_alloc_info; - set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - set_alloc_info.pNext = nullptr; - set_alloc_info.descriptorPool = descriptor_pool_; - set_alloc_info.descriptorSetCount = 1; - set_alloc_info.pSetLayouts = &descriptor_set_layout; - status = - vkAllocateDescriptorSets(*device_, &set_alloc_info, &descriptor_set_); - CheckResult(status, "vkAllocateDescriptorSets"); - if (status != VK_SUCCESS) { - return status; - } - - // Initialize descriptor with our texture. 
- VkDescriptorImageInfo texture_info; - texture_info.sampler = sampler_; - texture_info.imageView = image_view_; - texture_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; - VkWriteDescriptorSet descriptor_write; - descriptor_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - descriptor_write.pNext = nullptr; - descriptor_write.dstSet = descriptor_set_; - descriptor_write.dstBinding = 0; - descriptor_write.dstArrayElement = 0; - descriptor_write.descriptorCount = 1; - descriptor_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - descriptor_write.pImageInfo = &texture_info; - vkUpdateDescriptorSets(*device_, 1, &descriptor_write, 0, nullptr); - - return VK_SUCCESS; - } - - void Shutdown() { - if (descriptor_set_) { - vkFreeDescriptorSets(*device_, descriptor_pool_, 1, &descriptor_set_); - descriptor_set_ = nullptr; - } - - VK_SAFE_DESTROY(vkDestroyImageView, *device_, image_view_, nullptr); - VK_SAFE_DESTROY(vkDestroyImage, *device_, image_, nullptr); - VK_SAFE_DESTROY(vkFreeMemory, *device_, device_memory_, nullptr); - } - - VkResult Upload(const uint8_t* src_data) { - // TODO(benvanik): assert not in use? textures aren't dynamic right now. - - // Get device image layout. - VkImageSubresource subresource; - subresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - subresource.mipLevel = 0; - subresource.arrayLayer = 0; - VkSubresourceLayout layout; - vkGetImageSubresourceLayout(*device_, image_, &subresource, &layout); - - // Map memory for upload. - uint8_t* gpu_data = nullptr; - auto status = vkMapMemory(*device_, device_memory_, 0, layout.size, 0, - reinterpret_cast(&gpu_data)); - CheckResult(status, "vkMapMemory"); - - if (status == VK_SUCCESS) { - // Copy the entire texture, hoping its layout matches what we expect. - std::memcpy(gpu_data + layout.offset, src_data, layout.size); - - vkUnmapMemory(*device_, device_memory_); - } - - return status; - } - - // Queues a command to transition this texture to a new layout. 
This assumes - // the command buffer WILL be queued and executed by the device. - void TransitionLayout(VkCommandBuffer command_buffer, - VkImageLayout new_layout) { - VkImageMemoryBarrier image_barrier; - image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - image_barrier.pNext = nullptr; - image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - image_barrier.srcAccessMask = 0; - image_barrier.dstAccessMask = 0; - image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - image_barrier.newLayout = new_layout; - image_barrier.image = image_; - image_barrier.subresourceRange = {0, 0, 1, 0, 1}; - image_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - image_layout_ = new_layout; - - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, - nullptr, 1, &image_barrier); - } - - VkDescriptorSet descriptor_set() const { return descriptor_set_; } - VkImageLayout layout() const { return image_layout_; } - - private: - ui::vulkan::VulkanDevice* device_ = nullptr; - VkDescriptorPool descriptor_pool_ = nullptr; - VkSampler sampler_ = nullptr; // Not owned. 
- VkImage image_ = nullptr; - VkImageLayout image_layout_ = VK_IMAGE_LAYOUT_PREINITIALIZED; - VkDeviceMemory device_memory_ = nullptr; - VkImageView image_view_ = nullptr; - VkDescriptorSet descriptor_set_ = nullptr; -}; - -VulkanImmediateDrawer::VulkanImmediateDrawer(VulkanContext* graphics_context) - : ImmediateDrawer(graphics_context), context_(graphics_context) {} - -VulkanImmediateDrawer::~VulkanImmediateDrawer() { Shutdown(); } - -VkResult VulkanImmediateDrawer::Initialize() { - auto device = context_->device(); - - // NEAREST + CLAMP - VkSamplerCreateInfo sampler_info; - sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - sampler_info.pNext = nullptr; - sampler_info.flags = 0; - sampler_info.magFilter = VK_FILTER_NEAREST; - sampler_info.minFilter = VK_FILTER_NEAREST; - sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; - sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - sampler_info.mipLodBias = 0.0f; - sampler_info.anisotropyEnable = VK_FALSE; - sampler_info.maxAnisotropy = 1.0f; - sampler_info.compareEnable = VK_FALSE; - sampler_info.compareOp = VK_COMPARE_OP_NEVER; - sampler_info.minLod = 0.0f; - sampler_info.maxLod = 0.0f; - sampler_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; - sampler_info.unnormalizedCoordinates = VK_FALSE; - auto status = vkCreateSampler(*device, &sampler_info, nullptr, - &samplers_.nearest_clamp); - CheckResult(status, "vkCreateSampler"); - if (status != VK_SUCCESS) { - return status; - } - - // NEAREST + REPEAT - sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; - status = vkCreateSampler(*device, &sampler_info, nullptr, - &samplers_.nearest_repeat); - CheckResult(status, "vkCreateSampler"); - if (status != VK_SUCCESS) 
{ - return status; - } - - // LINEAR + CLAMP - sampler_info.magFilter = VK_FILTER_LINEAR; - sampler_info.minFilter = VK_FILTER_LINEAR; - sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - status = - vkCreateSampler(*device, &sampler_info, nullptr, &samplers_.linear_clamp); - CheckResult(status, "vkCreateSampler"); - if (status != VK_SUCCESS) { - return status; - } - - // LINEAR + REPEAT - sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; - status = vkCreateSampler(*device, &sampler_info, nullptr, - &samplers_.linear_repeat); - CheckResult(status, "vkCreateSampler"); - if (status != VK_SUCCESS) { - return status; - } - - // Create the descriptor set layout used for our texture sampler. - // As it changes almost every draw we keep it separate from the uniform buffer - // and cache it on the textures. 
- VkDescriptorSetLayoutCreateInfo texture_set_layout_info; - texture_set_layout_info.sType = - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - texture_set_layout_info.pNext = nullptr; - texture_set_layout_info.flags = 0; - texture_set_layout_info.bindingCount = 1; - VkDescriptorSetLayoutBinding texture_binding; - texture_binding.binding = 0; - texture_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - texture_binding.descriptorCount = 1; - texture_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - texture_binding.pImmutableSamplers = nullptr; - texture_set_layout_info.pBindings = &texture_binding; - status = vkCreateDescriptorSetLayout(*device, &texture_set_layout_info, - nullptr, &texture_set_layout_); - CheckResult(status, "vkCreateDescriptorSetLayout"); - if (status != VK_SUCCESS) { - return status; - } - - // Descriptor pool used for all of our cached descriptors. - // In the steady state we don't allocate anything, so these are all manually - // managed. - VkDescriptorPoolCreateInfo descriptor_pool_info; - descriptor_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - descriptor_pool_info.pNext = nullptr; - descriptor_pool_info.flags = - VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - descriptor_pool_info.maxSets = 128; - VkDescriptorPoolSize pool_sizes[1]; - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - pool_sizes[0].descriptorCount = 128; - descriptor_pool_info.poolSizeCount = 1; - descriptor_pool_info.pPoolSizes = pool_sizes; - status = vkCreateDescriptorPool(*device, &descriptor_pool_info, nullptr, - &descriptor_pool_); - CheckResult(status, "vkCreateDescriptorPool"); - if (status != VK_SUCCESS) { - return status; - } - - // Create the pipeline layout used for our pipeline. - // If we had multiple pipelines they would share this. 
- VkPipelineLayoutCreateInfo pipeline_layout_info; - pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - pipeline_layout_info.pNext = nullptr; - pipeline_layout_info.flags = 0; - VkDescriptorSetLayout set_layouts[] = {texture_set_layout_}; - pipeline_layout_info.setLayoutCount = - static_cast(xe::countof(set_layouts)); - pipeline_layout_info.pSetLayouts = set_layouts; - VkPushConstantRange push_constant_ranges[2]; - push_constant_ranges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - push_constant_ranges[0].offset = 0; - push_constant_ranges[0].size = sizeof(float) * 16; - push_constant_ranges[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - push_constant_ranges[1].offset = sizeof(float) * 16; - push_constant_ranges[1].size = sizeof(int); - pipeline_layout_info.pushConstantRangeCount = - static_cast(xe::countof(push_constant_ranges)); - pipeline_layout_info.pPushConstantRanges = push_constant_ranges; - status = vkCreatePipelineLayout(*device, &pipeline_layout_info, nullptr, - &pipeline_layout_); - CheckResult(status, "vkCreatePipelineLayout"); - if (status != VK_SUCCESS) { - return status; - } - - // Vertex and fragment shaders. 
- VkShaderModuleCreateInfo vertex_shader_info; - vertex_shader_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - vertex_shader_info.pNext = nullptr; - vertex_shader_info.flags = 0; - vertex_shader_info.codeSize = sizeof(immediate_vert); - vertex_shader_info.pCode = reinterpret_cast(immediate_vert); - VkShaderModule vertex_shader; - status = vkCreateShaderModule(*device, &vertex_shader_info, nullptr, - &vertex_shader); - CheckResult(status, "vkCreateShaderModule"); - VkShaderModuleCreateInfo fragment_shader_info; - fragment_shader_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - fragment_shader_info.pNext = nullptr; - fragment_shader_info.flags = 0; - fragment_shader_info.codeSize = sizeof(immediate_frag); - fragment_shader_info.pCode = - reinterpret_cast(immediate_frag); - VkShaderModule fragment_shader; - status = vkCreateShaderModule(*device, &fragment_shader_info, nullptr, - &fragment_shader); - CheckResult(status, "vkCreateShaderModule"); - - // Pipeline used when rendering triangles. 
- VkGraphicsPipelineCreateInfo pipeline_info; - pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; - pipeline_info.pNext = nullptr; - pipeline_info.flags = VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT; - VkPipelineShaderStageCreateInfo pipeline_stages[2]; - pipeline_stages[0].sType = - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - pipeline_stages[0].pNext = nullptr; - pipeline_stages[0].flags = 0; - pipeline_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; - pipeline_stages[0].module = vertex_shader; - pipeline_stages[0].pName = "main"; - pipeline_stages[0].pSpecializationInfo = nullptr; - pipeline_stages[1].sType = - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - pipeline_stages[1].pNext = nullptr; - pipeline_stages[1].flags = 0; - pipeline_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; - pipeline_stages[1].module = fragment_shader; - pipeline_stages[1].pName = "main"; - pipeline_stages[1].pSpecializationInfo = nullptr; - pipeline_info.stageCount = 2; - pipeline_info.pStages = pipeline_stages; - VkPipelineVertexInputStateCreateInfo vertex_state_info; - vertex_state_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - vertex_state_info.pNext = nullptr; - vertex_state_info.flags = 0; - VkVertexInputBindingDescription vertex_binding_descrs[1]; - vertex_binding_descrs[0].binding = 0; - vertex_binding_descrs[0].stride = sizeof(ImmediateVertex); - vertex_binding_descrs[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX; - vertex_state_info.vertexBindingDescriptionCount = - static_cast(xe::countof(vertex_binding_descrs)); - vertex_state_info.pVertexBindingDescriptions = vertex_binding_descrs; - VkVertexInputAttributeDescription vertex_attrib_descrs[3]; - vertex_attrib_descrs[0].location = 0; - vertex_attrib_descrs[0].binding = 0; - vertex_attrib_descrs[0].format = VK_FORMAT_R32G32_SFLOAT; - vertex_attrib_descrs[0].offset = offsetof(ImmediateVertex, x); - vertex_attrib_descrs[1].location = 1; - vertex_attrib_descrs[1].binding 
= 0; - vertex_attrib_descrs[1].format = VK_FORMAT_R32G32_SFLOAT; - vertex_attrib_descrs[1].offset = offsetof(ImmediateVertex, u); - vertex_attrib_descrs[2].location = 2; - vertex_attrib_descrs[2].binding = 0; - vertex_attrib_descrs[2].format = VK_FORMAT_R8G8B8A8_UNORM; - vertex_attrib_descrs[2].offset = offsetof(ImmediateVertex, color); - vertex_state_info.vertexAttributeDescriptionCount = - static_cast(xe::countof(vertex_attrib_descrs)); - vertex_state_info.pVertexAttributeDescriptions = vertex_attrib_descrs; - pipeline_info.pVertexInputState = &vertex_state_info; - VkPipelineInputAssemblyStateCreateInfo input_info; - input_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - input_info.pNext = nullptr; - input_info.flags = 0; - input_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; - input_info.primitiveRestartEnable = VK_FALSE; - pipeline_info.pInputAssemblyState = &input_info; - pipeline_info.pTessellationState = nullptr; - VkPipelineViewportStateCreateInfo viewport_state_info; - viewport_state_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - viewport_state_info.pNext = nullptr; - viewport_state_info.flags = 0; - viewport_state_info.viewportCount = 1; - viewport_state_info.pViewports = nullptr; - viewport_state_info.scissorCount = 1; - viewport_state_info.pScissors = nullptr; - pipeline_info.pViewportState = &viewport_state_info; - VkPipelineRasterizationStateCreateInfo rasterization_info; - rasterization_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - rasterization_info.pNext = nullptr; - rasterization_info.flags = 0; - rasterization_info.depthClampEnable = VK_FALSE; - rasterization_info.rasterizerDiscardEnable = VK_FALSE; - rasterization_info.polygonMode = VK_POLYGON_MODE_FILL; - rasterization_info.cullMode = VK_CULL_MODE_NONE; - rasterization_info.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; - rasterization_info.depthBiasEnable = VK_FALSE; - 
rasterization_info.depthBiasConstantFactor = 0; - rasterization_info.depthBiasClamp = 0; - rasterization_info.depthBiasSlopeFactor = 0; - rasterization_info.lineWidth = 1.0f; - pipeline_info.pRasterizationState = &rasterization_info; - VkPipelineMultisampleStateCreateInfo multisample_info; - multisample_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - multisample_info.pNext = nullptr; - multisample_info.flags = 0; - multisample_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - multisample_info.sampleShadingEnable = VK_FALSE; - multisample_info.minSampleShading = 0; - multisample_info.pSampleMask = nullptr; - multisample_info.alphaToCoverageEnable = VK_FALSE; - multisample_info.alphaToOneEnable = VK_FALSE; - pipeline_info.pMultisampleState = &multisample_info; - pipeline_info.pDepthStencilState = nullptr; - VkPipelineColorBlendStateCreateInfo blend_info; - blend_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - blend_info.pNext = nullptr; - blend_info.flags = 0; - blend_info.logicOpEnable = VK_FALSE; - blend_info.logicOp = VK_LOGIC_OP_NO_OP; - VkPipelineColorBlendAttachmentState blend_attachments[1]; - blend_attachments[0].blendEnable = VK_TRUE; - blend_attachments[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; - blend_attachments[0].dstColorBlendFactor = - VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - blend_attachments[0].colorBlendOp = VK_BLEND_OP_ADD; - blend_attachments[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; - blend_attachments[0].dstAlphaBlendFactor = - VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - blend_attachments[0].alphaBlendOp = VK_BLEND_OP_ADD; - blend_attachments[0].colorWriteMask = 0xF; - blend_info.attachmentCount = - static_cast(xe::countof(blend_attachments)); - blend_info.pAttachments = blend_attachments; - std::memset(blend_info.blendConstants, 0, sizeof(blend_info.blendConstants)); - pipeline_info.pColorBlendState = &blend_info; - VkPipelineDynamicStateCreateInfo dynamic_state_info; - 
dynamic_state_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; - dynamic_state_info.pNext = nullptr; - dynamic_state_info.flags = 0; - VkDynamicState dynamic_states[] = { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }; - dynamic_state_info.dynamicStateCount = - static_cast(xe::countof(dynamic_states)); - dynamic_state_info.pDynamicStates = dynamic_states; - pipeline_info.pDynamicState = &dynamic_state_info; - pipeline_info.layout = pipeline_layout_; - pipeline_info.renderPass = context_->swap_chain()->render_pass(); - pipeline_info.subpass = 0; - pipeline_info.basePipelineHandle = nullptr; - pipeline_info.basePipelineIndex = -1; - if (status == VK_SUCCESS) { - status = vkCreateGraphicsPipelines(*device, nullptr, 1, &pipeline_info, - nullptr, &triangle_pipeline_); - CheckResult(status, "vkCreateGraphicsPipelines"); - } - - // Silly, but let's make a pipeline just for drawing lines. - pipeline_info.flags = VK_PIPELINE_CREATE_DERIVATIVE_BIT; - input_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; - pipeline_info.basePipelineHandle = triangle_pipeline_; - pipeline_info.basePipelineIndex = -1; - if (status == VK_SUCCESS) { - status = vkCreateGraphicsPipelines(*device, nullptr, 1, &pipeline_info, - nullptr, &line_pipeline_); - CheckResult(status, "vkCreateGraphicsPipelines"); - } - - VK_SAFE_DESTROY(vkDestroyShaderModule, *device, vertex_shader, nullptr); - VK_SAFE_DESTROY(vkDestroyShaderModule, *device, fragment_shader, nullptr); - - // Allocate the buffer we'll use for our vertex and index data. 
- circular_buffer_ = std::make_unique(device); - - return status; -} - -void VulkanImmediateDrawer::Shutdown() { - auto device = context_->device(); - - circular_buffer_.reset(); - - VK_SAFE_DESTROY(vkDestroyPipeline, *device, line_pipeline_, nullptr); - VK_SAFE_DESTROY(vkDestroyPipeline, *device, triangle_pipeline_, nullptr); - VK_SAFE_DESTROY(vkDestroyPipelineLayout, *device, pipeline_layout_, nullptr); - - VK_SAFE_DESTROY(vkDestroyDescriptorPool, *device, descriptor_pool_, nullptr); - VK_SAFE_DESTROY(vkDestroyDescriptorSetLayout, *device, texture_set_layout_, - nullptr); - - VK_SAFE_DESTROY(vkDestroySampler, *device, samplers_.nearest_clamp, nullptr); - VK_SAFE_DESTROY(vkDestroySampler, *device, samplers_.nearest_repeat, nullptr); - VK_SAFE_DESTROY(vkDestroySampler, *device, samplers_.linear_clamp, nullptr); - VK_SAFE_DESTROY(vkDestroySampler, *device, samplers_.linear_repeat, nullptr); -} - std::unique_ptr VulkanImmediateDrawer::CreateTexture( uint32_t width, uint32_t height, ImmediateTextureFilter filter, bool repeat, const uint8_t* data) { - auto device = context_->device(); - - VkResult status; - VkSampler sampler = GetSampler(filter, repeat); - - auto texture = std::make_unique( - device, descriptor_pool_, sampler, width, height); - status = texture->Initialize(texture_set_layout_); - if (status != VK_SUCCESS) { - texture->Shutdown(); - return nullptr; - } - - if (data) { - UpdateTexture(texture.get(), data); - } - return std::unique_ptr(texture.release()); -} - -std::unique_ptr VulkanImmediateDrawer::WrapTexture( - VkImageView image_view, VkSampler sampler, uint32_t width, - uint32_t height) { - VkResult status; - - auto texture = std::make_unique( - context_->device(), descriptor_pool_, sampler, width, height); - status = texture->Initialize(texture_set_layout_, image_view); - if (status != VK_SUCCESS) { - texture->Shutdown(); - return nullptr; - } - - return texture; + return nullptr; } void VulkanImmediateDrawer::UpdateTexture(ImmediateTexture* texture, 
- const uint8_t* data) { - static_cast(texture)->Upload(data); -} + const uint8_t* data) {} void VulkanImmediateDrawer::Begin(int render_target_width, - int render_target_height) { - auto device = context_->device(); - auto swap_chain = context_->swap_chain(); - assert_null(current_cmd_buffer_); - current_cmd_buffer_ = swap_chain->render_cmd_buffer(); - current_render_target_width_ = render_target_width; - current_render_target_height_ = render_target_height; + int render_target_height) {} - // Viewport changes only once per batch. - VkViewport viewport; - viewport.x = 0.0f; - viewport.y = 0.0f; - viewport.width = static_cast(render_target_width); - viewport.height = static_cast(render_target_height); - viewport.minDepth = 0.0f; - viewport.maxDepth = 1.0f; - vkCmdSetViewport(current_cmd_buffer_, 0, 1, &viewport); +void VulkanImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) {} - // Update projection matrix. - const float ortho_projection[4][4] = { - {2.0f / render_target_width, 0.0f, 0.0f, 0.0f}, - {0.0f, 2.0f / -render_target_height, 0.0f, 0.0f}, - {0.0f, 0.0f, -1.0f, 0.0f}, - {-1.0f, 1.0f, 0.0f, 1.0f}, - }; - vkCmdPushConstants(current_cmd_buffer_, pipeline_layout_, - VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(float) * 16, - ortho_projection); -} - -void VulkanImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { - auto device = context_->device(); - - // Upload vertices. - VkDeviceSize vertices_offset = circular_buffer_->Emplace( - batch.vertices, batch.vertex_count * sizeof(ImmediateVertex)); - if (vertices_offset == VK_WHOLE_SIZE) { - // TODO(benvanik): die? - return; - } - auto vertex_buffer = circular_buffer_->vertex_buffer(); - vkCmdBindVertexBuffers(current_cmd_buffer_, 0, 1, &vertex_buffer, - &vertices_offset); - - // Upload indices. 
- if (batch.indices) { - VkDeviceSize indices_offset = circular_buffer_->Emplace( - batch.indices, batch.index_count * sizeof(uint16_t)); - if (indices_offset == VK_WHOLE_SIZE) { - // TODO(benvanik): die? - return; - } - vkCmdBindIndexBuffer(current_cmd_buffer_, circular_buffer_->index_buffer(), - indices_offset, VK_INDEX_TYPE_UINT16); - } - - batch_has_index_buffer_ = !!batch.indices; -} - -void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) { - switch (draw.primitive_type) { - case ImmediatePrimitiveType::kLines: - vkCmdBindPipeline(current_cmd_buffer_, VK_PIPELINE_BIND_POINT_GRAPHICS, - line_pipeline_); - break; - case ImmediatePrimitiveType::kTriangles: - vkCmdBindPipeline(current_cmd_buffer_, VK_PIPELINE_BIND_POINT_GRAPHICS, - triangle_pipeline_); - break; - } - - // Setup texture binding. - auto texture = reinterpret_cast(draw.texture_handle); - if (texture) { - if (texture->layout() != VK_IMAGE_LAYOUT_GENERAL) { - texture->TransitionLayout(current_cmd_buffer_, VK_IMAGE_LAYOUT_GENERAL); - } - - auto texture_set = texture->descriptor_set(); - if (!texture_set) { - XELOGW("Failed to acquire texture descriptor set for immediate drawer!"); - } - - vkCmdBindDescriptorSets(current_cmd_buffer_, - VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout_, - 0, 1, &texture_set, 0, nullptr); - } - - // Use push constants for our per-draw changes. - // Here, the restrict_texture_samples uniform. - int restrict_texture_samples = draw.restrict_texture_samples ? 1 : 0; - vkCmdPushConstants(current_cmd_buffer_, pipeline_layout_, - VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(float) * 16, - sizeof(int), &restrict_texture_samples); - - // Scissor, if enabled. - // Scissor can be disabled by making it the full screen. 
- VkRect2D scissor; - if (draw.scissor) { - scissor.offset.x = draw.scissor_rect[0]; - scissor.offset.y = current_render_target_height_ - - (draw.scissor_rect[1] + draw.scissor_rect[3]); - scissor.extent.width = draw.scissor_rect[2]; - scissor.extent.height = draw.scissor_rect[3]; - } else { - scissor.offset.x = 0; - scissor.offset.y = 0; - scissor.extent.width = current_render_target_width_; - scissor.extent.height = current_render_target_height_; - } - vkCmdSetScissor(current_cmd_buffer_, 0, 1, &scissor); - - // Issue draw. - if (batch_has_index_buffer_) { - vkCmdDrawIndexed(current_cmd_buffer_, draw.count, 1, draw.index_offset, - draw.base_vertex, 0); - } else { - vkCmdDraw(current_cmd_buffer_, draw.count, 1, draw.base_vertex, 0); - } -} +void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) {} void VulkanImmediateDrawer::EndDrawBatch() {} -void VulkanImmediateDrawer::End() { current_cmd_buffer_ = nullptr; } - -VkSampler VulkanImmediateDrawer::GetSampler(ImmediateTextureFilter filter, - bool repeat) { - VkSampler sampler = nullptr; - switch (filter) { - case ImmediateTextureFilter::kNearest: - sampler = repeat ? samplers_.nearest_repeat : samplers_.nearest_clamp; - break; - case ImmediateTextureFilter::kLinear: - sampler = repeat ? 
samplers_.linear_repeat : samplers_.linear_clamp; - break; - default: - assert_unhandled_case(filter); - sampler = samplers_.nearest_clamp; - break; - } - - return sampler; -} +void VulkanImmediateDrawer::End() {} } // namespace vulkan } // namespace ui diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h index 6e4f5ce1a..f51ffdd97 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * + * Copyright 2020 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -10,35 +10,19 @@ #ifndef XENIA_UI_VULKAN_VULKAN_IMMEDIATE_DRAWER_H_ #define XENIA_UI_VULKAN_VULKAN_IMMEDIATE_DRAWER_H_ -#include - #include "xenia/ui/immediate_drawer.h" -#include "xenia/ui/vulkan/vulkan.h" namespace xe { namespace ui { namespace vulkan { -class LightweightCircularBuffer; -class VulkanContext; - class VulkanImmediateDrawer : public ImmediateDrawer { public: - VulkanImmediateDrawer(VulkanContext* graphics_context); - ~VulkanImmediateDrawer() override; - - VkResult Initialize(); - void Shutdown(); - std::unique_ptr CreateTexture(uint32_t width, uint32_t height, ImmediateTextureFilter filter, bool repeat, const uint8_t* data) override; - std::unique_ptr WrapTexture(VkImageView image_view, - VkSampler sampler, - uint32_t width, - uint32_t height); void UpdateTexture(ImmediateTexture* texture, const uint8_t* data) override; void Begin(int render_target_width, int render_target_height) override; @@ -46,31 +30,6 @@ class VulkanImmediateDrawer : public ImmediateDrawer { void Draw(const 
ImmediateDraw& draw) override; void EndDrawBatch() override; void End() override; - - VkSampler GetSampler(ImmediateTextureFilter filter, bool repeat); - - private: - VulkanContext* context_ = nullptr; - - struct { - VkSampler nearest_clamp = nullptr; - VkSampler nearest_repeat = nullptr; - VkSampler linear_clamp = nullptr; - VkSampler linear_repeat = nullptr; - } samplers_; - - VkDescriptorSetLayout texture_set_layout_ = nullptr; - VkDescriptorPool descriptor_pool_ = nullptr; - VkPipelineLayout pipeline_layout_ = nullptr; - VkPipeline triangle_pipeline_ = nullptr; - VkPipeline line_pipeline_ = nullptr; - - std::unique_ptr circular_buffer_; - - bool batch_has_index_buffer_ = false; - VkCommandBuffer current_cmd_buffer_ = nullptr; - int current_render_target_width_ = 0; - int current_render_target_height_ = 0; }; } // namespace vulkan diff --git a/src/xenia/ui/vulkan/vulkan_instance.cc b/src/xenia/ui/vulkan/vulkan_instance.cc deleted file mode 100644 index b324f86f2..000000000 --- a/src/xenia/ui/vulkan/vulkan_instance.cc +++ /dev/null @@ -1,540 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#include "xenia/ui/vulkan/vulkan_instance.h" - -#include -#include -#include - -#include "third_party/renderdoc/renderdoc_app.h" -#include "third_party/volk/volk.h" - -#include "xenia/base/assert.h" -#include "xenia/base/logging.h" -#include "xenia/base/math.h" -#include "xenia/base/profiling.h" -#include "xenia/ui/vulkan/vulkan.h" -#include "xenia/ui/vulkan/vulkan_immediate_drawer.h" -#include "xenia/ui/vulkan/vulkan_util.h" -#include "xenia/ui/window.h" - -#if XE_PLATFORM_LINUX -#include "xenia/ui/window_gtk.h" -#endif - -#define VK_API_VERSION VK_API_VERSION_1_1 - -namespace xe { -namespace ui { -namespace vulkan { - -VulkanInstance::VulkanInstance() { - if (cvars::vulkan_validation) { - DeclareRequiredLayer("VK_LAYER_LUNARG_standard_validation", - Version::Make(0, 0, 0), true); - // DeclareRequiredLayer("VK_LAYER_GOOGLE_unique_objects", Version::Make(0, - // 0, 0), true); - /* - DeclareRequiredLayer("VK_LAYER_GOOGLE_threading", Version::Make(0, 0, 0), - true); - DeclareRequiredLayer("VK_LAYER_LUNARG_core_validation", - Version::Make(0, 0, 0), true); - DeclareRequiredLayer("VK_LAYER_LUNARG_object_tracker", - Version::Make(0, 0, 0), true); - DeclareRequiredLayer("VK_LAYER_LUNARG_draw_state", Version::Make(0, 0, 0), - true); - DeclareRequiredLayer("VK_LAYER_LUNARG_parameter_validation", - Version::Make(0, 0, 0), true); - DeclareRequiredLayer("VK_LAYER_LUNARG_swapchain", Version::Make(0, 0, 0), - true); - DeclareRequiredLayer("VK_LAYER_LUNARG_device_limits", - Version::Make(0, 0, 0), true); - DeclareRequiredLayer("VK_LAYER_LUNARG_image", Version::Make(0, 0, 0), true); - */ - DeclareRequiredExtension(VK_EXT_DEBUG_REPORT_EXTENSION_NAME, - Version::Make(0, 0, 0), true); - } - - DeclareRequiredExtension(VK_EXT_DEBUG_MARKER_EXTENSION_NAME, - Version::Make(0, 0, 0), true); -} - -VulkanInstance::~VulkanInstance() { DestroyInstance(); } - -bool VulkanInstance::Initialize() { - auto 
version = Version::Parse(VK_API_VERSION); - XELOGVK("Initializing Vulkan {}...", version.pretty_string); - if (volkInitialize() != VK_SUCCESS) { - XELOGE("volkInitialize() failed!"); - return false; - } - - // Get all of the global layers and extensions provided by the system. - if (!QueryGlobals()) { - XELOGE("Failed to query instance globals"); - return false; - } - - // Create the vulkan instance used by the application with our required - // extensions and layers. - if (!CreateInstance()) { - XELOGE("Failed to create instance"); - return false; - } - - // Query available devices so that we can pick one. - if (!QueryDevices()) { - XELOGE("Failed to query devices"); - return false; - } - - // Hook into renderdoc, if it's available. - EnableRenderDoc(); - - XELOGVK("Instance initialized successfully!"); - return true; -} - -bool VulkanInstance::EnableRenderDoc() { - // RenderDoc injects itself into our process, so we should be able to get it. - pRENDERDOC_GetAPI get_api = nullptr; -#if XE_PLATFORM_WIN32 - auto module_handle = GetModuleHandleW(L"renderdoc.dll"); - if (!module_handle) { - XELOGI("RenderDoc support requested but it is not attached"); - return false; - } - get_api = reinterpret_cast( - GetProcAddress(module_handle, "RENDERDOC_GetAPI")); -#else -// TODO(benvanik): dlsym/etc - abstracted in base/. -#endif // XE_PLATFORM_32 - if (!get_api) { - XELOGI("RenderDoc support requested but it is not attached"); - return false; - } - - // Request all API function pointers. - if (!get_api(eRENDERDOC_API_Version_1_0_1, - reinterpret_cast(&renderdoc_api_))) { - XELOGE("RenderDoc found but was unable to get API - version mismatch?"); - return false; - } - auto api = reinterpret_cast(renderdoc_api_); - - // Query version. 
- int major; - int minor; - int patch; - api->GetAPIVersion(&major, &minor, &patch); - XELOGI("RenderDoc attached; {}.{}.{}", major, minor, patch); - - is_renderdoc_attached_ = true; - - return true; -} - -bool VulkanInstance::QueryGlobals() { - // Scan global layers and accumulate properties. - // We do this in a loop so that we can allocate the required amount of - // memory and handle race conditions while querying. - uint32_t count = 0; - std::vector global_layer_properties; - VkResult err; - do { - err = vkEnumerateInstanceLayerProperties(&count, nullptr); - CheckResult(err, "vkEnumerateInstanceLayerProperties"); - global_layer_properties.resize(count); - err = vkEnumerateInstanceLayerProperties(&count, - global_layer_properties.data()); - } while (err == VK_INCOMPLETE); - CheckResult(err, "vkEnumerateInstanceLayerProperties"); - global_layers_.resize(count); - for (size_t i = 0; i < global_layers_.size(); ++i) { - auto& global_layer = global_layers_[i]; - global_layer.properties = global_layer_properties[i]; - - // Get all extensions available for the layer. 
- do { - err = vkEnumerateInstanceExtensionProperties( - global_layer.properties.layerName, &count, nullptr); - CheckResult(err, "vkEnumerateInstanceExtensionProperties"); - global_layer.extensions.resize(count); - err = vkEnumerateInstanceExtensionProperties( - global_layer.properties.layerName, &count, - global_layer.extensions.data()); - } while (err == VK_INCOMPLETE); - CheckResult(err, "vkEnumerateInstanceExtensionProperties"); - } - XELOGVK("Found {} global layers:", global_layers_.size()); - for (size_t i = 0; i < global_layers_.size(); ++i) { - auto& global_layer = global_layers_[i]; - auto spec_version = Version::Parse(global_layer.properties.specVersion); - auto impl_version = - Version::Parse(global_layer.properties.implementationVersion); - XELOGVK("- {} (spec: {}, impl: {})", global_layer.properties.layerName, - spec_version.pretty_string, impl_version.pretty_string); - XELOGVK(" {}", global_layer.properties.description); - if (!global_layer.extensions.empty()) { - XELOGVK(" {} extensions:", global_layer.extensions.size()); - DumpExtensions(global_layer.extensions, " "); - } - } - - // Scan global extensions. - do { - err = vkEnumerateInstanceExtensionProperties(nullptr, &count, nullptr); - CheckResult(err, "vkEnumerateInstanceExtensionProperties"); - global_extensions_.resize(count); - err = vkEnumerateInstanceExtensionProperties(nullptr, &count, - global_extensions_.data()); - } while (err == VK_INCOMPLETE); - CheckResult(err, "vkEnumerateInstanceExtensionProperties"); - XELOGVK("Found {} global extensions:", global_extensions_.size()); - DumpExtensions(global_extensions_, ""); - - return true; -} - -bool VulkanInstance::CreateInstance() { - XELOGVK("Verifying layers and extensions..."); - - // Gather list of enabled layer names. - auto layers_result = CheckRequirements(required_layers_, global_layers_); - auto& enabled_layers = layers_result.second; - - // Gather list of enabled extension names. 
- auto extensions_result = - CheckRequirements(required_extensions_, global_extensions_); - auto& enabled_extensions = extensions_result.second; - - // We wait until both extensions and layers are checked before failing out so - // that the user gets a complete list of what they have/don't. - if (!extensions_result.first || !layers_result.first) { - XELOGE("Layer and extension verification failed; aborting initialization"); - return false; - } - - XELOGVK("Initializing application instance..."); - - // TODO(benvanik): use GetEntryInfo? - VkApplicationInfo application_info; - application_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; - application_info.pNext = nullptr; - application_info.pApplicationName = "xenia"; - application_info.applicationVersion = 1; - application_info.pEngineName = "xenia"; - application_info.engineVersion = 1; - application_info.apiVersion = VK_API_VERSION; - - VkInstanceCreateInfo instance_info; - instance_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; - instance_info.pNext = nullptr; - instance_info.flags = 0; - instance_info.pApplicationInfo = &application_info; - instance_info.enabledLayerCount = - static_cast(enabled_layers.size()); - instance_info.ppEnabledLayerNames = enabled_layers.data(); - instance_info.enabledExtensionCount = - static_cast(enabled_extensions.size()); - instance_info.ppEnabledExtensionNames = enabled_extensions.data(); - - auto err = vkCreateInstance(&instance_info, nullptr, &handle); - if (err != VK_SUCCESS) { - XELOGE("vkCreateInstance returned {}", to_string(err)); - } - switch (err) { - case VK_SUCCESS: - // Ok! 
- break; - case VK_ERROR_INITIALIZATION_FAILED: - XELOGE("Instance initialization failed; generic"); - return false; - case VK_ERROR_INCOMPATIBLE_DRIVER: - XELOGE( - "Instance initialization failed; cannot find a compatible Vulkan " - "installable client driver (ICD)"); - return false; - case VK_ERROR_EXTENSION_NOT_PRESENT: - XELOGE("Instance initialization failed; requested extension not present"); - return false; - case VK_ERROR_LAYER_NOT_PRESENT: - XELOGE("Instance initialization failed; requested layer not present"); - return false; - default: - XELOGE("Instance initialization failed; unknown: {}", to_string(err)); - return false; - } - - // Load Vulkan entrypoints and extensions. - volkLoadInstance(handle); - - // Enable debug validation, if needed. - EnableDebugValidation(); - - return true; -} - -void VulkanInstance::DestroyInstance() { - if (!handle) { - return; - } - DisableDebugValidation(); - vkDestroyInstance(handle, nullptr); - handle = nullptr; -} - -VkBool32 VKAPI_PTR DebugMessageCallback(VkDebugReportFlagsEXT flags, - VkDebugReportObjectTypeEXT objectType, - uint64_t object, size_t location, - int32_t messageCode, - const char* pLayerPrefix, - const char* pMessage, void* pUserData) { - if (strcmp(pLayerPrefix, "Validation") == 0) { - const char* blacklist[] = { - "bound but it was never updated. 
You may want to either update it or " - "not bind it.", - "is being used in draw but has not been updated.", - }; - for (uint32_t i = 0; i < xe::countof(blacklist); ++i) { - if (strstr(pMessage, blacklist[i]) != nullptr) { - return false; - } - } - } - - auto instance = reinterpret_cast(pUserData); - const char* message_type = "UNKNOWN"; - if (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT) { - message_type = "ERROR"; - } else if (flags & VK_DEBUG_REPORT_WARNING_BIT_EXT) { - message_type = "WARN"; - } else if (flags & VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT) { - message_type = "PERF WARN"; - } else if (flags & VK_DEBUG_REPORT_INFORMATION_BIT_EXT) { - message_type = "INFO"; - } else if (flags & VK_DEBUG_REPORT_DEBUG_BIT_EXT) { - message_type = "DEBUG"; - } - - XELOGVK("[{}/{}:{}] {}", pLayerPrefix, message_type, messageCode, pMessage); - return false; -} - -void VulkanInstance::EnableDebugValidation() { - if (dbg_report_callback_) { - DisableDebugValidation(); - } - auto vk_create_debug_report_callback_ext = - reinterpret_cast( - vkGetInstanceProcAddr(handle, "vkCreateDebugReportCallbackEXT")); - if (!vk_create_debug_report_callback_ext) { - XELOGVK("Debug validation layer not installed; ignoring"); - return; - } - VkDebugReportCallbackCreateInfoEXT create_info; - create_info.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT; - create_info.pNext = nullptr; - // TODO(benvanik): flags to set these. 
- create_info.flags = - VK_DEBUG_REPORT_INFORMATION_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT | - VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT | - VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_DEBUG_BIT_EXT; - create_info.pfnCallback = &DebugMessageCallback; - create_info.pUserData = this; - auto status = vk_create_debug_report_callback_ext( - handle, &create_info, nullptr, &dbg_report_callback_); - if (status == VK_SUCCESS) { - XELOGVK("Debug validation layer enabled"); - } else { - XELOGVK("Debug validation layer failed to install; error {}", - to_string(status)); - } -} - -void VulkanInstance::DisableDebugValidation() { - if (!dbg_report_callback_) { - return; - } - auto vk_destroy_debug_report_callback_ext = - reinterpret_cast( - vkGetInstanceProcAddr(handle, "vkDestroyDebugReportCallbackEXT")); - if (!vk_destroy_debug_report_callback_ext) { - return; - } - vk_destroy_debug_report_callback_ext(handle, dbg_report_callback_, nullptr); - dbg_report_callback_ = nullptr; -} - -bool VulkanInstance::QueryDevices() { - // Get handles to all devices. - uint32_t count = 0; - std::vector device_handles; - auto err = vkEnumeratePhysicalDevices(handle, &count, nullptr); - CheckResult(err, "vkEnumeratePhysicalDevices"); - - device_handles.resize(count); - err = vkEnumeratePhysicalDevices(handle, &count, device_handles.data()); - CheckResult(err, "vkEnumeratePhysicalDevices"); - - // Query device info. - for (size_t i = 0; i < device_handles.size(); ++i) { - auto device_handle = device_handles[i]; - DeviceInfo device_info; - device_info.handle = device_handle; - - // Query general attributes. - vkGetPhysicalDeviceProperties(device_handle, &device_info.properties); - vkGetPhysicalDeviceFeatures(device_handle, &device_info.features); - vkGetPhysicalDeviceMemoryProperties(device_handle, - &device_info.memory_properties); - - // Gather queue family properties. 
- vkGetPhysicalDeviceQueueFamilyProperties(device_handle, &count, nullptr); - device_info.queue_family_properties.resize(count); - vkGetPhysicalDeviceQueueFamilyProperties( - device_handle, &count, device_info.queue_family_properties.data()); - - // Gather layers. - std::vector layer_properties; - err = vkEnumerateDeviceLayerProperties(device_handle, &count, nullptr); - CheckResult(err, "vkEnumerateDeviceLayerProperties"); - layer_properties.resize(count); - err = vkEnumerateDeviceLayerProperties(device_handle, &count, - layer_properties.data()); - CheckResult(err, "vkEnumerateDeviceLayerProperties"); - for (size_t j = 0; j < layer_properties.size(); ++j) { - LayerInfo layer_info; - layer_info.properties = layer_properties[j]; - err = vkEnumerateDeviceExtensionProperties( - device_handle, layer_info.properties.layerName, &count, nullptr); - CheckResult(err, "vkEnumerateDeviceExtensionProperties"); - layer_info.extensions.resize(count); - err = vkEnumerateDeviceExtensionProperties( - device_handle, layer_info.properties.layerName, &count, - layer_info.extensions.data()); - CheckResult(err, "vkEnumerateDeviceExtensionProperties"); - device_info.layers.push_back(std::move(layer_info)); - } - - // Gather extensions. 
- err = vkEnumerateDeviceExtensionProperties(device_handle, nullptr, &count, - nullptr); - CheckResult(err, "vkEnumerateDeviceExtensionProperties"); - device_info.extensions.resize(count); - err = vkEnumerateDeviceExtensionProperties(device_handle, nullptr, &count, - device_info.extensions.data()); - CheckResult(err, "vkEnumerateDeviceExtensionProperties"); - - available_devices_.push_back(std::move(device_info)); - } - - XELOGVK("Found {} physical devices:", available_devices_.size()); - for (size_t i = 0; i < available_devices_.size(); ++i) { - auto& device_info = available_devices_[i]; - XELOGVK("- Device {}:", i); - DumpDeviceInfo(device_info); - } - - return true; -} - -void VulkanInstance::DumpLayers(const std::vector& layers, - const char* indent) { - for (size_t i = 0; i < layers.size(); ++i) { - auto& layer = layers[i]; - auto spec_version = Version::Parse(layer.properties.specVersion); - auto impl_version = Version::Parse(layer.properties.implementationVersion); - XELOGVK("{}- {} (spec: {}, impl: {})", indent, layer.properties.layerName, - spec_version.pretty_string, impl_version.pretty_string); - XELOGVK("{} {}", indent, layer.properties.description); - if (!layer.extensions.empty()) { - XELOGVK("{} {} extensions:", indent, layer.extensions.size()); - DumpExtensions(layer.extensions, std::strlen(indent) ? 
" " : " "); - } - } -} - -void VulkanInstance::DumpExtensions( - const std::vector& extensions, const char* indent) { - for (size_t i = 0; i < extensions.size(); ++i) { - auto& extension = extensions[i]; - auto version = Version::Parse(extension.specVersion); - XELOGVK("{}- {} ({})", indent, extension.extensionName, - version.pretty_string); - } -} - -void VulkanInstance::DumpDeviceInfo(const DeviceInfo& device_info) { - auto& properties = device_info.properties; - auto api_version = Version::Parse(properties.apiVersion); - auto driver_version = Version::Parse(properties.driverVersion); - XELOGVK(" apiVersion = {}", api_version.pretty_string); - XELOGVK(" driverVersion = {}", driver_version.pretty_string); - XELOGVK(" vendorId = {:#04x}", properties.vendorID); - XELOGVK(" deviceId = {:#04x}", properties.deviceID); - XELOGVK(" deviceType = {}", to_string(properties.deviceType)); - XELOGVK(" deviceName = {}", properties.deviceName); - - auto& memory_props = device_info.memory_properties; - XELOGVK(" Memory Heaps:"); - for (size_t j = 0; j < memory_props.memoryHeapCount; ++j) { - XELOGVK(" - Heap {}: {} bytes", j, memory_props.memoryHeaps[j].size); - for (size_t k = 0; k < memory_props.memoryTypeCount; ++k) { - if (memory_props.memoryTypes[k].heapIndex == j) { - XELOGVK(" - Type {}:", k); - auto type_flags = memory_props.memoryTypes[k].propertyFlags; - if (!type_flags) { - XELOGVK(" VK_MEMORY_PROPERTY_DEVICE_ONLY"); - } - if (type_flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) { - XELOGVK(" VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT"); - } - if (type_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { - XELOGVK(" VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT"); - } - if (type_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) { - XELOGVK(" VK_MEMORY_PROPERTY_HOST_COHERENT_BIT"); - } - if (type_flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) { - XELOGVK(" VK_MEMORY_PROPERTY_HOST_CACHED_BIT"); - } - if (type_flags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) { - XELOGVK(" 
VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT"); - } - } - } - } - - XELOGVK(" Queue Families:"); - for (size_t j = 0; j < device_info.queue_family_properties.size(); ++j) { - auto& queue_props = device_info.queue_family_properties[j]; - XELOGVK(" - Queue {}:", j); - XELOGVK( - " queueFlags = {}{}{}{}", - (queue_props.queueFlags & VK_QUEUE_GRAPHICS_BIT) ? "graphics, " : "", - (queue_props.queueFlags & VK_QUEUE_COMPUTE_BIT) ? "compute, " : "", - (queue_props.queueFlags & VK_QUEUE_TRANSFER_BIT) ? "transfer, " : "", - (queue_props.queueFlags & VK_QUEUE_SPARSE_BINDING_BIT) ? "sparse, " - : ""); - XELOGVK(" queueCount = {}", queue_props.queueCount); - XELOGVK(" timestampValidBits = {}", queue_props.timestampValidBits); - } - - XELOGVK(" Layers:"); - DumpLayers(device_info.layers, " "); - - XELOGVK(" Extensions:"); - DumpExtensions(device_info.extensions, " "); -} - -} // namespace vulkan -} // namespace ui -} // namespace xe diff --git a/src/xenia/ui/vulkan/vulkan_instance.h b/src/xenia/ui/vulkan/vulkan_instance.h deleted file mode 100644 index 6a86933bc..000000000 --- a/src/xenia/ui/vulkan/vulkan_instance.h +++ /dev/null @@ -1,105 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_UI_VULKAN_VULKAN_INSTANCE_H_ -#define XENIA_UI_VULKAN_VULKAN_INSTANCE_H_ - -#include -#include -#include - -#include "xenia/ui/vulkan/vulkan.h" -#include "xenia/ui/vulkan/vulkan_util.h" -#include "xenia/ui/window.h" - -namespace xe { -namespace ui { -namespace vulkan { - -// Wrappers and utilities for VkInstance. 
-class VulkanInstance { - public: - VulkanInstance(); - ~VulkanInstance(); - - VkInstance handle = nullptr; - - operator VkInstance() const { return handle; } - - // Declares a layer to verify and enable upon initialization. - // Must be called before Initialize. - void DeclareRequiredLayer(std::string name, uint32_t min_version, - bool is_optional) { - required_layers_.push_back({name, min_version, is_optional}); - } - - // Declares an extension to verify and enable upon initialization. - // Must be called before Initialize. - void DeclareRequiredExtension(std::string name, uint32_t min_version, - bool is_optional) { - required_extensions_.push_back({name, min_version, is_optional}); - } - - // Initializes the instance, querying and enabling extensions and layers and - // preparing the instance for general use. - // If initialization succeeds it's likely that no more failures beyond runtime - // issues will occur. - bool Initialize(); - - // Returns a list of all available devices as detected during initialization. - const std::vector& available_devices() const { - return available_devices_; - } - - // True if RenderDoc is attached and available for use. - bool is_renderdoc_attached() const { return is_renderdoc_attached_; } - // RenderDoc API handle, if attached. - void* renderdoc_api() const { return renderdoc_api_; } - - private: - // Attempts to enable RenderDoc via the API, if it is attached. - bool EnableRenderDoc(); - - // Queries the system to find global extensions and layers. - bool QueryGlobals(); - - // Creates the instance, enabling required extensions and layers. - bool CreateInstance(); - void DestroyInstance(); - - // Enables debugging info and callbacks for supported layers. - void EnableDebugValidation(); - void DisableDebugValidation(); - - // Queries all available physical devices. 
- bool QueryDevices(); - - void DumpLayers(const std::vector& layers, const char* indent); - void DumpExtensions(const std::vector& extensions, - const char* indent); - void DumpDeviceInfo(const DeviceInfo& device_info); - - std::vector required_layers_; - std::vector required_extensions_; - - std::vector global_layers_; - std::vector global_extensions_; - std::vector available_devices_; - - VkDebugReportCallbackEXT dbg_report_callback_ = nullptr; - - void* renderdoc_api_ = nullptr; - bool is_renderdoc_attached_ = false; -}; - -} // namespace vulkan -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_VULKAN_VULKAN_INSTANCE_H_ diff --git a/src/xenia/ui/vulkan/vulkan_mem_alloc.h b/src/xenia/ui/vulkan/vulkan_mem_alloc.h deleted file mode 100644 index caf89aae6..000000000 --- a/src/xenia/ui/vulkan/vulkan_mem_alloc.h +++ /dev/null @@ -1,44 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2018 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#ifndef XENIA_UI_VULKAN_VULKAN_MEM_ALLOC_H_ -#define XENIA_UI_VULKAN_VULKAN_MEM_ALLOC_H_ - -#include "third_party/volk/volk.h" - -#define VMA_STATIC_VULKAN_FUNCTIONS 0 -#include "third_party/vulkan/vk_mem_alloc.h" - -namespace xe { -namespace ui { -namespace vulkan { - -inline void FillVMAVulkanFunctions(VmaVulkanFunctions* vma_funcs) { - vma_funcs->vkGetPhysicalDeviceProperties = vkGetPhysicalDeviceProperties; - vma_funcs->vkGetPhysicalDeviceMemoryProperties = - vkGetPhysicalDeviceMemoryProperties; - vma_funcs->vkAllocateMemory = vkAllocateMemory; - vma_funcs->vkFreeMemory = vkFreeMemory; - vma_funcs->vkMapMemory = vkMapMemory; - vma_funcs->vkUnmapMemory = vkUnmapMemory; - vma_funcs->vkBindBufferMemory = vkBindBufferMemory; - vma_funcs->vkBindImageMemory = vkBindImageMemory; - vma_funcs->vkGetBufferMemoryRequirements = vkGetBufferMemoryRequirements; - vma_funcs->vkGetImageMemoryRequirements = vkGetImageMemoryRequirements; - vma_funcs->vkCreateBuffer = vkCreateBuffer; - vma_funcs->vkDestroyBuffer = vkDestroyBuffer; - vma_funcs->vkCreateImage = vkCreateImage; - vma_funcs->vkDestroyImage = vkDestroyImage; -} - -} // namespace vulkan -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_VULKAN_VULKAN_MEM_ALLOC_H_ \ No newline at end of file diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index 119f03992..669c04d31 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -2,23 +2,15 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * + * Copyright 2020 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ #include "xenia/ui/vulkan/vulkan_provider.h" -#include - #include "xenia/base/logging.h" #include "xenia/ui/vulkan/vulkan_context.h" -#include "xenia/ui/vulkan/vulkan_device.h" -#include "xenia/ui/vulkan/vulkan_instance.h" -#include "xenia/ui/vulkan/vulkan_util.h" - -DEFINE_uint64(vulkan_device_index, 0, "Index of the physical device to use.", - "Vulkan"); namespace xe { namespace ui { @@ -28,10 +20,11 @@ std::unique_ptr VulkanProvider::Create(Window* main_window) { std::unique_ptr provider(new VulkanProvider(main_window)); if (!provider->Initialize()) { xe::FatalError( - "Unable to initialize Vulkan graphics subsystem.\n" + "Unable to initialize Direct3D 12 graphics subsystem.\n" "\n" - "Ensure you have the latest drivers for your GPU and that it " - "supports Vulkan.\n" + "Ensure that you have the latest drivers for your GPU and it supports " + "Vulkan, and that you have the latest Vulkan runtime installed, which " + "can be downloaded at https://vulkan.lunarg.com/sdk/home.\n" "\n" "See https://xenia.jp/faq/ for more information and a list of " "supported GPUs."); @@ -43,49 +36,7 @@ std::unique_ptr VulkanProvider::Create(Window* main_window) { VulkanProvider::VulkanProvider(Window* main_window) : GraphicsProvider(main_window) {} -VulkanProvider::~VulkanProvider() { - device_.reset(); - instance_.reset(); -} - -bool VulkanProvider::Initialize() { - instance_ = std::make_unique(); - - // Always enable the swapchain. -#if XE_PLATFORM_WIN32 - instance_->DeclareRequiredExtension("VK_KHR_surface", Version::Make(0, 0, 0), - false); - instance_->DeclareRequiredExtension("VK_KHR_win32_surface", - Version::Make(0, 0, 0), false); -#endif - - // Attempt initialization and device query. - if (!instance_->Initialize()) { - XELOGE("Failed to initialize vulkan instance"); - return false; - } - - // Pick the device to use. 
- auto available_devices = instance_->available_devices(); - if (available_devices.empty()) { - XELOGE("No devices available for use"); - return false; - } - size_t device_index = - std::min(available_devices.size(), cvars::vulkan_device_index); - auto& device_info = available_devices[device_index]; - - // Create the device. - device_ = std::make_unique(instance_.get()); - device_->DeclareRequiredExtension("VK_KHR_swapchain", Version::Make(0, 0, 0), - false); - if (!device_->Initialize(device_info)) { - XELOGE("Unable to initialize device"); - return false; - } - - return true; -} +bool VulkanProvider::Initialize() { return false; } std::unique_ptr VulkanProvider::CreateContext( Window* target_window) { diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h index f4a8080e3..3313b8d92 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.h +++ b/src/xenia/ui/vulkan/vulkan_provider.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * + * Copyright 2020 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ @@ -18,29 +18,18 @@ namespace xe { namespace ui { namespace vulkan { -class VulkanDevice; -class VulkanInstance; - class VulkanProvider : public GraphicsProvider { public: - ~VulkanProvider() override; - static std::unique_ptr Create(Window* main_window); - VulkanInstance* instance() const { return instance_.get(); } - VulkanDevice* device() const { return device_.get(); } - std::unique_ptr CreateContext( Window* target_window) override; std::unique_ptr CreateOffscreenContext() override; - protected: + private: explicit VulkanProvider(Window* main_window); bool Initialize(); - - std::unique_ptr instance_; - std::unique_ptr device_; }; } // namespace vulkan diff --git a/src/xenia/ui/vulkan/vulkan_swap_chain.cc b/src/xenia/ui/vulkan/vulkan_swap_chain.cc deleted file mode 100644 index ae91e493b..000000000 --- a/src/xenia/ui/vulkan/vulkan_swap_chain.cc +++ /dev/null @@ -1,811 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#include "xenia/ui/vulkan/vulkan_swap_chain.h" - -#include -#include - -#include "xenia/base/assert.h" -#include "xenia/base/cvar.h" -#include "xenia/base/logging.h" -#include "xenia/base/math.h" -#include "xenia/ui/vulkan/vulkan.h" -#include "xenia/ui/vulkan/vulkan_device.h" -#include "xenia/ui/vulkan/vulkan_instance.h" -#include "xenia/ui/vulkan/vulkan_util.h" - -DEFINE_bool(vulkan_random_clear_color, false, - "Randomizes framebuffer clear color.", "Vulkan"); - -namespace xe { -namespace ui { -namespace vulkan { - -VulkanSwapChain::VulkanSwapChain(VulkanInstance* instance, VulkanDevice* device) - : instance_(instance), device_(device) {} - -VulkanSwapChain::~VulkanSwapChain() { Shutdown(); } - -VkResult VulkanSwapChain::Initialize(VkSurfaceKHR surface) { - surface_ = surface; - VkResult status; - - // Find a queue family that supports presentation. - VkBool32 surface_supported = false; - uint32_t queue_family_index = -1; - for (uint32_t i = 0; - i < device_->device_info().queue_family_properties.size(); i++) { - const VkQueueFamilyProperties& family_props = - device_->device_info().queue_family_properties[i]; - if (!(family_props.queueFlags & VK_QUEUE_GRAPHICS_BIT) || - !(family_props.queueFlags & VK_QUEUE_TRANSFER_BIT)) { - continue; - } - - status = vkGetPhysicalDeviceSurfaceSupportKHR(*device_, i, surface, - &surface_supported); - if (status == VK_SUCCESS && surface_supported == VK_TRUE) { - queue_family_index = i; - break; - } - } - - if (!surface_supported) { - XELOGE( - "Physical device does not have a queue that supports " - "graphics/transfer/presentation!"); - return VK_ERROR_INCOMPATIBLE_DRIVER; - } - - presentation_queue_ = device_->AcquireQueue(queue_family_index); - presentation_queue_family_ = queue_family_index; - if (!presentation_queue_) { - // That's okay, use the primary queue. 
- presentation_queue_ = device_->primary_queue(); - presentation_queue_mutex_ = &device_->primary_queue_mutex(); - presentation_queue_family_ = device_->queue_family_index(); - - if (!presentation_queue_) { - XELOGE("Failed to acquire swap chain presentation queue!"); - return VK_ERROR_INITIALIZATION_FAILED; - } - } - - // Query supported target formats. - uint32_t count = 0; - status = - vkGetPhysicalDeviceSurfaceFormatsKHR(*device_, surface_, &count, nullptr); - CheckResult(status, "vkGetPhysicalDeviceSurfaceFormatsKHR"); - std::vector surface_formats; - surface_formats.resize(count); - status = vkGetPhysicalDeviceSurfaceFormatsKHR(*device_, surface_, &count, - surface_formats.data()); - CheckResult(status, "vkGetPhysicalDeviceSurfaceFormatsKHR"); - if (status != VK_SUCCESS) { - return status; - } - - // If the format list includes just one entry of VK_FORMAT_UNDEFINED the - // surface has no preferred format. - // Otherwise, at least one supported format will be returned. - assert_true(surface_formats.size() >= 1); - if (surface_formats.size() == 1 && - surface_formats[0].format == VK_FORMAT_UNDEFINED) { - // Fallback to common RGBA. - surface_format_ = VK_FORMAT_R8G8B8A8_UNORM; - } else { - // Use first defined format. - surface_format_ = surface_formats[0].format; - } - - // Query surface min/max/caps. - VkSurfaceCapabilitiesKHR surface_caps; - status = vkGetPhysicalDeviceSurfaceCapabilitiesKHR(*device_, surface_, - &surface_caps); - CheckResult(status, "vkGetPhysicalDeviceSurfaceCapabilitiesKHR"); - if (status != VK_SUCCESS) { - return status; - } - - // Query surface properties so we can configure ourselves within bounds. 
- std::vector present_modes; - status = vkGetPhysicalDeviceSurfacePresentModesKHR(*device_, surface_, &count, - nullptr); - CheckResult(status, "vkGetPhysicalDeviceSurfacePresentModesKHR"); - if (status != VK_SUCCESS) { - return status; - } - - present_modes.resize(count); - status = vkGetPhysicalDeviceSurfacePresentModesKHR(*device_, surface_, &count, - present_modes.data()); - CheckResult(status, "vkGetPhysicalDeviceSurfacePresentModesKHR"); - if (status != VK_SUCCESS) { - return status; - } - - // Calculate swapchain target dimensions. - VkExtent2D extent = surface_caps.currentExtent; - if (surface_caps.currentExtent.width == -1) { - assert_true(surface_caps.currentExtent.height == -1); - // Undefined extents, so we need to pick something. - XELOGI("Swap chain target surface extents undefined; guessing value"); - extent.width = 1280; - extent.height = 720; - } - surface_width_ = extent.width; - surface_height_ = extent.height; - - // Always prefer mailbox mode (non-tearing, low-latency). - // If it's not available we'll use immediate (tearing, low-latency). - // If not even that we fall back to FIFO, which sucks. - VkPresentModeKHR present_mode = VK_PRESENT_MODE_FIFO_KHR; - for (size_t i = 0; i < present_modes.size(); ++i) { - if (present_modes[i] == VK_PRESENT_MODE_MAILBOX_KHR) { - // This is the best, so early-out. - present_mode = VK_PRESENT_MODE_MAILBOX_KHR; - break; - } else if (present_modes[i] == VK_PRESENT_MODE_IMMEDIATE_KHR) { - present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR; - } - } - - // Determine the number of images (1 + number queued). - uint32_t image_count = surface_caps.minImageCount + 1; - if (surface_caps.maxImageCount > 0 && - image_count > surface_caps.maxImageCount) { - // Too many requested - use whatever we can. 
- XELOGI("Requested number of swapchain images ({}) exceeds maximum ({})", - image_count, surface_caps.maxImageCount); - image_count = surface_caps.maxImageCount; - } - - // Always pass through whatever transform the surface started with (so long - // as it's supported). - VkSurfaceTransformFlagBitsKHR pre_transform = surface_caps.currentTransform; - - VkSwapchainCreateInfoKHR create_info; - create_info.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; - create_info.pNext = nullptr; - create_info.flags = 0; - create_info.surface = surface_; - create_info.minImageCount = image_count; - create_info.imageFormat = surface_format_; - create_info.imageColorSpace = VK_COLORSPACE_SRGB_NONLINEAR_KHR; - create_info.imageExtent.width = extent.width; - create_info.imageExtent.height = extent.height; - create_info.imageArrayLayers = 1; - create_info.imageUsage = - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; - create_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; - create_info.queueFamilyIndexCount = 0; - create_info.pQueueFamilyIndices = nullptr; - create_info.preTransform = pre_transform; - create_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; - create_info.presentMode = present_mode; - create_info.clipped = VK_TRUE; - create_info.oldSwapchain = nullptr; - - XELOGVK("Creating swap chain:"); - XELOGVK(" minImageCount = {}", create_info.minImageCount); - XELOGVK(" imageFormat = {}", to_string(create_info.imageFormat)); - XELOGVK(" imageExtent = {} x {}", create_info.imageExtent.width, - create_info.imageExtent.height); - auto pre_transform_str = to_flags_string(create_info.preTransform); - XELOGVK(" preTransform = {}", pre_transform_str); - XELOGVK(" imageArrayLayers = {}", create_info.imageArrayLayers); - XELOGVK(" presentMode = {}", to_string(create_info.presentMode)); - XELOGVK(" clipped = {}", create_info.clipped ? 
"true" : "false"); - XELOGVK(" imageColorSpace = {}", to_string(create_info.imageColorSpace)); - auto image_usage_flags_str = to_flags_string( - static_cast(create_info.imageUsage)); - XELOGVK(" imageUsageFlags = {}", image_usage_flags_str); - XELOGVK(" imageSharingMode = {}", to_string(create_info.imageSharingMode)); - XELOGVK(" queueFamilyCount = {}", create_info.queueFamilyIndexCount); - - status = vkCreateSwapchainKHR(*device_, &create_info, nullptr, &handle); - if (status != VK_SUCCESS) { - XELOGE("Failed to create swapchain: {}", to_string(status)); - return status; - } - - // Create the pool used for transient buffers, so we can reset them all at - // once. - VkCommandPoolCreateInfo cmd_pool_info; - cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - cmd_pool_info.pNext = nullptr; - cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - cmd_pool_info.queueFamilyIndex = presentation_queue_family_; - status = vkCreateCommandPool(*device_, &cmd_pool_info, nullptr, &cmd_pool_); - CheckResult(status, "vkCreateCommandPool"); - if (status != VK_SUCCESS) { - return status; - } - - // Primary command buffer - VkCommandBufferAllocateInfo cmd_buffer_info; - cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - cmd_buffer_info.pNext = nullptr; - cmd_buffer_info.commandPool = cmd_pool_; - cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - cmd_buffer_info.commandBufferCount = 2; - status = vkAllocateCommandBuffers(*device_, &cmd_buffer_info, &cmd_buffer_); - CheckResult(status, "vkCreateCommandBuffer"); - if (status != VK_SUCCESS) { - return status; - } - - // Make two command buffers we'll do all our primary rendering from. 
- VkCommandBuffer command_buffers[2]; - cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_SECONDARY; - cmd_buffer_info.commandBufferCount = 2; - status = - vkAllocateCommandBuffers(*device_, &cmd_buffer_info, command_buffers); - CheckResult(status, "vkCreateCommandBuffer"); - if (status != VK_SUCCESS) { - return status; - } - - render_cmd_buffer_ = command_buffers[0]; - copy_cmd_buffer_ = command_buffers[1]; - - // Create the render pass used to draw to the swap chain. - // The actual framebuffer attached will depend on which image we are drawing - // into. - VkAttachmentDescription color_attachment; - color_attachment.flags = 0; - color_attachment.format = surface_format_; - color_attachment.samples = VK_SAMPLE_COUNT_1_BIT; - color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; // CLEAR; - color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - color_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - color_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - color_attachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - color_attachment.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - VkAttachmentReference color_reference; - color_reference.attachment = 0; - color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - VkAttachmentReference depth_reference; - depth_reference.attachment = VK_ATTACHMENT_UNUSED; - depth_reference.layout = VK_IMAGE_LAYOUT_UNDEFINED; - VkSubpassDescription render_subpass; - render_subpass.flags = 0; - render_subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - render_subpass.inputAttachmentCount = 0; - render_subpass.pInputAttachments = nullptr; - render_subpass.colorAttachmentCount = 1; - render_subpass.pColorAttachments = &color_reference; - render_subpass.pResolveAttachments = nullptr; - render_subpass.pDepthStencilAttachment = &depth_reference; - render_subpass.preserveAttachmentCount = 0, - render_subpass.pPreserveAttachments = nullptr; - VkRenderPassCreateInfo 
render_pass_info; - render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - render_pass_info.pNext = nullptr; - render_pass_info.flags = 0; - render_pass_info.attachmentCount = 1; - render_pass_info.pAttachments = &color_attachment; - render_pass_info.subpassCount = 1; - render_pass_info.pSubpasses = &render_subpass; - render_pass_info.dependencyCount = 0; - render_pass_info.pDependencies = nullptr; - status = - vkCreateRenderPass(*device_, &render_pass_info, nullptr, &render_pass_); - CheckResult(status, "vkCreateRenderPass"); - if (status != VK_SUCCESS) { - return status; - } - - // Create a semaphore we'll use to synchronize with the swapchain. - VkSemaphoreCreateInfo semaphore_info; - semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - semaphore_info.pNext = nullptr; - semaphore_info.flags = 0; - status = vkCreateSemaphore(*device_, &semaphore_info, nullptr, - &image_available_semaphore_); - CheckResult(status, "vkCreateSemaphore"); - if (status != VK_SUCCESS) { - return status; - } - - // Create another semaphore used to synchronize writes to the swap image. - status = vkCreateSemaphore(*device_, &semaphore_info, nullptr, - &image_usage_semaphore_); - CheckResult(status, "vkCreateSemaphore"); - if (status != VK_SUCCESS) { - return status; - } - - // Get images we will be presenting to. - // Note that this may differ from our requested amount. - uint32_t actual_image_count = 0; - std::vector images; - status = - vkGetSwapchainImagesKHR(*device_, handle, &actual_image_count, nullptr); - CheckResult(status, "vkGetSwapchainImagesKHR"); - if (status != VK_SUCCESS) { - return status; - } - - images.resize(actual_image_count); - status = vkGetSwapchainImagesKHR(*device_, handle, &actual_image_count, - images.data()); - CheckResult(status, "vkGetSwapchainImagesKHR"); - if (status != VK_SUCCESS) { - return status; - } - - // Create all buffers. 
- buffers_.resize(images.size()); - for (size_t i = 0; i < buffers_.size(); ++i) { - status = InitializeBuffer(&buffers_[i], images[i]); - if (status != VK_SUCCESS) { - XELOGE("Failed to initialize a swapchain buffer"); - return status; - } - - buffers_[i].image_layout = VK_IMAGE_LAYOUT_UNDEFINED; - } - - // Create a fence we'll use to wait for commands to finish. - VkFenceCreateInfo fence_create_info = { - VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - nullptr, - VK_FENCE_CREATE_SIGNALED_BIT, - }; - status = vkCreateFence(*device_, &fence_create_info, nullptr, - &synchronization_fence_); - CheckResult(status, "vkGetSwapchainImagesKHR"); - if (status != VK_SUCCESS) { - return status; - } - - XELOGVK("Swap chain initialized successfully!"); - return VK_SUCCESS; -} - -VkResult VulkanSwapChain::InitializeBuffer(Buffer* buffer, - VkImage target_image) { - DestroyBuffer(buffer); - buffer->image = target_image; - - VkResult status; - - // Create an image view for the presentation image. - // This will be used as a framebuffer attachment. 
- VkImageViewCreateInfo image_view_info; - image_view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - image_view_info.pNext = nullptr; - image_view_info.flags = 0; - image_view_info.image = buffer->image; - image_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; - image_view_info.format = surface_format_; - image_view_info.components.r = VK_COMPONENT_SWIZZLE_R; - image_view_info.components.g = VK_COMPONENT_SWIZZLE_G; - image_view_info.components.b = VK_COMPONENT_SWIZZLE_B; - image_view_info.components.a = VK_COMPONENT_SWIZZLE_A; - image_view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - image_view_info.subresourceRange.baseMipLevel = 0; - image_view_info.subresourceRange.levelCount = 1; - image_view_info.subresourceRange.baseArrayLayer = 0; - image_view_info.subresourceRange.layerCount = 1; - status = vkCreateImageView(*device_, &image_view_info, nullptr, - &buffer->image_view); - CheckResult(status, "vkCreateImageView"); - if (status != VK_SUCCESS) { - return status; - } - - // Create the framebuffer used to render into this image. 
- VkImageView attachments[] = {buffer->image_view}; - VkFramebufferCreateInfo framebuffer_info; - framebuffer_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - framebuffer_info.pNext = nullptr; - framebuffer_info.flags = 0; - framebuffer_info.renderPass = render_pass_; - framebuffer_info.attachmentCount = - static_cast(xe::countof(attachments)); - framebuffer_info.pAttachments = attachments; - framebuffer_info.width = surface_width_; - framebuffer_info.height = surface_height_; - framebuffer_info.layers = 1; - status = vkCreateFramebuffer(*device_, &framebuffer_info, nullptr, - &buffer->framebuffer); - CheckResult(status, "vkCreateFramebuffer"); - if (status != VK_SUCCESS) { - return status; - } - - return VK_SUCCESS; -} - -void VulkanSwapChain::DestroyBuffer(Buffer* buffer) { - if (buffer->framebuffer) { - vkDestroyFramebuffer(*device_, buffer->framebuffer, nullptr); - buffer->framebuffer = nullptr; - } - if (buffer->image_view) { - vkDestroyImageView(*device_, buffer->image_view, nullptr); - buffer->image_view = nullptr; - } - // Image is taken care of by the presentation engine. - buffer->image = nullptr; -} - -VkResult VulkanSwapChain::Reinitialize() { - // Hacky, but stash the surface so we can reuse it. - auto surface = surface_; - surface_ = nullptr; - Shutdown(); - return Initialize(surface); -} - -void VulkanSwapChain::WaitOnSemaphore(VkSemaphore sem) { - wait_semaphores_.push_back(sem); -} - -void VulkanSwapChain::Shutdown() { - // TODO(benvanik): properly wait for a clean state. 
- for (auto& buffer : buffers_) { - DestroyBuffer(&buffer); - } - buffers_.clear(); - - VK_SAFE_DESTROY(vkDestroySemaphore, *device_, image_available_semaphore_, - nullptr); - VK_SAFE_DESTROY(vkDestroyRenderPass, *device_, render_pass_, nullptr); - - if (copy_cmd_buffer_) { - vkFreeCommandBuffers(*device_, cmd_pool_, 1, ©_cmd_buffer_); - copy_cmd_buffer_ = nullptr; - } - if (render_cmd_buffer_) { - vkFreeCommandBuffers(*device_, cmd_pool_, 1, &render_cmd_buffer_); - render_cmd_buffer_ = nullptr; - } - VK_SAFE_DESTROY(vkDestroyCommandPool, *device_, cmd_pool_, nullptr); - - if (presentation_queue_) { - if (!presentation_queue_mutex_) { - // We own the queue and need to release it. - device_->ReleaseQueue(presentation_queue_, presentation_queue_family_); - } - presentation_queue_ = nullptr; - presentation_queue_mutex_ = nullptr; - presentation_queue_family_ = -1; - } - - VK_SAFE_DESTROY(vkDestroyFence, *device_, synchronization_fence_, nullptr); - - // images_ doesn't need to be cleaned up as the swapchain does it implicitly. - VK_SAFE_DESTROY(vkDestroySwapchainKHR, *device_, handle, nullptr); - VK_SAFE_DESTROY(vkDestroySurfaceKHR, *instance_, surface_, nullptr); -} - -VkResult VulkanSwapChain::Begin() { - wait_semaphores_.clear(); - - VkResult status; - - // Wait for the last swap to finish. - status = vkWaitForFences(*device_, 1, &synchronization_fence_, VK_TRUE, -1); - if (status != VK_SUCCESS) { - return status; - } - - status = vkResetFences(*device_, 1, &synchronization_fence_); - if (status != VK_SUCCESS) { - return status; - } - - // Get the index of the next available swapchain image. - status = - vkAcquireNextImageKHR(*device_, handle, 0, image_available_semaphore_, - nullptr, ¤t_buffer_index_); - if (status != VK_SUCCESS) { - return status; - } - - // Wait for the acquire semaphore to be signaled so that the following - // operations know they can start modifying the image. 
- VkSubmitInfo wait_submit_info; - wait_submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - wait_submit_info.pNext = nullptr; - - VkPipelineStageFlags wait_dst_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - wait_submit_info.waitSemaphoreCount = 1; - wait_submit_info.pWaitSemaphores = &image_available_semaphore_; - wait_submit_info.pWaitDstStageMask = &wait_dst_stage; - - wait_submit_info.commandBufferCount = 0; - wait_submit_info.pCommandBuffers = nullptr; - wait_submit_info.signalSemaphoreCount = 1; - wait_submit_info.pSignalSemaphores = &image_usage_semaphore_; - if (presentation_queue_mutex_) { - presentation_queue_mutex_->lock(); - } - status = vkQueueSubmit(presentation_queue_, 1, &wait_submit_info, nullptr); - if (presentation_queue_mutex_) { - presentation_queue_mutex_->unlock(); - } - if (status != VK_SUCCESS) { - return status; - } - - // Reset all command buffers. - vkResetCommandBuffer(render_cmd_buffer_, 0); - vkResetCommandBuffer(copy_cmd_buffer_, 0); - auto& current_buffer = buffers_[current_buffer_index_]; - - // Build the command buffer that will execute all queued rendering buffers. 
- VkCommandBufferInheritanceInfo inherit_info; - inherit_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; - inherit_info.pNext = nullptr; - inherit_info.renderPass = render_pass_; - inherit_info.subpass = 0; - inherit_info.framebuffer = current_buffer.framebuffer; - inherit_info.occlusionQueryEnable = VK_FALSE; - inherit_info.queryFlags = 0; - inherit_info.pipelineStatistics = 0; - - VkCommandBufferBeginInfo begin_info; - begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_info.pNext = nullptr; - begin_info.flags = VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT | - VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - begin_info.pInheritanceInfo = &inherit_info; - status = vkBeginCommandBuffer(render_cmd_buffer_, &begin_info); - CheckResult(status, "vkBeginCommandBuffer"); - if (status != VK_SUCCESS) { - return status; - } - - // Start recording the copy command buffer as well. - begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - status = vkBeginCommandBuffer(copy_cmd_buffer_, &begin_info); - CheckResult(status, "vkBeginCommandBuffer"); - if (status != VK_SUCCESS) { - return status; - } - - // First: Issue a command to clear the render target. 
- VkImageSubresourceRange clear_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - VkClearColorValue clear_color; - clear_color.float32[0] = 238 / 255.0f; - clear_color.float32[1] = 238 / 255.0f; - clear_color.float32[2] = 238 / 255.0f; - clear_color.float32[3] = 1.0f; - if (cvars::vulkan_random_clear_color) { - clear_color.float32[0] = - rand() / static_cast(RAND_MAX); // NOLINT(runtime/threadsafe_fn) - clear_color.float32[1] = 1.0f; - clear_color.float32[2] = 0.0f; - } - vkCmdClearColorImage(copy_cmd_buffer_, current_buffer.image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, - &clear_range); - - return VK_SUCCESS; -} - -VkResult VulkanSwapChain::End() { - auto& current_buffer = buffers_[current_buffer_index_]; - VkResult status; - - status = vkEndCommandBuffer(render_cmd_buffer_); - CheckResult(status, "vkEndCommandBuffer"); - if (status != VK_SUCCESS) { - return status; - } - - status = vkEndCommandBuffer(copy_cmd_buffer_); - CheckResult(status, "vkEndCommandBuffer"); - if (status != VK_SUCCESS) { - return status; - } - - // Build primary command buffer. - status = vkResetCommandBuffer(cmd_buffer_, 0); - CheckResult(status, "vkResetCommandBuffer"); - if (status != VK_SUCCESS) { - return status; - } - - VkCommandBufferBeginInfo begin_info; - begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_info.pNext = nullptr; - begin_info.flags = 0; - begin_info.pInheritanceInfo = nullptr; - status = vkBeginCommandBuffer(cmd_buffer_, &begin_info); - CheckResult(status, "vkBeginCommandBuffer"); - if (status != VK_SUCCESS) { - return status; - } - - // Transition the image to a format we can copy to. 
- VkImageMemoryBarrier pre_image_copy_barrier; - pre_image_copy_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - pre_image_copy_barrier.pNext = nullptr; - pre_image_copy_barrier.srcAccessMask = VK_ACCESS_MEMORY_READ_BIT; - pre_image_copy_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - pre_image_copy_barrier.oldLayout = current_buffer.image_layout; - pre_image_copy_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - pre_image_copy_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - pre_image_copy_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - pre_image_copy_barrier.image = current_buffer.image; - pre_image_copy_barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, - 1}; - vkCmdPipelineBarrier(cmd_buffer_, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, - nullptr, 1, &pre_image_copy_barrier); - - current_buffer.image_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - - // Execute copy commands - vkCmdExecuteCommands(cmd_buffer_, 1, ©_cmd_buffer_); - - // Transition the image to a color attachment target for drawing. 
- VkImageMemoryBarrier pre_image_memory_barrier; - pre_image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - pre_image_memory_barrier.pNext = nullptr; - pre_image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - pre_image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - pre_image_memory_barrier.image = current_buffer.image; - pre_image_memory_barrier.subresourceRange.aspectMask = - VK_IMAGE_ASPECT_COLOR_BIT; - pre_image_memory_barrier.subresourceRange.baseMipLevel = 0; - pre_image_memory_barrier.subresourceRange.levelCount = 1; - pre_image_memory_barrier.subresourceRange.baseArrayLayer = 0; - pre_image_memory_barrier.subresourceRange.layerCount = 1; - - pre_image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - pre_image_memory_barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - pre_image_memory_barrier.oldLayout = current_buffer.image_layout; - pre_image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - vkCmdPipelineBarrier(cmd_buffer_, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, - nullptr, 0, nullptr, 1, &pre_image_memory_barrier); - - current_buffer.image_layout = pre_image_memory_barrier.newLayout; - - // Begin render pass. 
- VkRenderPassBeginInfo render_pass_begin_info; - render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - render_pass_begin_info.pNext = nullptr; - render_pass_begin_info.renderPass = render_pass_; - render_pass_begin_info.framebuffer = current_buffer.framebuffer; - render_pass_begin_info.renderArea.offset.x = 0; - render_pass_begin_info.renderArea.offset.y = 0; - render_pass_begin_info.renderArea.extent.width = surface_width_; - render_pass_begin_info.renderArea.extent.height = surface_height_; - render_pass_begin_info.clearValueCount = 0; - render_pass_begin_info.pClearValues = nullptr; - vkCmdBeginRenderPass(cmd_buffer_, &render_pass_begin_info, - VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS); - - // Render commands. - vkCmdExecuteCommands(cmd_buffer_, 1, &render_cmd_buffer_); - - // End render pass. - vkCmdEndRenderPass(cmd_buffer_); - - // Transition the image to a format the presentation engine can source from. - // FIXME: Do we need more synchronization here between the copy buffer? 
- VkImageMemoryBarrier post_image_memory_barrier; - post_image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - post_image_memory_barrier.pNext = nullptr; - post_image_memory_barrier.srcAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - post_image_memory_barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; - post_image_memory_barrier.oldLayout = current_buffer.image_layout; - post_image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; - post_image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - post_image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - post_image_memory_barrier.image = current_buffer.image; - post_image_memory_barrier.subresourceRange.aspectMask = - VK_IMAGE_ASPECT_COLOR_BIT; - post_image_memory_barrier.subresourceRange.baseMipLevel = 0; - post_image_memory_barrier.subresourceRange.levelCount = 1; - post_image_memory_barrier.subresourceRange.baseArrayLayer = 0; - post_image_memory_barrier.subresourceRange.layerCount = 1; - vkCmdPipelineBarrier(cmd_buffer_, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, - nullptr, 1, &post_image_memory_barrier); - - current_buffer.image_layout = post_image_memory_barrier.newLayout; - - status = vkEndCommandBuffer(cmd_buffer_); - CheckResult(status, "vkEndCommandBuffer"); - if (status != VK_SUCCESS) { - return status; - } - - std::vector semaphores; - std::vector wait_dst_stages; - for (size_t i = 0; i < wait_semaphores_.size(); i++) { - semaphores.push_back(wait_semaphores_[i]); - wait_dst_stages.push_back(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); - } - semaphores.push_back(image_usage_semaphore_); - wait_dst_stages.push_back(VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); - - // Submit commands. 
- // Wait on the image usage semaphore (signaled when an image is available) - VkSubmitInfo render_submit_info; - render_submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - render_submit_info.pNext = nullptr; - render_submit_info.waitSemaphoreCount = uint32_t(semaphores.size()); - render_submit_info.pWaitSemaphores = semaphores.data(); - render_submit_info.pWaitDstStageMask = wait_dst_stages.data(); - render_submit_info.commandBufferCount = 1; - render_submit_info.pCommandBuffers = &cmd_buffer_; - render_submit_info.signalSemaphoreCount = 0; - render_submit_info.pSignalSemaphores = nullptr; - if (presentation_queue_mutex_) { - presentation_queue_mutex_->lock(); - } - status = vkQueueSubmit(presentation_queue_, 1, &render_submit_info, - synchronization_fence_); - if (presentation_queue_mutex_) { - presentation_queue_mutex_->unlock(); - } - - if (status != VK_SUCCESS) { - return status; - } - - // Queue the present of our current image. - const VkSwapchainKHR swap_chains[] = {handle}; - const uint32_t swap_chain_image_indices[] = {current_buffer_index_}; - VkPresentInfoKHR present_info; - present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; - present_info.pNext = nullptr; - present_info.waitSemaphoreCount = 0; - present_info.pWaitSemaphores = nullptr; - present_info.swapchainCount = static_cast(xe::countof(swap_chains)); - present_info.pSwapchains = swap_chains; - present_info.pImageIndices = swap_chain_image_indices; - present_info.pResults = nullptr; - if (presentation_queue_mutex_) { - presentation_queue_mutex_->lock(); - } - status = vkQueuePresentKHR(presentation_queue_, &present_info); - if (presentation_queue_mutex_) { - presentation_queue_mutex_->unlock(); - } - - switch (status) { - case VK_SUCCESS: - break; - case VK_SUBOPTIMAL_KHR: - // We are not rendering at the right size - but the presentation engine - // will scale the output for us. 
- status = VK_SUCCESS; - break; - case VK_ERROR_OUT_OF_DATE_KHR: - // Lost presentation ability; need to recreate the swapchain. - // TODO(benvanik): recreate swapchain. - assert_always("Swapchain recreation not implemented"); - break; - case VK_ERROR_DEVICE_LOST: - // Fatal. Device lost. - break; - default: - XELOGE("Failed to queue present: {}", to_string(status)); - assert_always("Unexpected queue present failure"); - } - - return status; -} - -} // namespace vulkan -} // namespace ui -} // namespace xe diff --git a/src/xenia/ui/vulkan/vulkan_swap_chain.h b/src/xenia/ui/vulkan/vulkan_swap_chain.h deleted file mode 100644 index 0adb35ac5..000000000 --- a/src/xenia/ui/vulkan/vulkan_swap_chain.h +++ /dev/null @@ -1,106 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_UI_VULKAN_VULKAN_SWAP_CHAIN_H_ -#define XENIA_UI_VULKAN_VULKAN_SWAP_CHAIN_H_ - -#include -#include -#include -#include - -#include "xenia/ui/vulkan/vulkan.h" -#include "xenia/ui/vulkan/vulkan_util.h" - -namespace xe { -namespace ui { -namespace vulkan { - -class VulkanDevice; -class VulkanInstance; - -class VulkanSwapChain { - public: - VulkanSwapChain(VulkanInstance* instance, VulkanDevice* device); - ~VulkanSwapChain(); - - VkSwapchainKHR handle = nullptr; - - operator VkSwapchainKHR() const { return handle; } - - uint32_t surface_width() const { return surface_width_; } - uint32_t surface_height() const { return surface_height_; } - VkImage surface_image() const { - return buffers_[current_buffer_index_].image; - } - - // Render pass used for compositing. 
- VkRenderPass render_pass() const { return render_pass_; } - // Render command buffer, active inside the render pass from Begin to End. - VkCommandBuffer render_cmd_buffer() const { return render_cmd_buffer_; } - // Copy commands, ran before the render command buffer. - VkCommandBuffer copy_cmd_buffer() const { return copy_cmd_buffer_; } - - // Initializes the swap chain with the given WSI surface. - VkResult Initialize(VkSurfaceKHR surface); - // Reinitializes the swap chain with the initial surface. - // The surface will be retained but all other swap chain resources will be - // torn down and recreated with the new surface properties (size/etc). - VkResult Reinitialize(); - - // Waits on and signals a semaphore in this operation. - void WaitOnSemaphore(VkSemaphore sem); - - // Begins the swap operation, preparing state for rendering. - VkResult Begin(); - // Ends the swap operation, finalizing rendering and presenting the results. - VkResult End(); - - private: - struct Buffer { - VkImage image = nullptr; - VkImageLayout image_layout = VK_IMAGE_LAYOUT_UNDEFINED; - VkImageView image_view = nullptr; - VkFramebuffer framebuffer = nullptr; - }; - - VkResult InitializeBuffer(Buffer* buffer, VkImage target_image); - void DestroyBuffer(Buffer* buffer); - - // Safely releases all swap chain resources. 
- void Shutdown(); - - VulkanInstance* instance_ = nullptr; - VulkanDevice* device_ = nullptr; - - VkFence synchronization_fence_ = nullptr; - VkQueue presentation_queue_ = nullptr; - std::mutex* presentation_queue_mutex_ = nullptr; - uint32_t presentation_queue_family_ = -1; - VkSurfaceKHR surface_ = nullptr; - uint32_t surface_width_ = 0; - uint32_t surface_height_ = 0; - VkFormat surface_format_ = VK_FORMAT_UNDEFINED; - VkCommandPool cmd_pool_ = nullptr; - VkCommandBuffer cmd_buffer_ = nullptr; - VkCommandBuffer copy_cmd_buffer_ = nullptr; - VkCommandBuffer render_cmd_buffer_ = nullptr; - VkRenderPass render_pass_ = nullptr; - VkSemaphore image_available_semaphore_ = nullptr; - VkSemaphore image_usage_semaphore_ = nullptr; - uint32_t current_buffer_index_ = 0; - std::vector buffers_; - std::vector wait_semaphores_; -}; - -} // namespace vulkan -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_VULKAN_VULKAN_SWAP_CHAIN_H_ diff --git a/src/xenia/ui/vulkan/vulkan_util.cc b/src/xenia/ui/vulkan/vulkan_util.cc deleted file mode 100644 index a237b45d1..000000000 --- a/src/xenia/ui/vulkan/vulkan_util.cc +++ /dev/null @@ -1,504 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/ui/vulkan/vulkan_util.h" - -#include "third_party/fmt/include/fmt/format.h" -#include "xenia/base/assert.h" -#include "xenia/base/logging.h" - -// Implement AMD's VMA here. 
-#define VMA_IMPLEMENTATION -#include "xenia/ui/vulkan/vulkan_mem_alloc.h" - -namespace xe { -namespace ui { -namespace vulkan { - -uint32_t Version::Make(uint32_t major, uint32_t minor, uint32_t patch) { - return VK_MAKE_VERSION(major, minor, patch); -} - -Version Version::Parse(uint32_t value) { - Version version; - version.major = VK_VERSION_MAJOR(value); - version.minor = VK_VERSION_MINOR(value); - version.patch = VK_VERSION_PATCH(value); - version.pretty_string = - fmt::format("{}.{}.{}", version.major, version.minor, version.patch); - return version; -} - -const char* to_string(VkFormat format) { - switch (format) { -#define STR(r) \ - case r: \ - return #r - STR(VK_FORMAT_UNDEFINED); - STR(VK_FORMAT_R4G4_UNORM_PACK8); - STR(VK_FORMAT_R4G4B4A4_UNORM_PACK16); - STR(VK_FORMAT_B4G4R4A4_UNORM_PACK16); - STR(VK_FORMAT_R5G6B5_UNORM_PACK16); - STR(VK_FORMAT_B5G6R5_UNORM_PACK16); - STR(VK_FORMAT_R5G5B5A1_UNORM_PACK16); - STR(VK_FORMAT_B5G5R5A1_UNORM_PACK16); - STR(VK_FORMAT_A1R5G5B5_UNORM_PACK16); - STR(VK_FORMAT_R8_UNORM); - STR(VK_FORMAT_R8_SNORM); - STR(VK_FORMAT_R8_USCALED); - STR(VK_FORMAT_R8_SSCALED); - STR(VK_FORMAT_R8_UINT); - STR(VK_FORMAT_R8_SINT); - STR(VK_FORMAT_R8_SRGB); - STR(VK_FORMAT_R8G8_UNORM); - STR(VK_FORMAT_R8G8_SNORM); - STR(VK_FORMAT_R8G8_USCALED); - STR(VK_FORMAT_R8G8_SSCALED); - STR(VK_FORMAT_R8G8_UINT); - STR(VK_FORMAT_R8G8_SINT); - STR(VK_FORMAT_R8G8_SRGB); - STR(VK_FORMAT_R8G8B8_UNORM); - STR(VK_FORMAT_R8G8B8_SNORM); - STR(VK_FORMAT_R8G8B8_USCALED); - STR(VK_FORMAT_R8G8B8_SSCALED); - STR(VK_FORMAT_R8G8B8_UINT); - STR(VK_FORMAT_R8G8B8_SINT); - STR(VK_FORMAT_R8G8B8_SRGB); - STR(VK_FORMAT_B8G8R8_UNORM); - STR(VK_FORMAT_B8G8R8_SNORM); - STR(VK_FORMAT_B8G8R8_USCALED); - STR(VK_FORMAT_B8G8R8_SSCALED); - STR(VK_FORMAT_B8G8R8_UINT); - STR(VK_FORMAT_B8G8R8_SINT); - STR(VK_FORMAT_B8G8R8_SRGB); - STR(VK_FORMAT_R8G8B8A8_UNORM); - STR(VK_FORMAT_R8G8B8A8_SNORM); - STR(VK_FORMAT_R8G8B8A8_USCALED); - STR(VK_FORMAT_R8G8B8A8_SSCALED); - 
STR(VK_FORMAT_R8G8B8A8_UINT); - STR(VK_FORMAT_R8G8B8A8_SINT); - STR(VK_FORMAT_R8G8B8A8_SRGB); - STR(VK_FORMAT_B8G8R8A8_UNORM); - STR(VK_FORMAT_B8G8R8A8_SNORM); - STR(VK_FORMAT_B8G8R8A8_USCALED); - STR(VK_FORMAT_B8G8R8A8_SSCALED); - STR(VK_FORMAT_B8G8R8A8_UINT); - STR(VK_FORMAT_B8G8R8A8_SINT); - STR(VK_FORMAT_B8G8R8A8_SRGB); - STR(VK_FORMAT_A8B8G8R8_UNORM_PACK32); - STR(VK_FORMAT_A8B8G8R8_SNORM_PACK32); - STR(VK_FORMAT_A8B8G8R8_USCALED_PACK32); - STR(VK_FORMAT_A8B8G8R8_SSCALED_PACK32); - STR(VK_FORMAT_A8B8G8R8_UINT_PACK32); - STR(VK_FORMAT_A8B8G8R8_SINT_PACK32); - STR(VK_FORMAT_A8B8G8R8_SRGB_PACK32); - STR(VK_FORMAT_A2R10G10B10_UNORM_PACK32); - STR(VK_FORMAT_A2R10G10B10_SNORM_PACK32); - STR(VK_FORMAT_A2R10G10B10_USCALED_PACK32); - STR(VK_FORMAT_A2R10G10B10_SSCALED_PACK32); - STR(VK_FORMAT_A2R10G10B10_UINT_PACK32); - STR(VK_FORMAT_A2R10G10B10_SINT_PACK32); - STR(VK_FORMAT_A2B10G10R10_UNORM_PACK32); - STR(VK_FORMAT_A2B10G10R10_SNORM_PACK32); - STR(VK_FORMAT_A2B10G10R10_USCALED_PACK32); - STR(VK_FORMAT_A2B10G10R10_SSCALED_PACK32); - STR(VK_FORMAT_A2B10G10R10_UINT_PACK32); - STR(VK_FORMAT_A2B10G10R10_SINT_PACK32); - STR(VK_FORMAT_R16_UNORM); - STR(VK_FORMAT_R16_SNORM); - STR(VK_FORMAT_R16_USCALED); - STR(VK_FORMAT_R16_SSCALED); - STR(VK_FORMAT_R16_UINT); - STR(VK_FORMAT_R16_SINT); - STR(VK_FORMAT_R16_SFLOAT); - STR(VK_FORMAT_R16G16_UNORM); - STR(VK_FORMAT_R16G16_SNORM); - STR(VK_FORMAT_R16G16_USCALED); - STR(VK_FORMAT_R16G16_SSCALED); - STR(VK_FORMAT_R16G16_UINT); - STR(VK_FORMAT_R16G16_SINT); - STR(VK_FORMAT_R16G16_SFLOAT); - STR(VK_FORMAT_R16G16B16_UNORM); - STR(VK_FORMAT_R16G16B16_SNORM); - STR(VK_FORMAT_R16G16B16_USCALED); - STR(VK_FORMAT_R16G16B16_SSCALED); - STR(VK_FORMAT_R16G16B16_UINT); - STR(VK_FORMAT_R16G16B16_SINT); - STR(VK_FORMAT_R16G16B16_SFLOAT); - STR(VK_FORMAT_R16G16B16A16_UNORM); - STR(VK_FORMAT_R16G16B16A16_SNORM); - STR(VK_FORMAT_R16G16B16A16_USCALED); - STR(VK_FORMAT_R16G16B16A16_SSCALED); - STR(VK_FORMAT_R16G16B16A16_UINT); - 
STR(VK_FORMAT_R16G16B16A16_SINT); - STR(VK_FORMAT_R16G16B16A16_SFLOAT); - STR(VK_FORMAT_R32_UINT); - STR(VK_FORMAT_R32_SINT); - STR(VK_FORMAT_R32_SFLOAT); - STR(VK_FORMAT_R32G32_UINT); - STR(VK_FORMAT_R32G32_SINT); - STR(VK_FORMAT_R32G32_SFLOAT); - STR(VK_FORMAT_R32G32B32_UINT); - STR(VK_FORMAT_R32G32B32_SINT); - STR(VK_FORMAT_R32G32B32_SFLOAT); - STR(VK_FORMAT_R32G32B32A32_UINT); - STR(VK_FORMAT_R32G32B32A32_SINT); - STR(VK_FORMAT_R32G32B32A32_SFLOAT); - STR(VK_FORMAT_R64_UINT); - STR(VK_FORMAT_R64_SINT); - STR(VK_FORMAT_R64_SFLOAT); - STR(VK_FORMAT_R64G64_UINT); - STR(VK_FORMAT_R64G64_SINT); - STR(VK_FORMAT_R64G64_SFLOAT); - STR(VK_FORMAT_R64G64B64_UINT); - STR(VK_FORMAT_R64G64B64_SINT); - STR(VK_FORMAT_R64G64B64_SFLOAT); - STR(VK_FORMAT_R64G64B64A64_UINT); - STR(VK_FORMAT_R64G64B64A64_SINT); - STR(VK_FORMAT_R64G64B64A64_SFLOAT); - STR(VK_FORMAT_B10G11R11_UFLOAT_PACK32); - STR(VK_FORMAT_E5B9G9R9_UFLOAT_PACK32); - STR(VK_FORMAT_D16_UNORM); - STR(VK_FORMAT_X8_D24_UNORM_PACK32); - STR(VK_FORMAT_D32_SFLOAT); - STR(VK_FORMAT_S8_UINT); - STR(VK_FORMAT_D16_UNORM_S8_UINT); - STR(VK_FORMAT_D24_UNORM_S8_UINT); - STR(VK_FORMAT_D32_SFLOAT_S8_UINT); - STR(VK_FORMAT_BC1_RGB_UNORM_BLOCK); - STR(VK_FORMAT_BC1_RGB_SRGB_BLOCK); - STR(VK_FORMAT_BC1_RGBA_UNORM_BLOCK); - STR(VK_FORMAT_BC1_RGBA_SRGB_BLOCK); - STR(VK_FORMAT_BC2_UNORM_BLOCK); - STR(VK_FORMAT_BC2_SRGB_BLOCK); - STR(VK_FORMAT_BC3_UNORM_BLOCK); - STR(VK_FORMAT_BC3_SRGB_BLOCK); - STR(VK_FORMAT_BC4_UNORM_BLOCK); - STR(VK_FORMAT_BC4_SNORM_BLOCK); - STR(VK_FORMAT_BC5_UNORM_BLOCK); - STR(VK_FORMAT_BC5_SNORM_BLOCK); - STR(VK_FORMAT_BC6H_UFLOAT_BLOCK); - STR(VK_FORMAT_BC6H_SFLOAT_BLOCK); - STR(VK_FORMAT_BC7_UNORM_BLOCK); - STR(VK_FORMAT_BC7_SRGB_BLOCK); - STR(VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK); - STR(VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK); - STR(VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK); - STR(VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK); - STR(VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK); - STR(VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK); - 
STR(VK_FORMAT_EAC_R11_UNORM_BLOCK); - STR(VK_FORMAT_EAC_R11_SNORM_BLOCK); - STR(VK_FORMAT_EAC_R11G11_UNORM_BLOCK); - STR(VK_FORMAT_EAC_R11G11_SNORM_BLOCK); - STR(VK_FORMAT_ASTC_4x4_UNORM_BLOCK); - STR(VK_FORMAT_ASTC_4x4_SRGB_BLOCK); - STR(VK_FORMAT_ASTC_5x4_UNORM_BLOCK); - STR(VK_FORMAT_ASTC_5x4_SRGB_BLOCK); - STR(VK_FORMAT_ASTC_5x5_UNORM_BLOCK); - STR(VK_FORMAT_ASTC_5x5_SRGB_BLOCK); - STR(VK_FORMAT_ASTC_6x5_UNORM_BLOCK); - STR(VK_FORMAT_ASTC_6x5_SRGB_BLOCK); - STR(VK_FORMAT_ASTC_6x6_UNORM_BLOCK); - STR(VK_FORMAT_ASTC_6x6_SRGB_BLOCK); - STR(VK_FORMAT_ASTC_8x5_UNORM_BLOCK); - STR(VK_FORMAT_ASTC_8x5_SRGB_BLOCK); - STR(VK_FORMAT_ASTC_8x6_UNORM_BLOCK); - STR(VK_FORMAT_ASTC_8x6_SRGB_BLOCK); - STR(VK_FORMAT_ASTC_8x8_UNORM_BLOCK); - STR(VK_FORMAT_ASTC_8x8_SRGB_BLOCK); - STR(VK_FORMAT_ASTC_10x5_UNORM_BLOCK); - STR(VK_FORMAT_ASTC_10x5_SRGB_BLOCK); - STR(VK_FORMAT_ASTC_10x6_UNORM_BLOCK); - STR(VK_FORMAT_ASTC_10x6_SRGB_BLOCK); - STR(VK_FORMAT_ASTC_10x8_UNORM_BLOCK); - STR(VK_FORMAT_ASTC_10x8_SRGB_BLOCK); - STR(VK_FORMAT_ASTC_10x10_UNORM_BLOCK); - STR(VK_FORMAT_ASTC_10x10_SRGB_BLOCK); - STR(VK_FORMAT_ASTC_12x10_UNORM_BLOCK); - STR(VK_FORMAT_ASTC_12x10_SRGB_BLOCK); - STR(VK_FORMAT_ASTC_12x12_UNORM_BLOCK); - STR(VK_FORMAT_ASTC_12x12_SRGB_BLOCK); -#undef STR - default: - return "UNKNOWN_FORMAT"; - } -} - -const char* to_string(VkPhysicalDeviceType type) { - switch (type) { -#define STR(r) \ - case r: \ - return #r - STR(VK_PHYSICAL_DEVICE_TYPE_OTHER); - STR(VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU); - STR(VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU); - STR(VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU); - STR(VK_PHYSICAL_DEVICE_TYPE_CPU); -#undef STR - default: - return "UNKNOWN_DEVICE"; - } -} - -const char* to_string(VkSharingMode sharing_mode) { - switch (sharing_mode) { -#define STR(r) \ - case r: \ - return #r - STR(VK_SHARING_MODE_EXCLUSIVE); - STR(VK_SHARING_MODE_CONCURRENT); -#undef STR - default: - return "UNKNOWN_SHARING_MODE"; - } -} - -const char* to_string(VkResult result) { - switch 
(result) { -#define STR(r) \ - case r: \ - return #r - STR(VK_SUCCESS); - STR(VK_NOT_READY); - STR(VK_TIMEOUT); - STR(VK_EVENT_SET); - STR(VK_EVENT_RESET); - STR(VK_INCOMPLETE); - STR(VK_ERROR_OUT_OF_HOST_MEMORY); - STR(VK_ERROR_OUT_OF_DEVICE_MEMORY); - STR(VK_ERROR_INITIALIZATION_FAILED); - STR(VK_ERROR_DEVICE_LOST); - STR(VK_ERROR_MEMORY_MAP_FAILED); - STR(VK_ERROR_LAYER_NOT_PRESENT); - STR(VK_ERROR_EXTENSION_NOT_PRESENT); - STR(VK_ERROR_FEATURE_NOT_PRESENT); - STR(VK_ERROR_INCOMPATIBLE_DRIVER); - STR(VK_ERROR_TOO_MANY_OBJECTS); - STR(VK_ERROR_FORMAT_NOT_SUPPORTED); - STR(VK_ERROR_SURFACE_LOST_KHR); - STR(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); - STR(VK_SUBOPTIMAL_KHR); - STR(VK_ERROR_OUT_OF_DATE_KHR); - STR(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); - STR(VK_ERROR_VALIDATION_FAILED_EXT); - STR(VK_ERROR_INVALID_SHADER_NV); - STR(VK_ERROR_OUT_OF_POOL_MEMORY_KHR); - STR(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); -#undef STR - default: - return "UNKNOWN_RESULT"; - } -} - -std::string to_flags_string(VkImageUsageFlagBits flags) { - std::string result; -#define OR_FLAG(f) \ - if (flags & f) { \ - if (!result.empty()) { \ - result += " | "; \ - } \ - result += #f; \ - } - OR_FLAG(VK_IMAGE_USAGE_TRANSFER_SRC_BIT); - OR_FLAG(VK_IMAGE_USAGE_TRANSFER_DST_BIT); - OR_FLAG(VK_IMAGE_USAGE_SAMPLED_BIT); - OR_FLAG(VK_IMAGE_USAGE_STORAGE_BIT); - OR_FLAG(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - OR_FLAG(VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); - OR_FLAG(VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT); - OR_FLAG(VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT); -#undef OR_FLAG - return result; -} - -std::string to_flags_string(VkFormatFeatureFlagBits flags) { - std::string result; -#define OR_FLAG(f) \ - if (flags & f) { \ - if (!result.empty()) { \ - result += " | "; \ - } \ - result += #f; \ - } - OR_FLAG(VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT); - OR_FLAG(VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT); - OR_FLAG(VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT); - OR_FLAG(VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT); - 
OR_FLAG(VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT); - OR_FLAG(VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT); - OR_FLAG(VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT); - OR_FLAG(VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT); - OR_FLAG(VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT); - OR_FLAG(VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT); - OR_FLAG(VK_FORMAT_FEATURE_BLIT_SRC_BIT); - OR_FLAG(VK_FORMAT_FEATURE_BLIT_DST_BIT); - OR_FLAG(VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT); - OR_FLAG(VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_IMG); - OR_FLAG(VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR); - OR_FLAG(VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR); - OR_FLAG(VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT_EXT); -#undef OR_FLAG - return result; -} - -std::string to_flags_string(VkSurfaceTransformFlagBitsKHR flags) { - std::string result; -#define OR_FLAG(f) \ - if (flags & f) { \ - if (!result.empty()) { \ - result += " | "; \ - } \ - result += #f; \ - } - OR_FLAG(VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR); - OR_FLAG(VK_SURFACE_TRANSFORM_ROTATE_90_BIT_KHR); - OR_FLAG(VK_SURFACE_TRANSFORM_ROTATE_180_BIT_KHR); - OR_FLAG(VK_SURFACE_TRANSFORM_ROTATE_270_BIT_KHR); - OR_FLAG(VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_BIT_KHR); - OR_FLAG(VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_ROTATE_90_BIT_KHR); - OR_FLAG(VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_ROTATE_180_BIT_KHR); - OR_FLAG(VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_ROTATE_270_BIT_KHR); - OR_FLAG(VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR); -#undef OR_FLAG - return result; -} - -const char* to_string(VkColorSpaceKHR color_space) { - switch (color_space) { -#define STR(r) \ - case r: \ - return #r - STR(VK_COLORSPACE_SRGB_NONLINEAR_KHR); -#undef STR - default: - return "UNKNOWN_COLORSPACE"; - } -} - -const char* to_string(VkPresentModeKHR present_mode) { - switch (present_mode) { -#define STR(r) \ - case r: \ - return #r - STR(VK_PRESENT_MODE_IMMEDIATE_KHR); - STR(VK_PRESENT_MODE_MAILBOX_KHR); - STR(VK_PRESENT_MODE_FIFO_KHR); - 
STR(VK_PRESENT_MODE_FIFO_RELAXED_KHR); -#undef STR - default: - return "UNKNOWN_PRESENT_MODE"; - } -} - -void FatalVulkanError(std::string error) { - xe::FatalError( - error + - "\n\n" - "Ensure you have the latest drivers for your GPU and that it supports " - "Vulkan.\n" - "\n" - "See https://xenia.jp/faq/ for more information and a list of supported " - "GPUs."); -} - -void CheckResult(VkResult result, const char* action) { - if (result) { - XELOGE("Vulkan check: {} returned {}", action, to_string(result)); - } - assert_true(result == VK_SUCCESS, action); -} - -std::pair> CheckRequirements( - const std::vector& requirements, - const std::vector& layer_infos) { - bool any_missing = false; - std::vector enabled_layers; - for (auto& requirement : requirements) { - bool found = false; - for (size_t j = 0; j < layer_infos.size(); ++j) { - auto layer_name = layer_infos[j].properties.layerName; - auto layer_version = - Version::Parse(layer_infos[j].properties.specVersion); - if (requirement.name == layer_name) { - found = true; - if (requirement.min_version > layer_infos[j].properties.specVersion) { - if (requirement.is_optional) { - XELOGVK("- optional layer {} ({}) version mismatch", layer_name, - layer_version.pretty_string); - continue; - } - XELOGE("ERROR: required layer {} ({}) version mismatch", layer_name, - layer_version.pretty_string); - any_missing = true; - break; - } - XELOGVK("- enabling layer {} ({})", layer_name, - layer_version.pretty_string); - enabled_layers.push_back(layer_name); - break; - } - } - if (!found) { - if (requirement.is_optional) { - XELOGVK("- optional layer {} not found", requirement.name); - } else { - XELOGE("ERROR: required layer {} not found", requirement.name); - any_missing = true; - } - } - } - return {!any_missing, enabled_layers}; -} - -std::pair> CheckRequirements( - const std::vector& requirements, - const std::vector& extension_properties) { - bool any_missing = false; - std::vector enabled_extensions; - for (auto& 
requirement : requirements) { - bool found = false; - for (size_t j = 0; j < extension_properties.size(); ++j) { - auto extension_name = extension_properties[j].extensionName; - auto extension_version = - Version::Parse(extension_properties[j].specVersion); - if (requirement.name == extension_name) { - found = true; - if (requirement.min_version > extension_properties[j].specVersion) { - if (requirement.is_optional) { - XELOGVK("- optional extension {} ({}) version mismatch", - extension_name, extension_version.pretty_string); - continue; - } - XELOGE("ERROR: required extension {} ({}) version mismatch", - extension_name, extension_version.pretty_string); - any_missing = true; - break; - } - XELOGVK("- enabling extension {} ({})", extension_name, - extension_version.pretty_string); - enabled_extensions.push_back(extension_name); - break; - } - } - if (!found) { - if (requirement.is_optional) { - XELOGVK("- optional extension {} not found", requirement.name); - } else { - XELOGE("ERROR: required extension {} not found", requirement.name); - any_missing = true; - } - } - } - return {!any_missing, enabled_extensions}; -} - -} // namespace vulkan -} // namespace ui -} // namespace xe diff --git a/src/xenia/ui/vulkan/vulkan_util.h b/src/xenia/ui/vulkan/vulkan_util.h deleted file mode 100644 index ac47e5d4b..000000000 --- a/src/xenia/ui/vulkan/vulkan_util.h +++ /dev/null @@ -1,136 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2016 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#ifndef XENIA_UI_VULKAN_VULKAN_UTIL_H_ -#define XENIA_UI_VULKAN_VULKAN_UTIL_H_ - -#include -#include -#include - -#include "xenia/ui/vulkan/vulkan.h" - -namespace xe { -namespace ui { -class Window; -} // namespace ui -} // namespace xe - -namespace xe { -namespace ui { -namespace vulkan { - -#define VK_SAFE_DESTROY(fn, dev, obj, alloc) \ - \ - do { \ - if (obj) { \ - fn(dev, obj, alloc); \ - obj = nullptr; \ - } \ - \ - } while (0) - -class Fence { - public: - Fence(VkDevice device) : device_(device) { - VkFenceCreateInfo fence_info; - fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - fence_info.pNext = nullptr; - fence_info.flags = 0; - vkCreateFence(device, &fence_info, nullptr, &fence_); - } - ~Fence() { - vkDestroyFence(device_, fence_, nullptr); - fence_ = nullptr; - } - - VkResult status() const { return vkGetFenceStatus(device_, fence_); } - - VkFence fence() const { return fence_; } - operator VkFence() const { return fence_; } - - private: - VkDevice device_; - VkFence fence_ = nullptr; -}; - -struct Version { - uint32_t major; - uint32_t minor; - uint32_t patch; - std::string pretty_string; - - static uint32_t Make(uint32_t major, uint32_t minor, uint32_t patch); - - static Version Parse(uint32_t value); -}; - -const char* to_string(VkFormat format); -const char* to_string(VkPhysicalDeviceType type); -const char* to_string(VkSharingMode sharing_mode); -const char* to_string(VkResult result); - -std::string to_flags_string(VkImageUsageFlagBits flags); -std::string to_flags_string(VkFormatFeatureFlagBits flags); -std::string to_flags_string(VkSurfaceTransformFlagBitsKHR flags); - -const char* to_string(VkColorSpaceKHR color_space); -const char* to_string(VkPresentModeKHR present_mode); - -// Throws a fatal error with some Vulkan help text. -void FatalVulkanError(std::string error); - -// Logs and assets expecting the result to be VK_SUCCESS. 
-void CheckResult(VkResult result, const char* action); - -struct LayerInfo { - VkLayerProperties properties; - std::vector extensions; -}; - -struct DeviceInfo { - VkPhysicalDevice handle; - VkPhysicalDeviceProperties properties; - VkPhysicalDeviceFeatures features; - VkPhysicalDeviceMemoryProperties memory_properties; - std::vector queue_family_properties; - std::vector layers; - std::vector extensions; -}; - -// Defines a requirement for a layer or extension, used to both verify and -// enable them on initialization. -struct Requirement { - // Layer or extension name. - std::string name; - // Minimum required spec version of the layer or extension. - uint32_t min_version; - // True if the requirement is optional (will not cause verification to fail). - bool is_optional; -}; - -// Gets a list of enabled layer names based on the given layer requirements and -// available layer info. -// Returns a boolean indicating whether all required layers are present. -std::pair> CheckRequirements( - const std::vector& requirements, - const std::vector& layer_infos); - -// Gets a list of enabled extension names based on the given extension -// requirements and available extensions. -// Returns a boolean indicating whether all required extensions are present. -std::pair> CheckRequirements( - const std::vector& requirements, - const std::vector& extension_properties); - -} // namespace vulkan -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_VULKAN_VULKAN_UTIL_H_ diff --git a/src/xenia/ui/vulkan/vulkan_window_demo.cc b/src/xenia/ui/vulkan/vulkan_window_demo.cc deleted file mode 100644 index 12965197b..000000000 --- a/src/xenia/ui/vulkan/vulkan_window_demo.cc +++ /dev/null @@ -1,29 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. 
* - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include -#include - -#include "xenia/base/main.h" -#include "xenia/ui/vulkan/vulkan_provider.h" -#include "xenia/ui/window.h" - -namespace xe { -namespace ui { - -int window_demo_main(const std::vector& args); - -std::unique_ptr CreateDemoGraphicsProvider(Window* window) { - return xe::ui::vulkan::VulkanProvider::Create(window); -} - -} // namespace ui -} // namespace xe - -DEFINE_ENTRY_POINT("xenia-ui-window-vulkan-demo", xe::ui::window_demo_main, ""); diff --git a/xenia-build b/xenia-build index 0fafa738d..25675c809 100755 --- a/xenia-build +++ b/xenia-build @@ -434,7 +434,7 @@ def discover_commands(subparsers): 'tidy': TidyCommand(subparsers), } if sys.platform == 'win32': - commands['buildhlsl'] = BuildHlslCommand(subparsers) + commands['gendxbc'] = GenDxbcCommand(subparsers) return commands @@ -764,16 +764,16 @@ class GenSpirvCommand(Command): return 0 -class BuildHlslCommand(Command): - """'buildhlsl' command.""" +class GenDxbcCommand(Command): + """'gendxbc' command.""" def __init__(self, subparsers, *args, **kwargs): - super(BuildHlslCommand, self).__init__( + super(GenDxbcCommand, self).__init__( subparsers, - name='buildhlsl', + name='gendxbc', help_short='Generates Direct3D shader binaries and header files.', help_long=''' - Generates the .cso/.h binaries under src/xenia/*/d3d12/shaders/dxbc/. + Generates the .cso/.h binaries under src/xenia/*/shaders/bytecode/d3d12_5_1/. Run after modifying any .hs/vs/ds/gs/ps/cs.hlsl files. 
''', *args, **kwargs) @@ -811,7 +811,8 @@ class BuildHlslCommand(Command): src_name = os.path.splitext(os.path.basename(src_file))[0] identifier = os.path.basename(src_file)[:-5].replace('.', '_') - bin_path = os.path.join(os.path.dirname(src_file), 'dxbc') + bin_path = os.path.join(os.path.dirname(src_file), + 'bytecode/d3d12_5_1') if not os.path.exists(bin_path): os.mkdir(bin_path) cso_file = os.path.join(bin_path, identifier) + '.cso' @@ -830,7 +831,7 @@ class BuildHlslCommand(Command): # bin2c so we get a header file we can compile in. with open(h_file, 'w') as out_file: - out_file.write('// generated from `xb buildhlsl`\n') + out_file.write('// generated from `xb gendxbc`\n') out_file.write('// source: %s\n' % os.path.basename(src_file)) out_file.write('const uint8_t %s[] = {' % (identifier)) with open(cso_file, 'rb') as in_file: From d2852a52b34ce2f60e560df16002b3523778ae0a Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 31 Aug 2020 22:04:00 +0300 Subject: [PATCH 002/123] [Vulkan] Update headers to 1.2.148.1, remove VMA --- third_party/vulkan/vk_mem_alloc.h | 8513 ----------------------- third_party/vulkan/vk_platform.h | 14 +- third_party/vulkan/vulkan.h | 42 +- third_party/vulkan/vulkan_android.h | 32 +- third_party/vulkan/vulkan_beta.h | 428 ++ third_party/vulkan/vulkan_core.h | 6148 ++++++++++++---- third_party/vulkan/vulkan_directfb.h | 54 + third_party/vulkan/vulkan_fuchsia.h | 47 + third_party/vulkan/vulkan_ggp.h | 58 + third_party/vulkan/vulkan_ios.h | 27 +- third_party/vulkan/vulkan_macos.h | 27 +- third_party/vulkan/vulkan_metal.h | 54 + third_party/vulkan/vulkan_mir.h | 65 - third_party/vulkan/vulkan_vi.h | 27 +- third_party/vulkan/vulkan_wayland.h | 27 +- third_party/vulkan/vulkan_win32.h | 101 +- third_party/vulkan/vulkan_xcb.h | 27 +- third_party/vulkan/vulkan_xlib.h | 27 +- third_party/vulkan/vulkan_xlib_xrandr.h | 25 +- 19 files changed, 5627 insertions(+), 10116 deletions(-) delete mode 100644 third_party/vulkan/vk_mem_alloc.h create mode 
100644 third_party/vulkan/vulkan_beta.h create mode 100644 third_party/vulkan/vulkan_directfb.h create mode 100644 third_party/vulkan/vulkan_fuchsia.h create mode 100644 third_party/vulkan/vulkan_ggp.h create mode 100644 third_party/vulkan/vulkan_metal.h delete mode 100644 third_party/vulkan/vulkan_mir.h diff --git a/third_party/vulkan/vk_mem_alloc.h b/third_party/vulkan/vk_mem_alloc.h deleted file mode 100644 index 71d78b795..000000000 --- a/third_party/vulkan/vk_mem_alloc.h +++ /dev/null @@ -1,8513 +0,0 @@ -// -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. -// - -#ifndef AMD_VULKAN_MEMORY_ALLOCATOR_H -#define AMD_VULKAN_MEMORY_ALLOCATOR_H - -#ifdef __cplusplus -extern "C" { -#endif - -/** \mainpage Vulkan Memory Allocator - -Version 2.0.0-alpha.6 (2017-11-13) - -Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. 
\n -License: MIT - -Documentation of all members: vk_mem_alloc.h - -Table of contents: - -- User guide - - \subpage quick_start - - \subpage choosing_memory_type - - \subpage memory_mapping - - \subpage custom_memory_pools - - \subpage defragmentation - - \subpage lost_allocations - - \subpage allocation_annotation -- \subpage configuration - - \subpage vk_khr_dedicated_allocation -- \subpage thread_safety - -See also: - -- [Source repository on GitHub](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) -- [Product page on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/) - - - - -\page quick_start Quick start - -\section project_setup Project setup - -In your project code: - --# Include "vk_mem_alloc.h" file wherever you want to use the library. --# In exacly one C++ file define following macro before include to build library - implementation. - -\code -#define VMA_IMPLEMENTATION -#include "vk_mem_alloc.h" -\endcode - -\section initialization Initialization - -At program startup: - --# Initialize Vulkan to have `VkPhysicalDevice` and `VkDevice` object. --# Fill VmaAllocatorCreateInfo structure and create `VmaAllocator` object by - calling vmaCreateAllocator(). - -\code -VmaAllocatorCreateInfo allocatorInfo = {}; -allocatorInfo.physicalDevice = physicalDevice; -allocatorInfo.device = device; - -VmaAllocator allocator; -vmaCreateAllocator(&allocatorInfo, &allocator); -\endcode - -\section resource_allocation Resource allocation - -When you want to create a buffer or image: - --# Fill `VkBufferCreateInfo` / `VkImageCreateInfo` structure. --# Fill VmaAllocationCreateInfo structure. --# Call vmaCreateBuffer() / vmaCreateImage() to get `VkBuffer`/`VkImage` with memory - already allocated and bound to it. 
- -\code -VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -bufferInfo.size = 65536; -bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - -VmaAllocationCreateInfo allocInfo = {}; -allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; - -VkBuffer buffer; -VmaAllocation allocation; -vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); -\endcode - -Don't forget to destroy your objects when no longer needed: - -\code -vmaDestroyBuffer(allocator, buffer, allocation); -vmaDestroyAllocator(allocator); -\endcode - - -\page choosing_memory_type Choosing memory type - -Physical devices in Vulkan support various combinations of memory heaps and -types. Help with choosing correct and optimal memory type for your specific -resource is one of the key features of this library. You can use it by filling -appropriate members of VmaAllocationCreateInfo structure, as described below. -You can also combine multiple methods. - --# If you just want to find memory type index that meets your requirements, you - can use function vmaFindMemoryTypeIndex(). --# If you want to allocate a region of device memory without association with any - specific image or buffer, you can use function vmaAllocateMemory(). Usage of - this function is not recommended and usually not needed. --# If you already have a buffer or an image created, you want to allocate memory - for it and then you will bind it yourself, you can use function - vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(). --# If you want to create a buffer or an image, allocate memory for it and bind - them together, all in one call, you can use function vmaCreateBuffer(), - vmaCreateImage(). This is the recommended way to use this library. - -When using 3. or 4., the library internally queries Vulkan for memory types -supported for that buffer or image (function `vkGetBufferMemoryRequirements()`) -and uses only one of these types. 
- -If no memory type can be found that meets all the requirements, these functions -return `VK_ERROR_FEATURE_NOT_PRESENT`. - -You can leave VmaAllocationCreateInfo structure completely filled with zeros. -It means no requirements are specified for memory type. -It is valid, although not very useful. - -\section choosing_memory_type_usage Usage - -The easiest way to specify memory requirements is to fill member -VmaAllocationCreateInfo::usage using one of the values of enum `VmaMemoryUsage`. -It defines high level, common usage types. - -For example, if you want to create a uniform buffer that will be filled using -transfer only once or infrequently and used for rendering every frame, you can -do it using following code: - -\code -VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -bufferInfo.size = 65536; -bufferInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - -VmaAllocationCreateInfo allocInfo = {}; -allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; - -VkBuffer buffer; -VmaAllocation allocation; -vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); -\endcode - -\section choosing_memory_type_required_preferred_flags Required and preferred flags - -You can specify more detailed requirements by filling members -VmaAllocationCreateInfo::requiredFlags and VmaAllocationCreateInfo::preferredFlags -with a combination of bits from enum `VkMemoryPropertyFlags`. 
For example, -if you want to create a buffer that will be persistently mapped on host (so it -must be `HOST_VISIBLE`) and preferably will also be `HOST_COHERENT` and `HOST_CACHED`, -use following code: - -\code -VmaAllocationCreateInfo allocInfo = {}; -allocInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; -allocInfo.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; -allocInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; - -VkBuffer buffer; -VmaAllocation allocation; -vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); -\endcode - -A memory type is chosen that has all the required flags and as many preferred -flags set as possible. - -If you use VmaAllocationCreateInfo::usage, it is just internally converted to -a set of required and preferred flags. - -\section choosing_memory_type_explicit_memory_types Explicit memory types - -If you inspected memory types available on the physical device and you have -a preference for memory types that you want to use, you can fill member -VmaAllocationCreateInfo::memoryTypeBits. It is a bit mask, where each bit set -means that a memory type with that index is allowed to be used for the -allocation. Special value 0, just like UINT32_MAX, means there are no -restrictions to memory type index. - -Please note that this member is NOT just a memory type index. -Still you can use it to choose just one, specific memory type. 
-For example, if you already determined that your buffer should be created in -memory type 2, use following code: - -\code -uint32_t memoryTypeIndex = 2; - -VmaAllocationCreateInfo allocInfo = {}; -allocInfo.memoryTypeBits = 1u << memoryTypeIndex; - -VkBuffer buffer; -VmaAllocation allocation; -vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); -\endcode - -\section choosing_memory_type_custom_memory_pools Custom memory pools - -If you allocate from custom memory pool, all the ways of specifying memory -requirements described above are not applicable and the aforementioned members -of VmaAllocationCreateInfo structure are ignored. Memory type is selected -explicitly when creating the pool and then used to make all the allocations from -that pool. For further details, see \ref custom_memory_pools. - - -\page memory_mapping Memory mapping - -\section persistently_mapped_memory Persistently mapped memory - -If you need to map memory on host, it may happen that two allocations are -assigned to the same `VkDeviceMemory` block, so if you map them both at the same -time, it will cause error because mapping single memory block multiple times is -illegal in Vulkan. - -TODO update this... - -It is safer, more convenient and more efficient to use special feature designed -for that: persistently mapped memory. Allocations made with -`VMA_ALLOCATION_CREATE_MAPPED_BIT` flag set in -VmaAllocationCreateInfo::flags are returned from device memory -blocks that stay mapped all the time, so you can just access CPU pointer to it. -VmaAllocationInfo::pMappedData pointer is already offseted to the beginning of -particular allocation. 
Example: - -\code -VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -bufCreateInfo.size = 1024; -bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; -allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; - -VkBuffer buf; -VmaAllocation alloc; -VmaAllocationInfo allocInfo; -vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); - -// Buffer is immediately mapped. You can access its memory. -memcpy(allocInfo.pMappedData, myData, 1024); -\endcode - -Memory in Vulkan doesn't need to be unmapped before using it e.g. for transfers, -but if you are not sure whether it's `HOST_COHERENT` (here is surely is because -it's created with `VMA_MEMORY_USAGE_CPU_ONLY`), you should check it. If it's -not, you should call `vkInvalidateMappedMemoryRanges()` before reading and -`vkFlushMappedMemoryRanges()` after writing to mapped memory on CPU. Example: - -\code -VkMemoryPropertyFlags memFlags; -vmaGetMemoryTypeProperties(allocator, allocInfo.memoryType, &memFlags); -if((memFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) == 0) -{ - VkMappedMemoryRange memRange = { VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE }; - memRange.memory = allocInfo.deviceMemory; - memRange.offset = allocInfo.offset; - memRange.size = allocInfo.size; - vkFlushMappedMemoryRanges(device, 1, &memRange); -} -\endcode - -\section amd_perf_note Note on performance - -There is a situation that you should be careful about. It happens only if all of -following conditions are met: - --# You use AMD GPU. --# You use the memory type that is both `DEVICE_LOCAL` and `HOST_VISIBLE` - (used when you specify `VMA_MEMORY_USAGE_CPU_TO_GPU`). --# Operating system is Windows 7 or 8.x (Windows 10 is not affected because it - uses WDDM2). 
- -Then whenever a `VkDeviceMemory` block allocated from this memory type is mapped -for the time of any call to `vkQueueSubmit()` or `vkQueuePresentKHR()`, this -block is migrated by WDDM to system RAM, which degrades performance. It doesn't -matter if that particular memory block is actually used by the command buffer -being submitted. - -To avoid this problem, either make sure to unmap all allocations made from this -memory type before your Submit and Present, or use `VMA_MEMORY_USAGE_GPU_ONLY` -and transfer from a staging buffer in `VMA_MEMORY_USAGE_CPU_ONLY`, which can -safely stay mapped all the time. - -\page custom_memory_pools Custom memory pools - -The library automatically creates and manages default memory pool for each -memory type available on the device. A pool contains a number of -`VkDeviceMemory` blocks. You can create custom pool and allocate memory out of -it. It can be useful if you want to: - -- Keep certain kind of allocations separate from others. -- Enforce particular size of Vulkan memory blocks. -- Limit maximum amount of Vulkan memory allocated for that pool. - -To use custom memory pools: - --# Fill VmaPoolCreateInfo structure. --# Call vmaCreatePool() to obtain `VmaPool` handle. --# When making an allocation, set VmaAllocationCreateInfo::pool to this handle. - You don't need to specify any other parameters of this structure, like usage. - -Example: - -\code -// Create a pool that could have at most 2 blocks, 128 MB each. -VmaPoolCreateInfo poolCreateInfo = {}; -poolCreateInfo.memoryTypeIndex = ... -poolCreateInfo.blockSize = 128ull * 1024 * 1024; -poolCreateInfo.maxBlockCount = 2; - -VmaPool pool; -vmaCreatePool(allocator, &poolCreateInfo, &pool); - -// Allocate a buffer out of it. 
-VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -bufCreateInfo.size = 1024; -bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.pool = pool; - -VkBuffer buf; -VmaAllocation alloc; -VmaAllocationInfo allocInfo; -vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); -\endcode - -You have to free all allocations made from this pool before destroying it. - -\code -vmaDestroyBuffer(allocator, buf, alloc); -vmaDestroyPool(allocator, pool); -\endcode - -\page defragmentation Defragmentation - -Interleaved allocations and deallocations of many objects of varying size can -cause fragmentation, which can lead to a situation where the library is unable -to find a continuous range of free memory for a new allocation despite there is -enough free space, just scattered across many small free ranges between existing -allocations. - -To mitigate this problem, you can use vmaDefragment(). Given set of allocations, -this function can move them to compact used memory, ensure more continuous free -space and possibly also free some `VkDeviceMemory`. It can work only on -allocations made from memory type that is `HOST_VISIBLE`. Allocations are -modified to point to the new `VkDeviceMemory` and offset. Data in this memory is -also `memmove`-ed to the new place. However, if you have images or buffers bound -to these allocations (and you certainly do), you need to destroy, recreate, and -bind them to the new place in memory. - -For further details and example code, see documentation of function -vmaDefragment(). - -\page lost_allocations Lost allocations - -If your game oversubscribes video memory, if may work OK in previous-generation -graphics APIs (DirectX 9, 10, 11, OpenGL) because resources are automatically -paged to system RAM. 
In Vulkan you can't do it because when you run out of -memory, an allocation just fails. If you have more data (e.g. textures) that can -fit into VRAM and you don't need it all at once, you may want to upload them to -GPU on demand and "push out" ones that are not used for a long time to make room -for the new ones, effectively using VRAM (or a cartain memory pool) as a form of -cache. Vulkan Memory Allocator can help you with that by supporting a concept of -"lost allocations". - -To create an allocation that can become lost, include `VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT` -flag in VmaAllocationCreateInfo::flags. Before using a buffer or image bound to -such allocation in every new frame, you need to query it if it's not lost. To -check it: call vmaGetAllocationInfo() and see if VmaAllocationInfo::deviceMemory -is not `VK_NULL_HANDLE`. If the allocation is lost, you should not use it or -buffer/image bound to it. You mustn't forget to destroy this allocation and this -buffer/image. - -To create an allocation that can make some other allocations lost to make room -for it, use `VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT` flag. You will -usually use both flags `VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT` and -`VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT` at the same time. - -Warning! Current implementation uses quite naive, brute force algorithm, -which can make allocation calls that use `VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT` -flag quite slow. A new, more optimal algorithm and data structure to speed this -up is planned for the future. - -When interleaving creation of new allocations with usage of existing ones, -how do you make sure that an allocation won't become lost while it's used in the -current frame? 
- -It is ensured because vmaGetAllocationInfo() not only returns allocation -parameters and checks whether it's not lost, but when it's not, it also -atomically marks it as used in the current frame, which makes it impossible to -become lost in that frame. It uses lockless algorithm, so it works fast and -doesn't involve locking any internal mutex. - -What if my allocation may still be in use by the GPU when it's rendering a -previous frame while I already submit new frame on the CPU? - -You can make sure that allocations "touched" by vmaGetAllocationInfo() will not -become lost for a number of additional frames back from the current one by -specifying this number as VmaAllocatorCreateInfo::frameInUseCount (for default -memory pool) and VmaPoolCreateInfo::frameInUseCount (for custom pool). - -How do you inform the library when new frame starts? - -You need to call function vmaSetCurrentFrameIndex(). - -Example code: - -\code -struct MyBuffer -{ - VkBuffer m_Buf = nullptr; - VmaAllocation m_Alloc = nullptr; - - // Called when the buffer is really needed in the current frame. - void EnsureBuffer(); -}; - -void MyBuffer::EnsureBuffer() -{ - // Buffer has been created. - if(m_Buf != VK_NULL_HANDLE) - { - // Check if its allocation is not lost + mark it as used in current frame. - VmaAllocationInfo allocInfo; - vmaGetAllocationInfo(allocator, m_Alloc, &allocInfo); - if(allocInfo.deviceMemory != VK_NULL_HANDLE) - { - // It's all OK - safe to use m_Buf. - return; - } - } - - // Buffer not yet exists or lost - destroy and recreate it. 
- - vmaDestroyBuffer(allocator, m_Buf, m_Alloc); - - VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; - bufCreateInfo.size = 1024; - bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - - VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; - allocCreateInfo.flags = VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT | - VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT; - - vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &m_Buf, &m_Alloc, nullptr); -} -\endcode - -When using lost allocations, you may see some Vulkan validation layer warnings -about overlapping regions of memory bound to different kinds of buffers and -images. This is still valid as long as you implement proper handling of lost -allocations (like in the example above) and don't use them. - -The library uses following algorithm for allocation, in order: - --# Try to find free range of memory in existing blocks. --# If failed, try to create a new block of `VkDeviceMemory`, with preferred block size. --# If failed, try to create such block with size/2 and size/4. --# If failed and `VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT` flag was - specified, try to find space in existing blocks, possilby making some other - allocations lost. --# If failed, try to allocate separate `VkDeviceMemory` for this allocation, - just like when you use `VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT`. --# If failed, choose other memory type that meets the requirements specified in - VmaAllocationCreateInfo and go to point 1. --# If failed, return `VK_ERROR_OUT_OF_DEVICE_MEMORY`. - - -\page allocation_annotation Allocation names and user data - -\section allocation_user_data Allocation user data - -You can annotate allocations with your own information, e.g. for debugging purposes. -To do that, fill VmaAllocationCreateInfo::pUserData field when creating -an allocation. It's an opaque `void*` pointer. 
You can use it e.g. as a pointer, -some handle, index, key, ordinal number or any other value that would associate -the allocation with your custom metadata. - -\code -VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -// Fill bufferInfo... - -MyBufferMetadata* pMetadata = CreateBufferMetadata(); - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; -allocCreateInfo.pUserData = pMetadata; - -VkBuffer buffer; -VmaAllocation allocation; -vmaCreateBuffer(allocator, &bufferInfo, &allocCreateInfo, &buffer, &allocation, nullptr); -\endcode - -The pointer may be later retrieved as VmaAllocationInfo::pUserData: - -\code -VmaAllocationInfo allocInfo; -vmaGetAllocationInfo(allocator, allocation, &allocInfo); -MyBufferMetadata* pMetadata = (MyBufferMetadata*)allocInfo.pUserData; -\endcode - -It can also be changed using function vmaSetAllocationUserData(). - -Values of (non-zero) allocations' `pUserData` are printed in JSON report created by -vmaBuildStatsString(), in hexadecimal form. - -\section allocation_names Allocation names - -There is alternative mode available where `pUserData` pointer is used to point to -a null-terminated string, giving a name to the allocation. To use this mode, -set `VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT` flag in VmaAllocationCreateInfo::flags. -Then `pUserData` passed as VmaAllocationCreateInfo::pUserData or argument to -vmaSetAllocationUserData() must be either null or pointer to a null-terminated string. -The library creates internal copy of the string, so the pointer you pass doesn't need -to be valid for whole lifetime of the allocation. You can free it after the call. - -\code -VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; -// Fill imageInfo... 
- -std::string imageName = "Texture: "; -imageName += fileName; - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; -allocCreateInfo.flags = VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT; -allocCreateInfo.pUserData = imageName.c_str(); - -VkImage image; -VmaAllocation allocation; -vmaCreateImage(allocator, &imageInfo, &allocCreateInfo, &image, &allocation, nullptr); -\endcode - -The value of `pUserData` pointer of the allocation will be different than the one -you passed when setting allocation's name - pointing to a buffer managed -internally that holds copy of the string. - -\code -VmaAllocationInfo allocInfo; -vmaGetAllocationInfo(allocator, allocation, &allocInfo); -const char* imageName = (const char*)allocInfo.pUserData; -printf("Image name: %s\n", imageName); -\endcode - -That string is also printed in JSON report created by vmaBuildStatsString(). - -\page configuration Configuration - -Please check "CONFIGURATION SECTION" in the code to find macros that you can define -before each include of this file or change directly in this file to provide -your own implementation of basic facilities like assert, `min()` and `max()` functions, -mutex etc. C++ STL is used by default, but changing these allows you to get rid -of any STL usage if you want, as many game developers tend to do. - -\section config_Vulkan_functions Pointers to Vulkan functions - -The library uses Vulkan functions straight from the `vulkan.h` header by default. -If you want to provide your own pointers to these functions, e.g. fetched using -`vkGetInstanceProcAddr()` and `vkGetDeviceProcAddr()`: - --# Define `VMA_STATIC_VULKAN_FUNCTIONS 0`. --# Provide valid pointers through VmaAllocatorCreateInfo::pVulkanFunctions. 
- -\section custom_memory_allocator Custom host memory allocator - -If you use custom allocator for CPU memory rather than default operator `new` -and `delete` from C++, you can make this library using your allocator as well -by filling optional member VmaAllocatorCreateInfo::pAllocationCallbacks. These -functions will be passed to Vulkan, as well as used by the library itself to -make any CPU-side allocations. - -\section allocation_callbacks Device memory allocation callbacks - -The library makes calls to `vkAllocateMemory()` and `vkFreeMemory()` internally. -You can setup callbacks to be informed about these calls, e.g. for the purpose -of gathering some statistics. To do it, fill optional member -VmaAllocatorCreateInfo::pDeviceMemoryCallbacks. - -\section heap_memory_limit Device heap memory limit - -If you want to test how your program behaves with limited amount of Vulkan device -memory available without switching your graphics card to one that really has -smaller VRAM, you can use a feature of this library intended for this purpose. -To do it, fill optional member VmaAllocatorCreateInfo::pHeapSizeLimit. - - - -\page vk_khr_dedicated_allocation VK_KHR_dedicated_allocation - -VK_KHR_dedicated_allocation is a Vulkan extension which can be used to improve -performance on some GPUs. It augments Vulkan API with possibility to query -driver whether it prefers particular buffer or image to have its own, dedicated -allocation (separate `VkDeviceMemory` block) for better efficiency - to be able -to do some internal optimizations. - -The extension is supported by this library. It will be used automatically when -enabled. To enable it: - -1 . When creating Vulkan device, check if following 2 device extensions are -supported (call `vkEnumerateDeviceExtensionProperties()`). -If yes, enable them (fill `VkDeviceCreateInfo::ppEnabledExtensionNames`). - -- VK_KHR_get_memory_requirements2 -- VK_KHR_dedicated_allocation - -If you enabled these extensions: - -2 . 
Use `VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT` flag when creating -your `VmaAllocator` to inform the library that you enabled required extensions -and you want the library to use them. - -\code -allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT; - -vmaCreateAllocator(&allocatorInfo, &allocator); -\endcode - -That's all. The extension will be automatically used whenever you create a -buffer using vmaCreateBuffer() or image using vmaCreateImage(). - -When using the extension together with Vulkan Validation Layer, you will receive -warnings like this: - - vkBindBufferMemory(): Binding memory to buffer 0x33 but vkGetBufferMemoryRequirements() has not been called on that buffer. - -It is OK, you should just ignore it. It happens because you use function -`vkGetBufferMemoryRequirements2KHR()` instead of standard -`vkGetBufferMemoryRequirements()`, while the validation layer seems to be -unaware of it. - -To learn more about this extension, see: - -- [VK_KHR_dedicated_allocation in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.0-extensions/html/vkspec.html#VK_KHR_dedicated_allocation) -- [VK_KHR_dedicated_allocation unofficial manual](http://asawicki.info/articles/VK_KHR_dedicated_allocation.php5) - - - -\page thread_safety Thread safety - -- The library has no global state, so separate `VmaAllocator` objects can be used - independently. -- By default, all calls to functions that take `VmaAllocator` as first parameter - are safe to call from multiple threads simultaneously because they are - synchronized internally when needed. -- When the allocator is created with `VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT` - flag, calls to functions that take such `VmaAllocator` object must be - synchronized externally. -- Access to a `VmaAllocation` object must be externally synchronized. 
For example, - you must not call vmaGetAllocationInfo() and vmaMapMemory() from different - threads at the same time if you pass the same `VmaAllocation` object to these - functions. - -*/ - -#include - -VK_DEFINE_HANDLE(VmaAllocator) - -/// Callback function called after successful vkAllocateMemory. -typedef void (VKAPI_PTR *PFN_vmaAllocateDeviceMemoryFunction)( - VmaAllocator allocator, - uint32_t memoryType, - VkDeviceMemory memory, - VkDeviceSize size); -/// Callback function called before vkFreeMemory. -typedef void (VKAPI_PTR *PFN_vmaFreeDeviceMemoryFunction)( - VmaAllocator allocator, - uint32_t memoryType, - VkDeviceMemory memory, - VkDeviceSize size); - -/** \brief Set of callbacks that the library will call for `vkAllocateMemory` and `vkFreeMemory`. - -Provided for informative purpose, e.g. to gather statistics about number of -allocations or total amount of memory allocated in Vulkan. - -Used in VmaAllocatorCreateInfo::pDeviceMemoryCallbacks. -*/ -typedef struct VmaDeviceMemoryCallbacks { - /// Optional, can be null. - PFN_vmaAllocateDeviceMemoryFunction pfnAllocate; - /// Optional, can be null. - PFN_vmaFreeDeviceMemoryFunction pfnFree; -} VmaDeviceMemoryCallbacks; - -/// Flags for created VmaAllocator. -typedef enum VmaAllocatorCreateFlagBits { - /** \brief Allocator and all objects created from it will not be synchronized internally, so you must guarantee they are used from only one thread at a time or synchronized externally by you. - - Using this flag may increase performance because internal mutexes are not used. - */ - VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT = 0x00000001, - /** \brief Enables usage of VK_KHR_dedicated_allocation extension. - - Using this extenion will automatically allocate dedicated blocks of memory for - some buffers and images instead of suballocating place for them out of bigger - memory blocks (as if you explicitly used VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT - flag) when it is recommended by the driver. 
It may improve performance on some - GPUs. - - You may set this flag only if you found out that following device extensions are - supported, you enabled them while creating Vulkan device passed as - VmaAllocatorCreateInfo::device, and you want them to be used internally by this - library: - - - VK_KHR_get_memory_requirements2 - - VK_KHR_dedicated_allocation - -When this flag is set, you can experience following warnings reported by Vulkan -validation layer. You can ignore them. - -> vkBindBufferMemory(): Binding memory to buffer 0x2d but vkGetBufferMemoryRequirements() has not been called on that buffer. - */ - VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT = 0x00000002, - - VMA_ALLOCATOR_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VmaAllocatorCreateFlagBits; -typedef VkFlags VmaAllocatorCreateFlags; - -/** \brief Pointers to some Vulkan functions - a subset used by the library. - -Used in VmaAllocatorCreateInfo::pVulkanFunctions. -*/ -typedef struct VmaVulkanFunctions { - PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties; - PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties; - PFN_vkAllocateMemory vkAllocateMemory; - PFN_vkFreeMemory vkFreeMemory; - PFN_vkMapMemory vkMapMemory; - PFN_vkUnmapMemory vkUnmapMemory; - PFN_vkBindBufferMemory vkBindBufferMemory; - PFN_vkBindImageMemory vkBindImageMemory; - PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; - PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; - PFN_vkCreateBuffer vkCreateBuffer; - PFN_vkDestroyBuffer vkDestroyBuffer; - PFN_vkCreateImage vkCreateImage; - PFN_vkDestroyImage vkDestroyImage; - PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR; - PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR; -} VmaVulkanFunctions; - -/// Description of a Allocator to be created. -typedef struct VmaAllocatorCreateInfo -{ - /// Flags for created allocator. Use VmaAllocatorCreateFlagBits enum. 
- VmaAllocatorCreateFlags flags; - /// Vulkan physical device. - /** It must be valid throughout whole lifetime of created allocator. */ - VkPhysicalDevice physicalDevice; - /// Vulkan device. - /** It must be valid throughout whole lifetime of created allocator. */ - VkDevice device; - /// Preferred size of a single `VkDeviceMemory` block to be allocated from large heaps. - /** Set to 0 to use default, which is currently 256 MB. */ - VkDeviceSize preferredLargeHeapBlockSize; - /// Preferred size of a single `VkDeviceMemory` block to be allocated from small heaps <= 512 MB. - /** Set to 0 to use default, which is currently 64 MB. */ - VkDeviceSize preferredSmallHeapBlockSize; - /// Custom CPU memory allocation callbacks. - /** Optional, can be null. When specified, will also be used for all CPU-side memory allocations. */ - const VkAllocationCallbacks* pAllocationCallbacks; - /// Informative callbacks for vkAllocateMemory, vkFreeMemory. - /** Optional, can be null. */ - const VmaDeviceMemoryCallbacks* pDeviceMemoryCallbacks; - /** \brief Maximum number of additional frames that are in use at the same time as current frame. - - This value is used only when you make allocations with - VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT flag. Such allocation cannot become - lost if allocation.lastUseFrameIndex >= allocator.currentFrameIndex - frameInUseCount. - - For example, if you double-buffer your command buffers, so resources used for - rendering in previous frame may still be in use by the GPU at the moment you - allocate resources needed for the current frame, set this value to 1. - - If you want to allow any allocations other than used in the current frame to - become lost, set this value to 0. - */ - uint32_t frameInUseCount; - /** \brief Either NULL or a pointer to an array of limits on maximum number of bytes that can be allocated out of particular Vulkan memory heap. 
- - If not NULL, it must be a pointer to an array of - `VkPhysicalDeviceMemoryProperties::memoryHeapCount` elements, defining limit on - maximum number of bytes that can be allocated out of particular Vulkan memory - heap. - - Any of the elements may be equal to `VK_WHOLE_SIZE`, which means no limit on that - heap. This is also the default in case of `pHeapSizeLimit` = NULL. - - If there is a limit defined for a heap: - - - If user tries to allocate more memory from that heap using this allocator, - the allocation fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. - - If the limit is smaller than heap size reported in `VkMemoryHeap::size`, the - value of this limit will be reported instead when using vmaGetMemoryProperties(). - - Warning! Using this feature may not be equivalent to installing a GPU with - smaller amount of memory, because graphics driver doesn't necessary fail new - allocations with `VK_ERROR_OUT_OF_DEVICE_MEMORY` result when memory capacity is - exceeded. It may return success and just silently migrate some device memory - blocks to system RAM. - */ - const VkDeviceSize* pHeapSizeLimit; - /** \brief Pointers to Vulkan functions. Can be null if you leave define `VMA_STATIC_VULKAN_FUNCTIONS 1`. - - If you leave define `VMA_STATIC_VULKAN_FUNCTIONS 1` in configuration section, - you can pass null as this member, because the library will fetch pointers to - Vulkan functions internally in a static way, like: - - vulkanFunctions.vkAllocateMemory = &vkAllocateMemory; - - Fill this member if you want to provide your own pointers to Vulkan functions, - e.g. fetched using `vkGetInstanceProcAddr()` and `vkGetDeviceProcAddr()`. - */ - const VmaVulkanFunctions* pVulkanFunctions; -} VmaAllocatorCreateInfo; - -/// Creates Allocator object. -VkResult vmaCreateAllocator( - const VmaAllocatorCreateInfo* pCreateInfo, - VmaAllocator* pAllocator); - -/// Destroys allocator object. 
-void vmaDestroyAllocator( - VmaAllocator allocator); - -/** -PhysicalDeviceProperties are fetched from physicalDevice by the allocator. -You can access it here, without fetching it again on your own. -*/ -void vmaGetPhysicalDeviceProperties( - VmaAllocator allocator, - const VkPhysicalDeviceProperties** ppPhysicalDeviceProperties); - -/** -PhysicalDeviceMemoryProperties are fetched from physicalDevice by the allocator. -You can access it here, without fetching it again on your own. -*/ -void vmaGetMemoryProperties( - VmaAllocator allocator, - const VkPhysicalDeviceMemoryProperties** ppPhysicalDeviceMemoryProperties); - -/** -\brief Given Memory Type Index, returns Property Flags of this memory type. - -This is just a convenience function. Same information can be obtained using -vmaGetMemoryProperties(). -*/ -void vmaGetMemoryTypeProperties( - VmaAllocator allocator, - uint32_t memoryTypeIndex, - VkMemoryPropertyFlags* pFlags); - -/** \brief Sets index of the current frame. - -This function must be used if you make allocations with -`VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT` and -`VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT` flags to inform the allocator -when a new frame begins. Allocations queried using vmaGetAllocationInfo() cannot -become lost in the current frame. -*/ -void vmaSetCurrentFrameIndex( - VmaAllocator allocator, - uint32_t frameIndex); - -/** \brief Calculated statistics of memory usage in entire allocator. -*/ -typedef struct VmaStatInfo -{ - /// Number of `VkDeviceMemory` Vulkan memory blocks allocated. - uint32_t blockCount; - /// Number of `VmaAllocation` allocation objects allocated. - uint32_t allocationCount; - /// Number of free ranges of memory between allocations. - uint32_t unusedRangeCount; - /// Total number of bytes occupied by all allocations. - VkDeviceSize usedBytes; - /// Total number of bytes occupied by unused ranges. 
- VkDeviceSize unusedBytes; - VkDeviceSize allocationSizeMin, allocationSizeAvg, allocationSizeMax; - VkDeviceSize unusedRangeSizeMin, unusedRangeSizeAvg, unusedRangeSizeMax; -} VmaStatInfo; - -/// General statistics from current state of Allocator. -typedef struct VmaStats -{ - VmaStatInfo memoryType[VK_MAX_MEMORY_TYPES]; - VmaStatInfo memoryHeap[VK_MAX_MEMORY_HEAPS]; - VmaStatInfo total; -} VmaStats; - -/// Retrieves statistics from current state of the Allocator. -void vmaCalculateStats( - VmaAllocator allocator, - VmaStats* pStats); - -#define VMA_STATS_STRING_ENABLED 1 - -#if VMA_STATS_STRING_ENABLED - -/// Builds and returns statistics as string in JSON format. -/** @param[out] ppStatsString Must be freed using vmaFreeStatsString() function. -*/ -void vmaBuildStatsString( - VmaAllocator allocator, - char** ppStatsString, - VkBool32 detailedMap); - -void vmaFreeStatsString( - VmaAllocator allocator, - char* pStatsString); - -#endif // #if VMA_STATS_STRING_ENABLED - -VK_DEFINE_HANDLE(VmaPool) - -typedef enum VmaMemoryUsage -{ - /** No intended memory usage specified. - Use other members of VmaAllocationCreateInfo to specify your requirements. - */ - VMA_MEMORY_USAGE_UNKNOWN = 0, - /** Memory will be used on device only, so faster access from the device is preferred. - It usually means device-local GPU memory. - No need to be mappable on host. - Good e.g. for images to be used as attachments, images containing textures to be sampled, - buffers used as vertex buffer, index buffer, uniform buffer and majority of - other types of resources used by device. - You can still do transfers from/to such resource to/from host memory. - - The allocation may still end up in `HOST_VISIBLE` memory on some implementations. - In such case, you are free to map it. - You can also use `VMA_ALLOCATION_CREATE_MAPPED_BIT` with this usage type. - */ - VMA_MEMORY_USAGE_GPU_ONLY = 1, - /** Memory will be mapped and used on host. - It usually means CPU system memory. 
- Could be used for transfer to/from device. - Good e.g. for "staging" copy of buffers and images, used as transfer source or destination. - Resources created in this pool may still be accessible to the device, but access to them can be slower. - - Guarantees to be `HOST_VISIBLE` and `HOST_COHERENT`. - */ - VMA_MEMORY_USAGE_CPU_ONLY = 2, - /** Memory will be used for frequent (dynamic) updates from host and reads on device (upload). - Good e.g. for vertex buffers or uniform buffers updated every frame. - - Guarantees to be `HOST_VISIBLE`. - */ - VMA_MEMORY_USAGE_CPU_TO_GPU = 3, - /** Memory will be used for frequent writing on device and readback on host (download). - - Guarantees to be `HOST_VISIBLE`. - */ - VMA_MEMORY_USAGE_GPU_TO_CPU = 4, - VMA_MEMORY_USAGE_MAX_ENUM = 0x7FFFFFFF -} VmaMemoryUsage; - -/// Flags to be passed as VmaAllocationCreateInfo::flags. -typedef enum VmaAllocationCreateFlagBits { - /** \brief Set this flag if the allocation should have its own memory block. - - Use it for special, big resources, like fullscreen images used as attachments. - - This flag must also be used for host visible resources that you want to map - simultaneously because otherwise they might end up as regions of the same - `VkDeviceMemory`, while mapping same `VkDeviceMemory` multiple times - simultaneously is illegal. - - You should not use this flag if VmaAllocationCreateInfo::pool is not null. - */ - VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT = 0x00000001, - - /** \brief Set this flag to only try to allocate from existing `VkDeviceMemory` blocks and never create new such block. - - If new allocation cannot be placed in any of the existing blocks, allocation - fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY` error. - - You should not use `VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT` and - `VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT` at the same time. It makes no sense. - - If VmaAllocationCreateInfo::pool is not null, this flag is implied and ignored. 
*/ - VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT = 0x00000002, - /** \brief Set this flag to use a memory that will be persistently mapped and retrieve pointer to it. - - Pointer to mapped memory will be returned through VmaAllocationInfo::pMappedData. - - Is it valid to use this flag for allocation made from memory type that is not - `HOST_VISIBLE`. This flag is then ignored and memory is not mapped. This is - useful if you need an allocation that is efficient to use on GPU - (`DEVICE_LOCAL`) and still want to map it directly if possible on platforms that - support it (e.g. Intel GPU). - - You should not use this flag together with `VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT`. - */ - VMA_ALLOCATION_CREATE_MAPPED_BIT = 0x00000004, - /** Allocation created with this flag can become lost as a result of another - allocation with `VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT` flag, so you - must check it before use. - - To check if allocation is not lost, call vmaGetAllocationInfo() and check if - VmaAllocationInfo::deviceMemory is not `VK_NULL_HANDLE`. - - For details about supporting lost allocations, see Lost Allocations - chapter of User Guide on Main Page. - - You should not use this flag together with `VMA_ALLOCATION_CREATE_MAPPED_BIT`. - */ - VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT = 0x00000008, - /** While creating allocation using this flag, other allocations that were - created with flag `VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT` can become lost. - - For details about supporting lost allocations, see Lost Allocations - chapter of User Guide on Main Page. - */ - VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT = 0x00000010, - /** Set this flag to treat VmaAllocationCreateInfo::pUserData as pointer to a - null-terminated string. Instead of copying pointer value, a local copy of the - string is made and stored in allocation's pUserData. The string is automatically - freed together with the allocation. It is also used in vmaBuildStatsString(). 
- */ - VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT = 0x00000020, - - VMA_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VmaAllocationCreateFlagBits; -typedef VkFlags VmaAllocationCreateFlags; - -typedef struct VmaAllocationCreateInfo -{ - /// Use VmaAllocationCreateFlagBits enum. - VmaAllocationCreateFlags flags; - /** \brief Intended usage of memory. - - You can leave `VMA_MEMORY_USAGE_UNKNOWN` if you specify memory requirements in other way. \n - If `pool` is not null, this member is ignored. - */ - VmaMemoryUsage usage; - /** \brief Flags that must be set in a Memory Type chosen for an allocation. - - Leave 0 if you specify memory requirements in other way. \n - If `pool` is not null, this member is ignored.*/ - VkMemoryPropertyFlags requiredFlags; - /** \brief Flags that preferably should be set in a memory type chosen for an allocation. - - Set to 0 if no additional flags are prefered. \n - If `pool` is not null, this member is ignored. */ - VkMemoryPropertyFlags preferredFlags; - /** \brief Bitmask containing one bit set for every memory type acceptable for this allocation. - - Value 0 is equivalent to `UINT32_MAX` - it means any memory type is accepted if - it meets other requirements specified by this structure, with no further - restrictions on memory type index. \n - If `pool` is not null, this member is ignored. - */ - uint32_t memoryTypeBits; - /** \brief Pool that this allocation should be created in. - - Leave `VK_NULL_HANDLE` to allocate from default pool. If not null, members: - `usage`, `requiredFlags`, `preferredFlags`, `memoryTypeBits` are ignored. - */ - VmaPool pool; - /** \brief Custom general-purpose pointer that will be stored in VmaAllocation, can be read as VmaAllocationInfo::pUserData and changed using vmaSetAllocationUserData(). - - If `VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT` is used, it must be either - null or pointer to a null-terminated string. 
The string will be then copied to - internal buffer, so it doesn't need to be valid after allocation call. - */ - void* pUserData; -} VmaAllocationCreateInfo; - -/** -This algorithm tries to find a memory type that: - -- Is allowed by memoryTypeBits. -- Contains all the flags from pAllocationCreateInfo->requiredFlags. -- Matches intended usage. -- Has as many flags from pAllocationCreateInfo->preferredFlags as possible. - -\return Returns VK_ERROR_FEATURE_NOT_PRESENT if not found. Receiving such result -from this function or any other allocating function probably means that your -device doesn't support any memory type with requested features for the specific -type of resource you want to use it for. Please check parameters of your -resource, like image layout (OPTIMAL versus LINEAR) or mip level count. -*/ -VkResult vmaFindMemoryTypeIndex( - VmaAllocator allocator, - uint32_t memoryTypeBits, - const VmaAllocationCreateInfo* pAllocationCreateInfo, - uint32_t* pMemoryTypeIndex); - -/// Flags to be passed as VmaPoolCreateInfo::flags. -typedef enum VmaPoolCreateFlagBits { - /** \brief Use this flag if you always allocate only buffers and linear images or only optimal images out of this pool and so Buffer-Image Granularity can be ignored. - - This is na optional optimization flag. - - If you always allocate using vmaCreateBuffer(), vmaCreateImage(), - vmaAllocateMemoryForBuffer(), then you don't need to use it because allocator - knows exact type of your allocations so it can handle Buffer-Image Granularity - in the optimal way. - - If you also allocate using vmaAllocateMemoryForImage() or vmaAllocateMemory(), - exact type of such allocations is not known, so allocator must be conservative - in handling Buffer-Image Granularity, which can lead to suboptimal allocation - (wasted memory). 
In that case, if you can make sure you always allocate only - buffers and linear images or only optimal images out of this pool, use this flag - to make allocator disregard Buffer-Image Granularity and so make allocations - more optimal. - */ - VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT = 0x00000002, - - VMA_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VmaPoolCreateFlagBits; -typedef VkFlags VmaPoolCreateFlags; - -/** \brief Describes parameter of created `VmaPool`. -*/ -typedef struct VmaPoolCreateInfo { - /** \brief Vulkan memory type index to allocate this pool from. - */ - uint32_t memoryTypeIndex; - /** \brief Use combination of `VmaPoolCreateFlagBits`. - */ - VmaPoolCreateFlags flags; - /** \brief Size of a single `VkDeviceMemory` block to be allocated as part of this pool, in bytes. - - Optional. Leave 0 to use default. - */ - VkDeviceSize blockSize; - /** \brief Minimum number of blocks to be always allocated in this pool, even if they stay empty. - - Set to 0 to have no preallocated blocks and let the pool be completely empty. - */ - size_t minBlockCount; - /** \brief Maximum number of blocks that can be allocated in this pool. - - Optional. Set to 0 to use `SIZE_MAX`, which means no limit. - - Set to same value as minBlockCount to have fixed amount of memory allocated - throuout whole lifetime of this pool. - */ - size_t maxBlockCount; - /** \brief Maximum number of additional frames that are in use at the same time as current frame. - - This value is used only when you make allocations with - `VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT` flag. Such allocation cannot become - lost if allocation.lastUseFrameIndex >= allocator.currentFrameIndex - frameInUseCount. - - For example, if you double-buffer your command buffers, so resources used for - rendering in previous frame may still be in use by the GPU at the moment you - allocate resources needed for the current frame, set this value to 1. 
- - If you want to allow any allocations other than used in the current frame to - become lost, set this value to 0. - */ - uint32_t frameInUseCount; -} VmaPoolCreateInfo; - -/** \brief Describes parameter of existing `VmaPool`. -*/ -typedef struct VmaPoolStats { - /** \brief Total amount of `VkDeviceMemory` allocated from Vulkan for this pool, in bytes. - */ - VkDeviceSize size; - /** \brief Total number of bytes in the pool not used by any `VmaAllocation`. - */ - VkDeviceSize unusedSize; - /** \brief Number of VmaAllocation objects created from this pool that were not destroyed or lost. - */ - size_t allocationCount; - /** \brief Number of continuous memory ranges in the pool not used by any `VmaAllocation`. - */ - size_t unusedRangeCount; - /** \brief Size of the largest continuous free memory region. - - Making a new allocation of that size is not guaranteed to succeed because of - possible additional margin required to respect alignment and buffer/image - granularity. - */ - VkDeviceSize unusedRangeSizeMax; -} VmaPoolStats; - -/** \brief Allocates Vulkan device memory and creates `VmaPool` object. - -@param allocator Allocator object. -@param pCreateInfo Parameters of pool to create. -@param[out] pPool Handle to created pool. -*/ -VkResult vmaCreatePool( - VmaAllocator allocator, - const VmaPoolCreateInfo* pCreateInfo, - VmaPool* pPool); - -/** \brief Destroys VmaPool object and frees Vulkan device memory. -*/ -void vmaDestroyPool( - VmaAllocator allocator, - VmaPool pool); - -/** \brief Retrieves statistics of existing VmaPool object. - -@param allocator Allocator object. -@param pool Pool object. -@param[out] pPoolStats Statistics of specified pool. -*/ -void vmaGetPoolStats( - VmaAllocator allocator, - VmaPool pool, - VmaPoolStats* pPoolStats); - -/** \brief Marks all allocations in given pool as lost if they are not used in current frame or VmaPoolCreateInfo::frameInUseCount back from now. - -@param allocator Allocator object. -@param pool Pool. 
-@param[out] pLostAllocationCount Number of allocations marked as lost. Optional - pass null if you don't need this information. -*/ -void vmaMakePoolAllocationsLost( - VmaAllocator allocator, - VmaPool pool, - size_t* pLostAllocationCount); - -VK_DEFINE_HANDLE(VmaAllocation) - -/** \brief Parameters of `VmaAllocation` objects, that can be retrieved using function vmaGetAllocationInfo(). -*/ -typedef struct VmaAllocationInfo { - /** \brief Memory type index that this allocation was allocated from. - - It never changes. - */ - uint32_t memoryType; - /** \brief Handle to Vulkan memory object. - - Same memory object can be shared by multiple allocations. - - It can change after call to vmaDefragment() if this allocation is passed to the function, or if allocation is lost. - - If the allocation is lost, it is equal to `VK_NULL_HANDLE`. - */ - VkDeviceMemory deviceMemory; - /** \brief Offset into deviceMemory object to the beginning of this allocation, in bytes. (deviceMemory, offset) pair is unique to this allocation. - - It can change after call to vmaDefragment() if this allocation is passed to the function, or if allocation is lost. - */ - VkDeviceSize offset; - /** \brief Size of this allocation, in bytes. - - It never changes, unless allocation is lost. - */ - VkDeviceSize size; - /** \brief Pointer to the beginning of this allocation as mapped data. - - If the allocation hasn't been mapped using vmaMapMemory() and hasn't been - created with `VMA_ALLOCATION_CREATE_MAPPED_BIT` flag, this value null. - - It can change after call to vmaMapMemory(), vmaUnmapMemory(). - It can also change after call to vmaDefragment() if this allocation is passed to the function. - */ - void* pMappedData; - /** \brief Custom general-purpose pointer that was passed as VmaAllocationCreateInfo::pUserData or set using vmaSetAllocationUserData(). - - It can change after call to vmaSetAllocationUserData() for this allocation. 
- */ - void* pUserData; -} VmaAllocationInfo; - -/** \brief General purpose memory allocation. - -@param[out] pAllocation Handle to allocated memory. -@param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). - -You should free the memory using vmaFreeMemory(). - -It is recommended to use vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(), -vmaCreateBuffer(), vmaCreateImage() instead whenever possible. -*/ -VkResult vmaAllocateMemory( - VmaAllocator allocator, - const VkMemoryRequirements* pVkMemoryRequirements, - const VmaAllocationCreateInfo* pCreateInfo, - VmaAllocation* pAllocation, - VmaAllocationInfo* pAllocationInfo); - -/** -@param[out] pAllocation Handle to allocated memory. -@param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). - -You should free the memory using vmaFreeMemory(). -*/ -VkResult vmaAllocateMemoryForBuffer( - VmaAllocator allocator, - VkBuffer buffer, - const VmaAllocationCreateInfo* pCreateInfo, - VmaAllocation* pAllocation, - VmaAllocationInfo* pAllocationInfo); - -/// Function similar to vmaAllocateMemoryForBuffer(). -VkResult vmaAllocateMemoryForImage( - VmaAllocator allocator, - VkImage image, - const VmaAllocationCreateInfo* pCreateInfo, - VmaAllocation* pAllocation, - VmaAllocationInfo* pAllocationInfo); - -/// Frees memory previously allocated using vmaAllocateMemory(), vmaAllocateMemoryForBuffer(), or vmaAllocateMemoryForImage(). -void vmaFreeMemory( - VmaAllocator allocator, - VmaAllocation allocation); - -/// Returns current information about specified allocation. -void vmaGetAllocationInfo( - VmaAllocator allocator, - VmaAllocation allocation, - VmaAllocationInfo* pAllocationInfo); - -/** \brief Sets pUserData in given allocation to new value. 
- -If the allocation was created with VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT, -pUserData must be either null, or pointer to a null-terminated string. The function -makes local copy of the string and sets it as allocation's pUserData. String -passed as pUserData doesn't need to be valid for whole lifetime of the allocation - -you can free it after this call. String previously pointed by allocation's -pUserData is freed from memory. - -If the flag was not used, the value of pointer pUserData is just copied to -allocation's pUserData. It is opaque, so you can use it however you want - e.g. -as a pointer, ordinal number or some handle to you own data. -*/ -void vmaSetAllocationUserData( - VmaAllocator allocator, - VmaAllocation allocation, - void* pUserData); - -/** \brief Creates new allocation that is in lost state from the beginning. - -It can be useful if you need a dummy, non-null allocation. - -You still need to destroy created object using vmaFreeMemory(). - -Returned allocation is not tied to any specific memory pool or memory type and -not bound to any image or buffer. It has size = 0. It cannot be turned into -a real, non-empty allocation. -*/ -void vmaCreateLostAllocation( - VmaAllocator allocator, - VmaAllocation* pAllocation); - -/** \brief Maps memory represented by given allocation and returns pointer to it. - -Maps memory represented by given allocation to make it accessible to CPU code. -When succeeded, `*ppData` contains pointer to first byte of this memory. -If the allocation is part of bigger `VkDeviceMemory` block, the pointer is -correctly offseted to the beginning of region assigned to this particular -allocation. - -Mapping is internally reference-counted and synchronized, so despite raw Vulkan -function `vkMapMemory()` cannot be used to map same block of `VkDeviceMemory` -multiple times simultaneously, it is safe to call this function on allocations -assigned to the same memory block. 
Actual Vulkan memory will be mapped on first -mapping and unmapped on last unmapping. - -If the function succeeded, you must call vmaUnmapMemory() to unmap the -allocation when mapping is no longer needed or before freeing the allocation, at -the latest. - -It also safe to call this function multiple times on the same allocation. You -must call vmaUnmapMemory() same number of times as you called vmaMapMemory(). - -It is also safe to call this function on allocation created with -`VMA_ALLOCATION_CREATE_MAPPED_BIT` flag. Its memory stays mapped all the time. -You must still call vmaUnmapMemory() same number of times as you called -vmaMapMemory(). You must not call vmaUnmapMemory() additional time to free the -"0-th" mapping made automatically due to `VMA_ALLOCATION_CREATE_MAPPED_BIT` flag. - -This function fails when used on allocation made in memory type that is not -`HOST_VISIBLE`. - -This function always fails when called for allocation that was created with -`VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT` flag. Such allocations cannot be -mapped. -*/ -VkResult vmaMapMemory( - VmaAllocator allocator, - VmaAllocation allocation, - void** ppData); - -/** \brief Unmaps memory represented by given allocation, mapped previously using vmaMapMemory(). - -For details, see description of vmaMapMemory(). -*/ -void vmaUnmapMemory( - VmaAllocator allocator, - VmaAllocation allocation); - -/** \brief Optional configuration parameters to be passed to function vmaDefragment(). */ -typedef struct VmaDefragmentationInfo { - /** \brief Maximum total numbers of bytes that can be copied while moving allocations to different places. - - Default is `VK_WHOLE_SIZE`, which means no limit. - */ - VkDeviceSize maxBytesToMove; - /** \brief Maximum number of allocations that can be moved to different place. - - Default is `UINT32_MAX`, which means no limit. - */ - uint32_t maxAllocationsToMove; -} VmaDefragmentationInfo; - -/** \brief Statistics returned by function vmaDefragment(). 
*/ -typedef struct VmaDefragmentationStats { - /// Total number of bytes that have been copied while moving allocations to different places. - VkDeviceSize bytesMoved; - /// Total number of bytes that have been released to the system by freeing empty `VkDeviceMemory` objects. - VkDeviceSize bytesFreed; - /// Number of allocations that have been moved to different places. - uint32_t allocationsMoved; - /// Number of empty `VkDeviceMemory` objects that have been released to the system. - uint32_t deviceMemoryBlocksFreed; -} VmaDefragmentationStats; - -/** \brief Compacts memory by moving allocations. - -@param pAllocations Array of allocations that can be moved during this compation. -@param allocationCount Number of elements in pAllocations and pAllocationsChanged arrays. -@param[out] pAllocationsChanged Array of boolean values that will indicate whether matching allocation in pAllocations array has been moved. This parameter is optional. Pass null if you don't need this information. -@param pDefragmentationInfo Configuration parameters. Optional - pass null to use default values. -@param[out] pDefragmentationStats Statistics returned by the function. Optional - pass null if you don't need this information. -@return VK_SUCCESS if completed, VK_INCOMPLETE if succeeded but didn't make all possible optimizations because limits specified in pDefragmentationInfo have been reached, negative error code in case of error. - -This function works by moving allocations to different places (different -`VkDeviceMemory` objects and/or different offsets) in order to optimize memory -usage. Only allocations that are in pAllocations array can be moved. All other -allocations are considered nonmovable in this call. Basic rules: - -- Only allocations made in memory types that have - `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag can be compacted. You may pass other - allocations but it makes no sense - these will never be moved. 
-- You may pass allocations made with `VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT` but - it makes no sense - they will never be moved. -- Both allocations made with or without `VMA_ALLOCATION_CREATE_MAPPED_BIT` - flag can be compacted. If not persistently mapped, memory will be mapped - temporarily inside this function if needed. -- You must not pass same `VmaAllocation` object multiple times in pAllocations array. - -The function also frees empty `VkDeviceMemory` blocks. - -After allocation has been moved, its VmaAllocationInfo::deviceMemory and/or -VmaAllocationInfo::offset changes. You must query them again using -vmaGetAllocationInfo() if you need them. - -If an allocation has been moved, data in memory is copied to new place -automatically, but if it was bound to a buffer or an image, you must destroy -that object yourself, create new one and bind it to the new memory pointed by -the allocation. You must use `vkDestroyBuffer()`, `vkDestroyImage()`, -`vkCreateBuffer()`, `vkCreateImage()` for that purpose and NOT vmaDestroyBuffer(), -vmaDestroyImage(), vmaCreateBuffer(), vmaCreateImage()! Example: - -\code -VkDevice device = ...; -VmaAllocator allocator = ...; -std::vector buffers = ...; -std::vector allocations = ...; - -std::vector allocationsChanged(allocations.size()); -vmaDefragment(allocator, allocations.data(), allocations.size(), allocationsChanged.data(), nullptr, nullptr); - -for(size_t i = 0; i < allocations.size(); ++i) -{ - if(allocationsChanged[i]) - { - VmaAllocationInfo allocInfo; - vmaGetAllocationInfo(allocator, allocations[i], &allocInfo); - - vkDestroyBuffer(device, buffers[i], nullptr); - - VkBufferCreateInfo bufferInfo = ...; - vkCreateBuffer(device, &bufferInfo, nullptr, &buffers[i]); - - // You can make dummy call to vkGetBufferMemoryRequirements here to silence validation layer warning. - - vkBindBufferMemory(device, buffers[i], allocInfo.deviceMemory, allocInfo.offset); - } -} -\endcode - -Warning! 
This function is not correct according to Vulkan specification. Use it -at your own risk. That's becuase Vulkan doesn't guarantee that memory -requirements (size and alignment) for a new buffer or image are consistent. They -may be different even for subsequent calls with the same parameters. It really -does happen on some platforms, especially with images. - -This function may be time-consuming, so you shouldn't call it too often (like -every frame or after every resource creation/destruction), but rater you can -call it on special occasions (like when reloading a game level, when you just -destroyed a lot of objects). -*/ -VkResult vmaDefragment( - VmaAllocator allocator, - VmaAllocation* pAllocations, - size_t allocationCount, - VkBool32* pAllocationsChanged, - const VmaDefragmentationInfo *pDefragmentationInfo, - VmaDefragmentationStats* pDefragmentationStats); - -/** -@param[out] pBuffer Buffer that was created. -@param[out] pAllocation Allocation that was created. -@param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). - -This function automatically: - --# Creates buffer. --# Allocates appropriate memory for it. --# Binds the buffer with the memory. - -If any of these operations fail, buffer and allocation are not created, -returned value is negative error code, *pBuffer and *pAllocation are null. - -If the function succeeded, you must destroy both buffer and allocation when you -no longer need them using either convenience function vmaDestroyBuffer() or -separately, using `vkDestroyBuffer()` and vmaFreeMemory(). - -If VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag was used, -VK_KHR_dedicated_allocation extension is used internally to query driver whether -it requires or prefers the new buffer to have dedicated allocation. 
If yes, -and if dedicated allocation is possible (VmaAllocationCreateInfo::pool is null -and VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT is not used), it creates dedicated -allocation for this buffer, just like when using -VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. -*/ -VkResult vmaCreateBuffer( - VmaAllocator allocator, - const VkBufferCreateInfo* pBufferCreateInfo, - const VmaAllocationCreateInfo* pAllocationCreateInfo, - VkBuffer* pBuffer, - VmaAllocation* pAllocation, - VmaAllocationInfo* pAllocationInfo); - -/** \brief Destroys Vulkan buffer and frees allocated memory. - -This is just a convenience function equivalent to: - -\code -vkDestroyBuffer(device, buffer, allocationCallbacks); -vmaFreeMemory(allocator, allocation); -\endcode - -It it safe to pass null as buffer and/or allocation. -*/ -void vmaDestroyBuffer( - VmaAllocator allocator, - VkBuffer buffer, - VmaAllocation allocation); - -/// Function similar to vmaCreateBuffer(). -VkResult vmaCreateImage( - VmaAllocator allocator, - const VkImageCreateInfo* pImageCreateInfo, - const VmaAllocationCreateInfo* pAllocationCreateInfo, - VkImage* pImage, - VmaAllocation* pAllocation, - VmaAllocationInfo* pAllocationInfo); - -/** \brief Destroys Vulkan image and frees allocated memory. - -This is just a convenience function equivalent to: - -\code -vkDestroyImage(device, image, allocationCallbacks); -vmaFreeMemory(allocator, allocation); -\endcode - -It it safe to pass null as image and/or allocation. -*/ -void vmaDestroyImage( - VmaAllocator allocator, - VkImage image, - VmaAllocation allocation); - -#ifdef __cplusplus -} -#endif - -#endif // AMD_VULKAN_MEMORY_ALLOCATOR_H - -// For Visual Studio IntelliSense. 
-#ifdef __INTELLISENSE__ -#define VMA_IMPLEMENTATION -#endif - -#ifdef VMA_IMPLEMENTATION -#undef VMA_IMPLEMENTATION - -#include -#include -#include - -/******************************************************************************* -CONFIGURATION SECTION - -Define some of these macros before each #include of this header or change them -here if you need other then default behavior depending on your environment. -*/ - -/* -Define this macro to 1 to make the library fetch pointers to Vulkan functions -internally, like: - - vulkanFunctions.vkAllocateMemory = &vkAllocateMemory; - -Define to 0 if you are going to provide you own pointers to Vulkan functions via -VmaAllocatorCreateInfo::pVulkanFunctions. -*/ -#ifndef VMA_STATIC_VULKAN_FUNCTIONS -#define VMA_STATIC_VULKAN_FUNCTIONS 1 -#endif - -// Define this macro to 1 to make the library use STL containers instead of its own implementation. -//#define VMA_USE_STL_CONTAINERS 1 - -/* Set this macro to 1 to make the library including and using STL containers: -std::pair, std::vector, std::list, std::unordered_map. - -Set it to 0 or undefined to make the library using its own implementation of -the containers. -*/ -#if VMA_USE_STL_CONTAINERS - #define VMA_USE_STL_VECTOR 1 - #define VMA_USE_STL_UNORDERED_MAP 1 - #define VMA_USE_STL_LIST 1 -#endif - -#if VMA_USE_STL_VECTOR - #include -#endif - -#if VMA_USE_STL_UNORDERED_MAP - #include -#endif - -#if VMA_USE_STL_LIST - #include -#endif - -/* -Following headers are used in this CONFIGURATION section only, so feel free to -remove them if not needed. -*/ -#include // for assert -#include // for min, max -#include // for std::mutex -#include // for std::atomic - -#if !defined(_WIN32) - #include // for aligned_alloc() -#endif - -// Normal assert to check for programmer's errors, especially in Debug configuration. 
-#ifndef VMA_ASSERT - #ifdef _DEBUG - #define VMA_ASSERT(expr) assert(expr) - #else - #define VMA_ASSERT(expr) - #endif -#endif - -// Assert that will be called very often, like inside data structures e.g. operator[]. -// Making it non-empty can make program slow. -#ifndef VMA_HEAVY_ASSERT - #ifdef _DEBUG - #define VMA_HEAVY_ASSERT(expr) //VMA_ASSERT(expr) - #else - #define VMA_HEAVY_ASSERT(expr) - #endif -#endif - -#ifndef VMA_NULL - // Value used as null pointer. Define it to e.g.: nullptr, NULL, 0, (void*)0. - #define VMA_NULL nullptr -#endif - -#ifndef VMA_ALIGN_OF - #define VMA_ALIGN_OF(type) (__alignof(type)) -#endif - -#ifndef VMA_SYSTEM_ALIGNED_MALLOC - #if defined(_WIN32) - #define VMA_SYSTEM_ALIGNED_MALLOC(size, alignment) (_aligned_malloc((size), (alignment))) - #else - #define VMA_SYSTEM_ALIGNED_MALLOC(size, alignment) (aligned_alloc((alignment), (size) )) - #endif -#endif - -#ifndef VMA_SYSTEM_FREE - #if defined(_WIN32) - #define VMA_SYSTEM_FREE(ptr) _aligned_free(ptr) - #else - #define VMA_SYSTEM_FREE(ptr) free(ptr) - #endif -#endif - -#ifndef VMA_MIN - #define VMA_MIN(v1, v2) (std::min((v1), (v2))) -#endif - -#ifndef VMA_MAX - #define VMA_MAX(v1, v2) (std::max((v1), (v2))) -#endif - -#ifndef VMA_SWAP - #define VMA_SWAP(v1, v2) std::swap((v1), (v2)) -#endif - -#ifndef VMA_SORT - #define VMA_SORT(beg, end, cmp) std::sort(beg, end, cmp) -#endif - -#ifndef VMA_DEBUG_LOG - #define VMA_DEBUG_LOG(format, ...) - /* - #define VMA_DEBUG_LOG(format, ...) do { \ - printf(format, __VA_ARGS__); \ - printf("\n"); \ - } while(false) - */ -#endif - -// Define this macro to 1 to enable functions: vmaBuildStatsString, vmaFreeStatsString. 
-#if VMA_STATS_STRING_ENABLED - static inline void VmaUint32ToStr(char* outStr, size_t strLen, uint32_t num) - { - snprintf(outStr, strLen, "%u", static_cast(num)); - } - static inline void VmaUint64ToStr(char* outStr, size_t strLen, uint64_t num) - { - snprintf(outStr, strLen, "%llu", static_cast(num)); - } - static inline void VmaPtrToStr(char* outStr, size_t strLen, const void* ptr) - { - snprintf(outStr, strLen, "%p", ptr); - } -#endif - -#ifndef VMA_MUTEX - class VmaMutex - { - public: - VmaMutex() { } - ~VmaMutex() { } - void Lock() { m_Mutex.lock(); } - void Unlock() { m_Mutex.unlock(); } - private: - std::mutex m_Mutex; - }; - #define VMA_MUTEX VmaMutex -#endif - -/* -If providing your own implementation, you need to implement a subset of std::atomic: - -- Constructor(uint32_t desired) -- uint32_t load() const -- void store(uint32_t desired) -- bool compare_exchange_weak(uint32_t& expected, uint32_t desired) -*/ -#ifndef VMA_ATOMIC_UINT32 - #define VMA_ATOMIC_UINT32 std::atomic -#endif - -#ifndef VMA_BEST_FIT - /** - Main parameter for function assessing how good is a free suballocation for a new - allocation request. - - - Set to 1 to use Best-Fit algorithm - prefer smaller blocks, as close to the - size of requested allocations as possible. - - Set to 0 to use Worst-Fit algorithm - prefer larger blocks, as large as - possible. - - Experiments in special testing environment showed that Best-Fit algorithm is - better. - */ - #define VMA_BEST_FIT (1) -#endif - -#ifndef VMA_DEBUG_ALWAYS_DEDICATED_MEMORY - /** - Every allocation will have its own memory block. - Define to 1 for debugging purposes only. - */ - #define VMA_DEBUG_ALWAYS_DEDICATED_MEMORY (0) -#endif - -#ifndef VMA_DEBUG_ALIGNMENT - /** - Minimum alignment of all suballocations, in bytes. - Set to more than 1 for debugging purposes only. Must be power of two. - */ - #define VMA_DEBUG_ALIGNMENT (1) -#endif - -#ifndef VMA_DEBUG_MARGIN - /** - Minimum margin between suballocations, in bytes. 
- Set nonzero for debugging purposes only. - */ - #define VMA_DEBUG_MARGIN (0) -#endif - -#ifndef VMA_DEBUG_GLOBAL_MUTEX - /** - Set this to 1 for debugging purposes only, to enable single mutex protecting all - entry calls to the library. Can be useful for debugging multithreading issues. - */ - #define VMA_DEBUG_GLOBAL_MUTEX (0) -#endif - -#ifndef VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY - /** - Minimum value for VkPhysicalDeviceLimits::bufferImageGranularity. - Set to more than 1 for debugging purposes only. Must be power of two. - */ - #define VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY (1) -#endif - -#ifndef VMA_SMALL_HEAP_MAX_SIZE - /// Maximum size of a memory heap in Vulkan to consider it "small". - #define VMA_SMALL_HEAP_MAX_SIZE (512 * 1024 * 1024) -#endif - -#ifndef VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE - /// Default size of a block allocated as single VkDeviceMemory from a "large" heap. - #define VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE (256 * 1024 * 1024) -#endif - -#ifndef VMA_DEFAULT_SMALL_HEAP_BLOCK_SIZE - /// Default size of a block allocated as single VkDeviceMemory from a "small" heap. - #define VMA_DEFAULT_SMALL_HEAP_BLOCK_SIZE (64 * 1024 * 1024) -#endif - -static const uint32_t VMA_FRAME_INDEX_LOST = UINT32_MAX; - -/******************************************************************************* -END OF CONFIGURATION -*/ - -static VkAllocationCallbacks VmaEmptyAllocationCallbacks = { - VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL }; - -// Returns number of bits set to 1 in (v). -static inline uint32_t VmaCountBitsSet(uint32_t v) -{ - uint32_t c = v - ((v >> 1) & 0x55555555); - c = ((c >> 2) & 0x33333333) + (c & 0x33333333); - c = ((c >> 4) + c) & 0x0F0F0F0F; - c = ((c >> 8) + c) & 0x00FF00FF; - c = ((c >> 16) + c) & 0x0000FFFF; - return c; -} - -// Aligns given value up to nearest multiply of align value. For example: VmaAlignUp(11, 8) = 16. -// Use types like uint32_t, uint64_t as T. 
-template -static inline T VmaAlignUp(T val, T align) -{ - return (val + align - 1) / align * align; -} - -// Division with mathematical rounding to nearest number. -template -inline T VmaRoundDiv(T x, T y) -{ - return (x + (y / (T)2)) / y; -} - -#ifndef VMA_SORT - -template -Iterator VmaQuickSortPartition(Iterator beg, Iterator end, Compare cmp) -{ - Iterator centerValue = end; --centerValue; - Iterator insertIndex = beg; - for(Iterator memTypeIndex = beg; memTypeIndex < centerValue; ++memTypeIndex) - { - if(cmp(*memTypeIndex, *centerValue)) - { - if(insertIndex != memTypeIndex) - { - VMA_SWAP(*memTypeIndex, *insertIndex); - } - ++insertIndex; - } - } - if(insertIndex != centerValue) - { - VMA_SWAP(*insertIndex, *centerValue); - } - return insertIndex; -} - -template -void VmaQuickSort(Iterator beg, Iterator end, Compare cmp) -{ - if(beg < end) - { - Iterator it = VmaQuickSortPartition(beg, end, cmp); - VmaQuickSort(beg, it, cmp); - VmaQuickSort(it + 1, end, cmp); - } -} - -#define VMA_SORT(beg, end, cmp) VmaQuickSort(beg, end, cmp) - -#endif // #ifndef VMA_SORT - -/* -Returns true if two memory blocks occupy overlapping pages. -ResourceA must be in less memory offset than ResourceB. - -Algorithm is based on "Vulkan 1.0.39 - A Specification (with all registered Vulkan extensions)" -chapter 11.6 "Resource Memory Association", paragraph "Buffer-Image Granularity". 
-*/ -static inline bool VmaBlocksOnSamePage( - VkDeviceSize resourceAOffset, - VkDeviceSize resourceASize, - VkDeviceSize resourceBOffset, - VkDeviceSize pageSize) -{ - VMA_ASSERT(resourceAOffset + resourceASize <= resourceBOffset && resourceASize > 0 && pageSize > 0); - VkDeviceSize resourceAEnd = resourceAOffset + resourceASize - 1; - VkDeviceSize resourceAEndPage = resourceAEnd & ~(pageSize - 1); - VkDeviceSize resourceBStart = resourceBOffset; - VkDeviceSize resourceBStartPage = resourceBStart & ~(pageSize - 1); - return resourceAEndPage == resourceBStartPage; -} - -enum VmaSuballocationType -{ - VMA_SUBALLOCATION_TYPE_FREE = 0, - VMA_SUBALLOCATION_TYPE_UNKNOWN = 1, - VMA_SUBALLOCATION_TYPE_BUFFER = 2, - VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN = 3, - VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR = 4, - VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL = 5, - VMA_SUBALLOCATION_TYPE_MAX_ENUM = 0x7FFFFFFF -}; - -/* -Returns true if given suballocation types could conflict and must respect -VkPhysicalDeviceLimits::bufferImageGranularity. They conflict if one is buffer -or linear image and another one is optimal image. If type is unknown, behave -conservatively. 
-*/ -static inline bool VmaIsBufferImageGranularityConflict( - VmaSuballocationType suballocType1, - VmaSuballocationType suballocType2) -{ - if(suballocType1 > suballocType2) - { - VMA_SWAP(suballocType1, suballocType2); - } - - switch(suballocType1) - { - case VMA_SUBALLOCATION_TYPE_FREE: - return false; - case VMA_SUBALLOCATION_TYPE_UNKNOWN: - return true; - case VMA_SUBALLOCATION_TYPE_BUFFER: - return - suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN || - suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL; - case VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN: - return - suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN || - suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR || - suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL; - case VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR: - return - suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL; - case VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL: - return false; - default: - VMA_ASSERT(0); - return true; - } -} - -// Helper RAII class to lock a mutex in constructor and unlock it in destructor (at the end of scope). -struct VmaMutexLock -{ -public: - VmaMutexLock(VMA_MUTEX& mutex, bool useMutex) : - m_pMutex(useMutex ? &mutex : VMA_NULL) - { - if(m_pMutex) - { - m_pMutex->Lock(); - } - } - - ~VmaMutexLock() - { - if(m_pMutex) - { - m_pMutex->Unlock(); - } - } - -private: - VMA_MUTEX* m_pMutex; -}; - -#if VMA_DEBUG_GLOBAL_MUTEX - static VMA_MUTEX gDebugGlobalMutex; - #define VMA_DEBUG_GLOBAL_MUTEX_LOCK VmaMutexLock debugGlobalMutexLock(gDebugGlobalMutex, true); -#else - #define VMA_DEBUG_GLOBAL_MUTEX_LOCK -#endif - -// Minimum size of a free suballocation to register it in the free suballocation collection. -static const VkDeviceSize VMA_MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER = 16; - -/* -Performs binary search and returns iterator to first element that is greater or -equal to (key), according to comparison (cmp). - -Cmp should return true if first argument is less than second argument. 
- -Returned value is the found element, if present in the collection or place where -new element with value (key) should be inserted. -*/ -template -static IterT VmaBinaryFindFirstNotLess(IterT beg, IterT end, const KeyT &key, CmpT cmp) -{ - size_t down = 0, up = (end - beg); - while(down < up) - { - const size_t mid = (down + up) / 2; - if(cmp(*(beg+mid), key)) - { - down = mid + 1; - } - else - { - up = mid; - } - } - return beg + down; -} - -//////////////////////////////////////////////////////////////////////////////// -// Memory allocation - -static void* VmaMalloc(const VkAllocationCallbacks* pAllocationCallbacks, size_t size, size_t alignment) -{ - if((pAllocationCallbacks != VMA_NULL) && - (pAllocationCallbacks->pfnAllocation != VMA_NULL)) - { - return (*pAllocationCallbacks->pfnAllocation)( - pAllocationCallbacks->pUserData, - size, - alignment, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - } - else - { - return VMA_SYSTEM_ALIGNED_MALLOC(size, alignment); - } -} - -static void VmaFree(const VkAllocationCallbacks* pAllocationCallbacks, void* ptr) -{ - if((pAllocationCallbacks != VMA_NULL) && - (pAllocationCallbacks->pfnFree != VMA_NULL)) - { - (*pAllocationCallbacks->pfnFree)(pAllocationCallbacks->pUserData, ptr); - } - else - { - VMA_SYSTEM_FREE(ptr); - } -} - -template -static T* VmaAllocate(const VkAllocationCallbacks* pAllocationCallbacks) -{ - return (T*)VmaMalloc(pAllocationCallbacks, sizeof(T), VMA_ALIGN_OF(T)); -} - -template -static T* VmaAllocateArray(const VkAllocationCallbacks* pAllocationCallbacks, size_t count) -{ - return (T*)VmaMalloc(pAllocationCallbacks, sizeof(T) * count, VMA_ALIGN_OF(T)); -} - -#define vma_new(allocator, type) new(VmaAllocate(allocator))(type) - -#define vma_new_array(allocator, type, count) new(VmaAllocateArray((allocator), (count)))(type) - -template -static void vma_delete(const VkAllocationCallbacks* pAllocationCallbacks, T* ptr) -{ - ptr->~T(); - VmaFree(pAllocationCallbacks, ptr); -} - -template -static void 
vma_delete_array(const VkAllocationCallbacks* pAllocationCallbacks, T* ptr, size_t count) -{ - if(ptr != VMA_NULL) - { - for(size_t i = count; i--; ) - { - ptr[i].~T(); - } - VmaFree(pAllocationCallbacks, ptr); - } -} - -// STL-compatible allocator. -template -class VmaStlAllocator -{ -public: - const VkAllocationCallbacks* const m_pCallbacks; - typedef T value_type; - - VmaStlAllocator(const VkAllocationCallbacks* pCallbacks) : m_pCallbacks(pCallbacks) { } - template VmaStlAllocator(const VmaStlAllocator& src) : m_pCallbacks(src.m_pCallbacks) { } - - T* allocate(size_t n) { return VmaAllocateArray(m_pCallbacks, n); } - void deallocate(T* p, size_t n) { VmaFree(m_pCallbacks, p); } - - template - bool operator==(const VmaStlAllocator& rhs) const - { - return m_pCallbacks == rhs.m_pCallbacks; - } - template - bool operator!=(const VmaStlAllocator& rhs) const - { - return m_pCallbacks != rhs.m_pCallbacks; - } - - VmaStlAllocator& operator=(const VmaStlAllocator& x) = delete; -}; - -#if VMA_USE_STL_VECTOR - -#define VmaVector std::vector - -template -static void VmaVectorInsert(std::vector& vec, size_t index, const T& item) -{ - vec.insert(vec.begin() + index, item); -} - -template -static void VmaVectorRemove(std::vector& vec, size_t index) -{ - vec.erase(vec.begin() + index); -} - -#else // #if VMA_USE_STL_VECTOR - -/* Class with interface compatible with subset of std::vector. -T must be POD because constructors and destructors are not called and memcpy is -used for these objects. */ -template -class VmaVector -{ -public: - typedef T value_type; - - VmaVector(const AllocatorT& allocator) : - m_Allocator(allocator), - m_pArray(VMA_NULL), - m_Count(0), - m_Capacity(0) - { - } - - VmaVector(size_t count, const AllocatorT& allocator) : - m_Allocator(allocator), - m_pArray(count ? 
(T*)VmaAllocateArray(allocator.m_pCallbacks, count) : VMA_NULL), - m_Count(count), - m_Capacity(count) - { - } - - VmaVector(const VmaVector& src) : - m_Allocator(src.m_Allocator), - m_pArray(src.m_Count ? (T*)VmaAllocateArray(src.m_Allocator.m_pCallbacks, src.m_Count) : VMA_NULL), - m_Count(src.m_Count), - m_Capacity(src.m_Count) - { - if(m_Count != 0) - { - memcpy(m_pArray, src.m_pArray, m_Count * sizeof(T)); - } - } - - ~VmaVector() - { - VmaFree(m_Allocator.m_pCallbacks, m_pArray); - } - - VmaVector& operator=(const VmaVector& rhs) - { - if(&rhs != this) - { - resize(rhs.m_Count); - if(m_Count != 0) - { - memcpy(m_pArray, rhs.m_pArray, m_Count * sizeof(T)); - } - } - return *this; - } - - bool empty() const { return m_Count == 0; } - size_t size() const { return m_Count; } - T* data() { return m_pArray; } - const T* data() const { return m_pArray; } - - T& operator[](size_t index) - { - VMA_HEAVY_ASSERT(index < m_Count); - return m_pArray[index]; - } - const T& operator[](size_t index) const - { - VMA_HEAVY_ASSERT(index < m_Count); - return m_pArray[index]; - } - - T& front() - { - VMA_HEAVY_ASSERT(m_Count > 0); - return m_pArray[0]; - } - const T& front() const - { - VMA_HEAVY_ASSERT(m_Count > 0); - return m_pArray[0]; - } - T& back() - { - VMA_HEAVY_ASSERT(m_Count > 0); - return m_pArray[m_Count - 1]; - } - const T& back() const - { - VMA_HEAVY_ASSERT(m_Count > 0); - return m_pArray[m_Count - 1]; - } - - void reserve(size_t newCapacity, bool freeMemory = false) - { - newCapacity = VMA_MAX(newCapacity, m_Count); - - if((newCapacity < m_Capacity) && !freeMemory) - { - newCapacity = m_Capacity; - } - - if(newCapacity != m_Capacity) - { - T* const newArray = newCapacity ? 
VmaAllocateArray(m_Allocator, newCapacity) : VMA_NULL; - if(m_Count != 0) - { - memcpy(newArray, m_pArray, m_Count * sizeof(T)); - } - VmaFree(m_Allocator.m_pCallbacks, m_pArray); - m_Capacity = newCapacity; - m_pArray = newArray; - } - } - - void resize(size_t newCount, bool freeMemory = false) - { - size_t newCapacity = m_Capacity; - if(newCount > m_Capacity) - { - newCapacity = VMA_MAX(newCount, VMA_MAX(m_Capacity * 3 / 2, (size_t)8)); - } - else if(freeMemory) - { - newCapacity = newCount; - } - - if(newCapacity != m_Capacity) - { - T* const newArray = newCapacity ? VmaAllocateArray(m_Allocator.m_pCallbacks, newCapacity) : VMA_NULL; - const size_t elementsToCopy = VMA_MIN(m_Count, newCount); - if(elementsToCopy != 0) - { - memcpy(newArray, m_pArray, elementsToCopy * sizeof(T)); - } - VmaFree(m_Allocator.m_pCallbacks, m_pArray); - m_Capacity = newCapacity; - m_pArray = newArray; - } - - m_Count = newCount; - } - - void clear(bool freeMemory = false) - { - resize(0, freeMemory); - } - - void insert(size_t index, const T& src) - { - VMA_HEAVY_ASSERT(index <= m_Count); - const size_t oldCount = size(); - resize(oldCount + 1); - if(index < oldCount) - { - memmove(m_pArray + (index + 1), m_pArray + index, (oldCount - index) * sizeof(T)); - } - m_pArray[index] = src; - } - - void remove(size_t index) - { - VMA_HEAVY_ASSERT(index < m_Count); - const size_t oldCount = size(); - if(index < oldCount - 1) - { - memmove(m_pArray + index, m_pArray + (index + 1), (oldCount - index - 1) * sizeof(T)); - } - resize(oldCount - 1); - } - - void push_back(const T& src) - { - const size_t newIndex = size(); - resize(newIndex + 1); - m_pArray[newIndex] = src; - } - - void pop_back() - { - VMA_HEAVY_ASSERT(m_Count > 0); - resize(size() - 1); - } - - void push_front(const T& src) - { - insert(0, src); - } - - void pop_front() - { - VMA_HEAVY_ASSERT(m_Count > 0); - remove(0); - } - - typedef T* iterator; - - iterator begin() { return m_pArray; } - iterator end() { return m_pArray + 
m_Count; } - -private: - AllocatorT m_Allocator; - T* m_pArray; - size_t m_Count; - size_t m_Capacity; -}; - -template -static void VmaVectorInsert(VmaVector& vec, size_t index, const T& item) -{ - vec.insert(index, item); -} - -template -static void VmaVectorRemove(VmaVector& vec, size_t index) -{ - vec.remove(index); -} - -#endif // #if VMA_USE_STL_VECTOR - -template -size_t VmaVectorInsertSorted(VectorT& vector, const typename VectorT::value_type& value) -{ - const size_t indexToInsert = VmaBinaryFindFirstNotLess( - vector.data(), - vector.data() + vector.size(), - value, - CmpLess()) - vector.data(); - VmaVectorInsert(vector, indexToInsert, value); - return indexToInsert; -} - -template -bool VmaVectorRemoveSorted(VectorT& vector, const typename VectorT::value_type& value) -{ - CmpLess comparator; - typename VectorT::iterator it = VmaBinaryFindFirstNotLess( - vector.begin(), - vector.end(), - value, - comparator); - if((it != vector.end()) && !comparator(*it, value) && !comparator(value, *it)) - { - size_t indexToRemove = it - vector.begin(); - VmaVectorRemove(vector, indexToRemove); - return true; - } - return false; -} - -template -size_t VmaVectorFindSorted(const VectorT& vector, const typename VectorT::value_type& value) -{ - CmpLess comparator; - typename VectorT::iterator it = VmaBinaryFindFirstNotLess( - vector.data(), - vector.data() + vector.size(), - value, - comparator); - if(it != vector.size() && !comparator(*it, value) && !comparator(value, *it)) - { - return it - vector.begin(); - } - else - { - return vector.size(); - } -} - -//////////////////////////////////////////////////////////////////////////////// -// class VmaPoolAllocator - -/* -Allocator for objects of type T using a list of arrays (pools) to speed up -allocation. Number of elements that can be allocated is not bounded because -allocator can create multiple blocks. 
-*/ -template -class VmaPoolAllocator -{ -public: - VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, size_t itemsPerBlock); - ~VmaPoolAllocator(); - void Clear(); - T* Alloc(); - void Free(T* ptr); - -private: - union Item - { - uint32_t NextFreeIndex; - T Value; - }; - - struct ItemBlock - { - Item* pItems; - uint32_t FirstFreeIndex; - }; - - const VkAllocationCallbacks* m_pAllocationCallbacks; - size_t m_ItemsPerBlock; - VmaVector< ItemBlock, VmaStlAllocator > m_ItemBlocks; - - ItemBlock& CreateNewBlock(); -}; - -template -VmaPoolAllocator::VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, size_t itemsPerBlock) : - m_pAllocationCallbacks(pAllocationCallbacks), - m_ItemsPerBlock(itemsPerBlock), - m_ItemBlocks(VmaStlAllocator(pAllocationCallbacks)) -{ - VMA_ASSERT(itemsPerBlock > 0); -} - -template -VmaPoolAllocator::~VmaPoolAllocator() -{ - Clear(); -} - -template -void VmaPoolAllocator::Clear() -{ - for(size_t i = m_ItemBlocks.size(); i--; ) - vma_delete_array(m_pAllocationCallbacks, m_ItemBlocks[i].pItems, m_ItemsPerBlock); - m_ItemBlocks.clear(); -} - -template -T* VmaPoolAllocator::Alloc() -{ - for(size_t i = m_ItemBlocks.size(); i--; ) - { - ItemBlock& block = m_ItemBlocks[i]; - // This block has some free items: Use first one. - if(block.FirstFreeIndex != UINT32_MAX) - { - Item* const pItem = &block.pItems[block.FirstFreeIndex]; - block.FirstFreeIndex = pItem->NextFreeIndex; - return &pItem->Value; - } - } - - // No block has free item: Create new one and use it. - ItemBlock& newBlock = CreateNewBlock(); - Item* const pItem = &newBlock.pItems[0]; - newBlock.FirstFreeIndex = pItem->NextFreeIndex; - return &pItem->Value; -} - -template -void VmaPoolAllocator::Free(T* ptr) -{ - // Search all memory blocks to find ptr. - for(size_t i = 0; i < m_ItemBlocks.size(); ++i) - { - ItemBlock& block = m_ItemBlocks[i]; - - // Casting to union. 
- Item* pItemPtr; - memcpy(&pItemPtr, &ptr, sizeof(pItemPtr)); - - // Check if pItemPtr is in address range of this block. - if((pItemPtr >= block.pItems) && (pItemPtr < block.pItems + m_ItemsPerBlock)) - { - const uint32_t index = static_cast(pItemPtr - block.pItems); - pItemPtr->NextFreeIndex = block.FirstFreeIndex; - block.FirstFreeIndex = index; - return; - } - } - VMA_ASSERT(0 && "Pointer doesn't belong to this memory pool."); -} - -template -typename VmaPoolAllocator::ItemBlock& VmaPoolAllocator::CreateNewBlock() -{ - ItemBlock newBlock = { - vma_new_array(m_pAllocationCallbacks, Item, m_ItemsPerBlock), 0 }; - - m_ItemBlocks.push_back(newBlock); - - // Setup singly-linked list of all free items in this block. - for(uint32_t i = 0; i < m_ItemsPerBlock - 1; ++i) - newBlock.pItems[i].NextFreeIndex = i + 1; - newBlock.pItems[m_ItemsPerBlock - 1].NextFreeIndex = UINT32_MAX; - return m_ItemBlocks.back(); -} - -//////////////////////////////////////////////////////////////////////////////// -// class VmaRawList, VmaList - -#if VMA_USE_STL_LIST - -#define VmaList std::list - -#else // #if VMA_USE_STL_LIST - -template -struct VmaListItem -{ - VmaListItem* pPrev; - VmaListItem* pNext; - T Value; -}; - -// Doubly linked list. -template -class VmaRawList -{ -public: - typedef VmaListItem ItemType; - - VmaRawList(const VkAllocationCallbacks* pAllocationCallbacks); - ~VmaRawList(); - void Clear(); - - size_t GetCount() const { return m_Count; } - bool IsEmpty() const { return m_Count == 0; } - - ItemType* Front() { return m_pFront; } - const ItemType* Front() const { return m_pFront; } - ItemType* Back() { return m_pBack; } - const ItemType* Back() const { return m_pBack; } - - ItemType* PushBack(); - ItemType* PushFront(); - ItemType* PushBack(const T& value); - ItemType* PushFront(const T& value); - void PopBack(); - void PopFront(); - - // Item can be null - it means PushBack. - ItemType* InsertBefore(ItemType* pItem); - // Item can be null - it means PushFront. 
- ItemType* InsertAfter(ItemType* pItem); - - ItemType* InsertBefore(ItemType* pItem, const T& value); - ItemType* InsertAfter(ItemType* pItem, const T& value); - - void Remove(ItemType* pItem); - -private: - const VkAllocationCallbacks* const m_pAllocationCallbacks; - VmaPoolAllocator m_ItemAllocator; - ItemType* m_pFront; - ItemType* m_pBack; - size_t m_Count; - - // Declared not defined, to block copy constructor and assignment operator. - VmaRawList(const VmaRawList& src); - VmaRawList& operator=(const VmaRawList& rhs); -}; - -template -VmaRawList::VmaRawList(const VkAllocationCallbacks* pAllocationCallbacks) : - m_pAllocationCallbacks(pAllocationCallbacks), - m_ItemAllocator(pAllocationCallbacks, 128), - m_pFront(VMA_NULL), - m_pBack(VMA_NULL), - m_Count(0) -{ -} - -template -VmaRawList::~VmaRawList() -{ - // Intentionally not calling Clear, because that would be unnecessary - // computations to return all items to m_ItemAllocator as free. -} - -template -void VmaRawList::Clear() -{ - if(IsEmpty() == false) - { - ItemType* pItem = m_pBack; - while(pItem != VMA_NULL) - { - ItemType* const pPrevItem = pItem->pPrev; - m_ItemAllocator.Free(pItem); - pItem = pPrevItem; - } - m_pFront = VMA_NULL; - m_pBack = VMA_NULL; - m_Count = 0; - } -} - -template -VmaListItem* VmaRawList::PushBack() -{ - ItemType* const pNewItem = m_ItemAllocator.Alloc(); - pNewItem->pNext = VMA_NULL; - if(IsEmpty()) - { - pNewItem->pPrev = VMA_NULL; - m_pFront = pNewItem; - m_pBack = pNewItem; - m_Count = 1; - } - else - { - pNewItem->pPrev = m_pBack; - m_pBack->pNext = pNewItem; - m_pBack = pNewItem; - ++m_Count; - } - return pNewItem; -} - -template -VmaListItem* VmaRawList::PushFront() -{ - ItemType* const pNewItem = m_ItemAllocator.Alloc(); - pNewItem->pPrev = VMA_NULL; - if(IsEmpty()) - { - pNewItem->pNext = VMA_NULL; - m_pFront = pNewItem; - m_pBack = pNewItem; - m_Count = 1; - } - else - { - pNewItem->pNext = m_pFront; - m_pFront->pPrev = pNewItem; - m_pFront = pNewItem; - ++m_Count; - 
} - return pNewItem; -} - -template -VmaListItem* VmaRawList::PushBack(const T& value) -{ - ItemType* const pNewItem = PushBack(); - pNewItem->Value = value; - return pNewItem; -} - -template -VmaListItem* VmaRawList::PushFront(const T& value) -{ - ItemType* const pNewItem = PushFront(); - pNewItem->Value = value; - return pNewItem; -} - -template -void VmaRawList::PopBack() -{ - VMA_HEAVY_ASSERT(m_Count > 0); - ItemType* const pBackItem = m_pBack; - ItemType* const pPrevItem = pBackItem->pPrev; - if(pPrevItem != VMA_NULL) - { - pPrevItem->pNext = VMA_NULL; - } - m_pBack = pPrevItem; - m_ItemAllocator.Free(pBackItem); - --m_Count; -} - -template -void VmaRawList::PopFront() -{ - VMA_HEAVY_ASSERT(m_Count > 0); - ItemType* const pFrontItem = m_pFront; - ItemType* const pNextItem = pFrontItem->pNext; - if(pNextItem != VMA_NULL) - { - pNextItem->pPrev = VMA_NULL; - } - m_pFront = pNextItem; - m_ItemAllocator.Free(pFrontItem); - --m_Count; -} - -template -void VmaRawList::Remove(ItemType* pItem) -{ - VMA_HEAVY_ASSERT(pItem != VMA_NULL); - VMA_HEAVY_ASSERT(m_Count > 0); - - if(pItem->pPrev != VMA_NULL) - { - pItem->pPrev->pNext = pItem->pNext; - } - else - { - VMA_HEAVY_ASSERT(m_pFront == pItem); - m_pFront = pItem->pNext; - } - - if(pItem->pNext != VMA_NULL) - { - pItem->pNext->pPrev = pItem->pPrev; - } - else - { - VMA_HEAVY_ASSERT(m_pBack == pItem); - m_pBack = pItem->pPrev; - } - - m_ItemAllocator.Free(pItem); - --m_Count; -} - -template -VmaListItem* VmaRawList::InsertBefore(ItemType* pItem) -{ - if(pItem != VMA_NULL) - { - ItemType* const prevItem = pItem->pPrev; - ItemType* const newItem = m_ItemAllocator.Alloc(); - newItem->pPrev = prevItem; - newItem->pNext = pItem; - pItem->pPrev = newItem; - if(prevItem != VMA_NULL) - { - prevItem->pNext = newItem; - } - else - { - VMA_HEAVY_ASSERT(m_pFront == pItem); - m_pFront = newItem; - } - ++m_Count; - return newItem; - } - else - return PushBack(); -} - -template -VmaListItem* VmaRawList::InsertAfter(ItemType* pItem) -{ 
- if(pItem != VMA_NULL) - { - ItemType* const nextItem = pItem->pNext; - ItemType* const newItem = m_ItemAllocator.Alloc(); - newItem->pNext = nextItem; - newItem->pPrev = pItem; - pItem->pNext = newItem; - if(nextItem != VMA_NULL) - { - nextItem->pPrev = newItem; - } - else - { - VMA_HEAVY_ASSERT(m_pBack == pItem); - m_pBack = newItem; - } - ++m_Count; - return newItem; - } - else - return PushFront(); -} - -template -VmaListItem* VmaRawList::InsertBefore(ItemType* pItem, const T& value) -{ - ItemType* const newItem = InsertBefore(pItem); - newItem->Value = value; - return newItem; -} - -template -VmaListItem* VmaRawList::InsertAfter(ItemType* pItem, const T& value) -{ - ItemType* const newItem = InsertAfter(pItem); - newItem->Value = value; - return newItem; -} - -template -class VmaList -{ -public: - class iterator - { - public: - iterator() : - m_pList(VMA_NULL), - m_pItem(VMA_NULL) - { - } - - T& operator*() const - { - VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); - return m_pItem->Value; - } - T* operator->() const - { - VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); - return &m_pItem->Value; - } - - iterator& operator++() - { - VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); - m_pItem = m_pItem->pNext; - return *this; - } - iterator& operator--() - { - if(m_pItem != VMA_NULL) - { - m_pItem = m_pItem->pPrev; - } - else - { - VMA_HEAVY_ASSERT(!m_pList.IsEmpty()); - m_pItem = m_pList->Back(); - } - return *this; - } - - iterator operator++(int) - { - iterator result = *this; - ++*this; - return result; - } - iterator operator--(int) - { - iterator result = *this; - --*this; - return result; - } - - bool operator==(const iterator& rhs) const - { - VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); - return m_pItem == rhs.m_pItem; - } - bool operator!=(const iterator& rhs) const - { - VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); - return m_pItem != rhs.m_pItem; - } - - private: - VmaRawList* m_pList; - VmaListItem* m_pItem; - - iterator(VmaRawList* pList, VmaListItem* pItem) : - m_pList(pList), - 
m_pItem(pItem) - { - } - - friend class VmaList; - }; - - class const_iterator - { - public: - const_iterator() : - m_pList(VMA_NULL), - m_pItem(VMA_NULL) - { - } - - const_iterator(const iterator& src) : - m_pList(src.m_pList), - m_pItem(src.m_pItem) - { - } - - const T& operator*() const - { - VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); - return m_pItem->Value; - } - const T* operator->() const - { - VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); - return &m_pItem->Value; - } - - const_iterator& operator++() - { - VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); - m_pItem = m_pItem->pNext; - return *this; - } - const_iterator& operator--() - { - if(m_pItem != VMA_NULL) - { - m_pItem = m_pItem->pPrev; - } - else - { - VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); - m_pItem = m_pList->Back(); - } - return *this; - } - - const_iterator operator++(int) - { - const_iterator result = *this; - ++*this; - return result; - } - const_iterator operator--(int) - { - const_iterator result = *this; - --*this; - return result; - } - - bool operator==(const const_iterator& rhs) const - { - VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); - return m_pItem == rhs.m_pItem; - } - bool operator!=(const const_iterator& rhs) const - { - VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); - return m_pItem != rhs.m_pItem; - } - - private: - const_iterator(const VmaRawList* pList, const VmaListItem* pItem) : - m_pList(pList), - m_pItem(pItem) - { - } - - const VmaRawList* m_pList; - const VmaListItem* m_pItem; - - friend class VmaList; - }; - - VmaList(const AllocatorT& allocator) : m_RawList(allocator.m_pCallbacks) { } - - bool empty() const { return m_RawList.IsEmpty(); } - size_t size() const { return m_RawList.GetCount(); } - - iterator begin() { return iterator(&m_RawList, m_RawList.Front()); } - iterator end() { return iterator(&m_RawList, VMA_NULL); } - - const_iterator cbegin() const { return const_iterator(&m_RawList, m_RawList.Front()); } - const_iterator cend() const { return const_iterator(&m_RawList, VMA_NULL); } - - void 
clear() { m_RawList.Clear(); } - void push_back(const T& value) { m_RawList.PushBack(value); } - void erase(iterator it) { m_RawList.Remove(it.m_pItem); } - iterator insert(iterator it, const T& value) { return iterator(&m_RawList, m_RawList.InsertBefore(it.m_pItem, value)); } - -private: - VmaRawList m_RawList; -}; - -#endif // #if VMA_USE_STL_LIST - -//////////////////////////////////////////////////////////////////////////////// -// class VmaMap - -// Unused in this version. -#if 0 - -#if VMA_USE_STL_UNORDERED_MAP - -#define VmaPair std::pair - -#define VMA_MAP_TYPE(KeyT, ValueT) \ - std::unordered_map< KeyT, ValueT, std::hash, std::equal_to, VmaStlAllocator< std::pair > > - -#else // #if VMA_USE_STL_UNORDERED_MAP - -template -struct VmaPair -{ - T1 first; - T2 second; - - VmaPair() : first(), second() { } - VmaPair(const T1& firstSrc, const T2& secondSrc) : first(firstSrc), second(secondSrc) { } -}; - -/* Class compatible with subset of interface of std::unordered_map. -KeyT, ValueT must be POD because they will be stored in VmaVector. 
-*/ -template -class VmaMap -{ -public: - typedef VmaPair PairType; - typedef PairType* iterator; - - VmaMap(const VmaStlAllocator& allocator) : m_Vector(allocator) { } - - iterator begin() { return m_Vector.begin(); } - iterator end() { return m_Vector.end(); } - - void insert(const PairType& pair); - iterator find(const KeyT& key); - void erase(iterator it); - -private: - VmaVector< PairType, VmaStlAllocator > m_Vector; -}; - -#define VMA_MAP_TYPE(KeyT, ValueT) VmaMap - -template -struct VmaPairFirstLess -{ - bool operator()(const VmaPair& lhs, const VmaPair& rhs) const - { - return lhs.first < rhs.first; - } - bool operator()(const VmaPair& lhs, const FirstT& rhsFirst) const - { - return lhs.first < rhsFirst; - } -}; - -template -void VmaMap::insert(const PairType& pair) -{ - const size_t indexToInsert = VmaBinaryFindFirstNotLess( - m_Vector.data(), - m_Vector.data() + m_Vector.size(), - pair, - VmaPairFirstLess()) - m_Vector.data(); - VmaVectorInsert(m_Vector, indexToInsert, pair); -} - -template -VmaPair* VmaMap::find(const KeyT& key) -{ - PairType* it = VmaBinaryFindFirstNotLess( - m_Vector.data(), - m_Vector.data() + m_Vector.size(), - key, - VmaPairFirstLess()); - if((it != m_Vector.end()) && (it->first == key)) - { - return it; - } - else - { - return m_Vector.end(); - } -} - -template -void VmaMap::erase(iterator it) -{ - VmaVectorRemove(m_Vector, it - m_Vector.begin()); -} - -#endif // #if VMA_USE_STL_UNORDERED_MAP - -#endif // #if 0 - -//////////////////////////////////////////////////////////////////////////////// - -class VmaDeviceMemoryBlock; - -struct VmaAllocation_T -{ -private: - static const uint8_t MAP_COUNT_FLAG_PERSISTENT_MAP = 0x80; - - enum FLAGS - { - FLAG_USER_DATA_STRING = 0x01, - }; - -public: - enum ALLOCATION_TYPE - { - ALLOCATION_TYPE_NONE, - ALLOCATION_TYPE_BLOCK, - ALLOCATION_TYPE_DEDICATED, - }; - - VmaAllocation_T(uint32_t currentFrameIndex, bool userDataString) : - m_Alignment(1), - m_Size(0), - m_pUserData(VMA_NULL), - 
m_LastUseFrameIndex(currentFrameIndex), - m_Type((uint8_t)ALLOCATION_TYPE_NONE), - m_SuballocationType((uint8_t)VMA_SUBALLOCATION_TYPE_UNKNOWN), - m_MapCount(0), - m_Flags(userDataString ? (uint8_t)FLAG_USER_DATA_STRING : 0) - { - } - - ~VmaAllocation_T() - { - VMA_ASSERT((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) == 0 && "Allocation was not unmapped before destruction."); - - // Check if owned string was freed. - VMA_ASSERT(m_pUserData == VMA_NULL); - } - - void InitBlockAllocation( - VmaPool hPool, - VmaDeviceMemoryBlock* block, - VkDeviceSize offset, - VkDeviceSize alignment, - VkDeviceSize size, - VmaSuballocationType suballocationType, - bool mapped, - bool canBecomeLost) - { - VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); - VMA_ASSERT(block != VMA_NULL); - m_Type = (uint8_t)ALLOCATION_TYPE_BLOCK; - m_Alignment = alignment; - m_Size = size; - m_MapCount = mapped ? MAP_COUNT_FLAG_PERSISTENT_MAP : 0; - m_SuballocationType = (uint8_t)suballocationType; - m_BlockAllocation.m_hPool = hPool; - m_BlockAllocation.m_Block = block; - m_BlockAllocation.m_Offset = offset; - m_BlockAllocation.m_CanBecomeLost = canBecomeLost; - } - - void InitLost() - { - VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); - VMA_ASSERT(m_LastUseFrameIndex.load() == VMA_FRAME_INDEX_LOST); - m_Type = (uint8_t)ALLOCATION_TYPE_BLOCK; - m_BlockAllocation.m_hPool = VK_NULL_HANDLE; - m_BlockAllocation.m_Block = VMA_NULL; - m_BlockAllocation.m_Offset = 0; - m_BlockAllocation.m_CanBecomeLost = true; - } - - void ChangeBlockAllocation( - VmaDeviceMemoryBlock* block, - VkDeviceSize offset) - { - VMA_ASSERT(block != VMA_NULL); - VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); - m_BlockAllocation.m_Block = block; - m_BlockAllocation.m_Offset = offset; - } - - // pMappedData not null means allocation is created with MAPPED flag. 
- void InitDedicatedAllocation( - uint32_t memoryTypeIndex, - VkDeviceMemory hMemory, - VmaSuballocationType suballocationType, - void* pMappedData, - VkDeviceSize size) - { - VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); - VMA_ASSERT(hMemory != VK_NULL_HANDLE); - m_Type = (uint8_t)ALLOCATION_TYPE_DEDICATED; - m_Alignment = 0; - m_Size = size; - m_SuballocationType = (uint8_t)suballocationType; - m_MapCount = (pMappedData != VMA_NULL) ? MAP_COUNT_FLAG_PERSISTENT_MAP : 0; - m_DedicatedAllocation.m_MemoryTypeIndex = memoryTypeIndex; - m_DedicatedAllocation.m_hMemory = hMemory; - m_DedicatedAllocation.m_pMappedData = pMappedData; - } - - ALLOCATION_TYPE GetType() const { return (ALLOCATION_TYPE)m_Type; } - VkDeviceSize GetAlignment() const { return m_Alignment; } - VkDeviceSize GetSize() const { return m_Size; } - bool IsUserDataString() const { return (m_Flags & FLAG_USER_DATA_STRING) != 0; } - void* GetUserData() const { return m_pUserData; } - void SetUserData(VmaAllocator hAllocator, void* pUserData); - VmaSuballocationType GetSuballocationType() const { return (VmaSuballocationType)m_SuballocationType; } - - VmaDeviceMemoryBlock* GetBlock() const - { - VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); - return m_BlockAllocation.m_Block; - } - VkDeviceSize GetOffset() const; - VkDeviceMemory GetMemory() const; - uint32_t GetMemoryTypeIndex() const; - bool IsPersistentMap() const { return (m_MapCount & MAP_COUNT_FLAG_PERSISTENT_MAP) != 0; } - void* GetMappedData() const; - bool CanBecomeLost() const; - VmaPool GetPool() const; - - uint32_t GetLastUseFrameIndex() const - { - return m_LastUseFrameIndex.load(); - } - bool CompareExchangeLastUseFrameIndex(uint32_t& expected, uint32_t desired) - { - return m_LastUseFrameIndex.compare_exchange_weak(expected, desired); - } - /* - - If hAllocation.LastUseFrameIndex + frameInUseCount < allocator.CurrentFrameIndex, - makes it lost by setting LastUseFrameIndex = VMA_FRAME_INDEX_LOST and returns true. - - Else, returns false. 
- - If hAllocation is already lost, assert - you should not call it then. - If hAllocation was not created with CAN_BECOME_LOST_BIT, assert. - */ - bool MakeLost(uint32_t currentFrameIndex, uint32_t frameInUseCount); - - void DedicatedAllocCalcStatsInfo(VmaStatInfo& outInfo) - { - VMA_ASSERT(m_Type == ALLOCATION_TYPE_DEDICATED); - outInfo.blockCount = 1; - outInfo.allocationCount = 1; - outInfo.unusedRangeCount = 0; - outInfo.usedBytes = m_Size; - outInfo.unusedBytes = 0; - outInfo.allocationSizeMin = outInfo.allocationSizeMax = m_Size; - outInfo.unusedRangeSizeMin = UINT64_MAX; - outInfo.unusedRangeSizeMax = 0; - } - - void BlockAllocMap(); - void BlockAllocUnmap(); - VkResult DedicatedAllocMap(VmaAllocator hAllocator, void** ppData); - void DedicatedAllocUnmap(VmaAllocator hAllocator); - -private: - VkDeviceSize m_Alignment; - VkDeviceSize m_Size; - void* m_pUserData; - VMA_ATOMIC_UINT32 m_LastUseFrameIndex; - uint8_t m_Type; // ALLOCATION_TYPE - uint8_t m_SuballocationType; // VmaSuballocationType - // Bit 0x80 is set when allocation was created with VMA_ALLOCATION_CREATE_MAPPED_BIT. - // Bits with mask 0x7F, used only when ALLOCATION_TYPE_DEDICATED, are reference counter for vmaMapMemory()/vmaUnmapMemory(). - uint8_t m_MapCount; - uint8_t m_Flags; // enum FLAGS - - // Allocation out of VmaDeviceMemoryBlock. - struct BlockAllocation - { - VmaPool m_hPool; // Null if belongs to general memory. - VmaDeviceMemoryBlock* m_Block; - VkDeviceSize m_Offset; - bool m_CanBecomeLost; - }; - - // Allocation for an object that has its own private VkDeviceMemory. - struct DedicatedAllocation - { - uint32_t m_MemoryTypeIndex; - VkDeviceMemory m_hMemory; - void* m_pMappedData; // Not null means memory is mapped. - }; - - union - { - // Allocation out of VmaDeviceMemoryBlock. - BlockAllocation m_BlockAllocation; - // Allocation for an object that has its own private VkDeviceMemory. 
- DedicatedAllocation m_DedicatedAllocation; - }; - - void FreeUserDataString(VmaAllocator hAllocator); -}; - -/* -Represents a region of VmaDeviceMemoryBlock that is either assigned and returned as -allocated memory block or free. -*/ -struct VmaSuballocation -{ - VkDeviceSize offset; - VkDeviceSize size; - VmaAllocation hAllocation; - VmaSuballocationType type; -}; - -typedef VmaList< VmaSuballocation, VmaStlAllocator > VmaSuballocationList; - -// Cost of one additional allocation lost, as equivalent in bytes. -static const VkDeviceSize VMA_LOST_ALLOCATION_COST = 1048576; - -/* -Parameters of planned allocation inside a VmaDeviceMemoryBlock. - -If canMakeOtherLost was false: -- item points to a FREE suballocation. -- itemsToMakeLostCount is 0. - -If canMakeOtherLost was true: -- item points to first of sequence of suballocations, which are either FREE, - or point to VmaAllocations that can become lost. -- itemsToMakeLostCount is the number of VmaAllocations that need to be made lost for - the requested allocation to succeed. -*/ -struct VmaAllocationRequest -{ - VkDeviceSize offset; - VkDeviceSize sumFreeSize; // Sum size of free items that overlap with proposed allocation. - VkDeviceSize sumItemSize; // Sum size of items to make lost that overlap with proposed allocation. - VmaSuballocationList::iterator item; - size_t itemsToMakeLostCount; - - VkDeviceSize CalcCost() const - { - return sumItemSize + itemsToMakeLostCount * VMA_LOST_ALLOCATION_COST; - } -}; - -/* -Data structure used for bookkeeping of allocations and unused ranges of memory -in a single VkDeviceMemory block. -*/ -class VmaBlockMetadata -{ -public: - VmaBlockMetadata(VmaAllocator hAllocator); - ~VmaBlockMetadata(); - void Init(VkDeviceSize size); - - // Validates all data structures inside this object. If not valid, returns false. 
- bool Validate() const; - VkDeviceSize GetSize() const { return m_Size; } - size_t GetAllocationCount() const { return m_Suballocations.size() - m_FreeCount; } - VkDeviceSize GetSumFreeSize() const { return m_SumFreeSize; } - VkDeviceSize GetUnusedRangeSizeMax() const; - // Returns true if this block is empty - contains only single free suballocation. - bool IsEmpty() const; - - void CalcAllocationStatInfo(VmaStatInfo& outInfo) const; - void AddPoolStats(VmaPoolStats& inoutStats) const; - -#if VMA_STATS_STRING_ENABLED - void PrintDetailedMap(class VmaJsonWriter& json) const; -#endif - - // Creates trivial request for case when block is empty. - void CreateFirstAllocationRequest(VmaAllocationRequest* pAllocationRequest); - - // Tries to find a place for suballocation with given parameters inside this block. - // If succeeded, fills pAllocationRequest and returns true. - // If failed, returns false. - bool CreateAllocationRequest( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VkDeviceSize bufferImageGranularity, - VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - VmaSuballocationType allocType, - bool canMakeOtherLost, - VmaAllocationRequest* pAllocationRequest); - - bool MakeRequestedAllocationsLost( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VmaAllocationRequest* pAllocationRequest); - - uint32_t MakeAllocationsLost(uint32_t currentFrameIndex, uint32_t frameInUseCount); - - // Makes actual allocation based on request. Request must already be checked and valid. - void Alloc( - const VmaAllocationRequest& request, - VmaSuballocationType type, - VkDeviceSize allocSize, - VmaAllocation hAllocation); - - // Frees suballocation assigned to given memory region. - void Free(const VmaAllocation allocation); - -private: - VkDeviceSize m_Size; - uint32_t m_FreeCount; - VkDeviceSize m_SumFreeSize; - VmaSuballocationList m_Suballocations; - // Suballocations that are free and have size greater than certain threshold. 
- // Sorted by size, ascending. - VmaVector< VmaSuballocationList::iterator, VmaStlAllocator< VmaSuballocationList::iterator > > m_FreeSuballocationsBySize; - - bool ValidateFreeSuballocationList() const; - - // Checks if requested suballocation with given parameters can be placed in given pFreeSuballocItem. - // If yes, fills pOffset and returns true. If no, returns false. - bool CheckAllocation( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VkDeviceSize bufferImageGranularity, - VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - VmaSuballocationType allocType, - VmaSuballocationList::const_iterator suballocItem, - bool canMakeOtherLost, - VkDeviceSize* pOffset, - size_t* itemsToMakeLostCount, - VkDeviceSize* pSumFreeSize, - VkDeviceSize* pSumItemSize) const; - // Given free suballocation, it merges it with following one, which must also be free. - void MergeFreeWithNext(VmaSuballocationList::iterator item); - // Releases given suballocation, making it free. - // Merges it with adjacent free suballocations if applicable. - // Returns iterator to new free suballocation at this place. - VmaSuballocationList::iterator FreeSuballocation(VmaSuballocationList::iterator suballocItem); - // Given free suballocation, it inserts it into sorted list of - // m_FreeSuballocationsBySize if it's suitable. - void RegisterFreeSuballocation(VmaSuballocationList::iterator item); - // Given free suballocation, it removes it from sorted list of - // m_FreeSuballocationsBySize if it's suitable. - void UnregisterFreeSuballocation(VmaSuballocationList::iterator item); -}; - -// Helper class that represents mapped memory. Synchronized internally. -class VmaDeviceMemoryMapping -{ -public: - VmaDeviceMemoryMapping(); - ~VmaDeviceMemoryMapping(); - - void* GetMappedData() const { return m_pMappedData; } - - // ppData can be null. 
- VkResult Map(VmaAllocator hAllocator, VkDeviceMemory hMemory, void **ppData); - void Unmap(VmaAllocator hAllocator, VkDeviceMemory hMemory); - -private: - VMA_MUTEX m_Mutex; - uint32_t m_MapCount; - void* m_pMappedData; -}; - -/* -Represents a single block of device memory (`VkDeviceMemory`) with all the -data about its regions (aka suballocations, `VmaAllocation`), assigned and free. - -Thread-safety: This class must be externally synchronized. -*/ -class VmaDeviceMemoryBlock -{ -public: - uint32_t m_MemoryTypeIndex; - VkDeviceMemory m_hMemory; - VmaDeviceMemoryMapping m_Mapping; - VmaBlockMetadata m_Metadata; - - VmaDeviceMemoryBlock(VmaAllocator hAllocator); - - ~VmaDeviceMemoryBlock() - { - VMA_ASSERT(m_hMemory == VK_NULL_HANDLE); - } - - // Always call after construction. - void Init( - uint32_t newMemoryTypeIndex, - VkDeviceMemory newMemory, - VkDeviceSize newSize); - // Always call before destruction. - void Destroy(VmaAllocator allocator); - - // Validates all data structures inside this object. If not valid, returns false. - bool Validate() const; - - // ppData can be null. - VkResult Map(VmaAllocator hAllocator, void** ppData); - void Unmap(VmaAllocator hAllocator); -}; - -struct VmaPointerLess -{ - bool operator()(const void* lhs, const void* rhs) const - { - return lhs < rhs; - } -}; - -class VmaDefragmentator; - -/* -Sequence of VmaDeviceMemoryBlock. Represents memory blocks allocated for a specific -Vulkan memory type. - -Synchronized internally with a mutex. 
-*/ -struct VmaBlockVector -{ - VmaBlockVector( - VmaAllocator hAllocator, - uint32_t memoryTypeIndex, - VkDeviceSize preferredBlockSize, - size_t minBlockCount, - size_t maxBlockCount, - VkDeviceSize bufferImageGranularity, - uint32_t frameInUseCount, - bool isCustomPool); - ~VmaBlockVector(); - - VkResult CreateMinBlocks(); - - uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } - VkDeviceSize GetPreferredBlockSize() const { return m_PreferredBlockSize; } - VkDeviceSize GetBufferImageGranularity() const { return m_BufferImageGranularity; } - uint32_t GetFrameInUseCount() const { return m_FrameInUseCount; } - - void GetPoolStats(VmaPoolStats* pStats); - - bool IsEmpty() const { return m_Blocks.empty(); } - - VkResult Allocate( - VmaPool hCurrentPool, - uint32_t currentFrameIndex, - const VkMemoryRequirements& vkMemReq, - const VmaAllocationCreateInfo& createInfo, - VmaSuballocationType suballocType, - VmaAllocation* pAllocation); - - void Free( - VmaAllocation hAllocation); - - // Adds statistics of this BlockVector to pStats. - void AddStats(VmaStats* pStats); - -#if VMA_STATS_STRING_ENABLED - void PrintDetailedMap(class VmaJsonWriter& json); -#endif - - void MakePoolAllocationsLost( - uint32_t currentFrameIndex, - size_t* pLostAllocationCount); - - VmaDefragmentator* EnsureDefragmentator( - VmaAllocator hAllocator, - uint32_t currentFrameIndex); - - VkResult Defragment( - VmaDefragmentationStats* pDefragmentationStats, - VkDeviceSize& maxBytesToMove, - uint32_t& maxAllocationsToMove); - - void DestroyDefragmentator(); - -private: - friend class VmaDefragmentator; - - const VmaAllocator m_hAllocator; - const uint32_t m_MemoryTypeIndex; - const VkDeviceSize m_PreferredBlockSize; - const size_t m_MinBlockCount; - const size_t m_MaxBlockCount; - const VkDeviceSize m_BufferImageGranularity; - const uint32_t m_FrameInUseCount; - const bool m_IsCustomPool; - VMA_MUTEX m_Mutex; - // Incrementally sorted by sumFreeSize, ascending. 
- VmaVector< VmaDeviceMemoryBlock*, VmaStlAllocator > m_Blocks; - /* There can be at most one allocation that is completely empty - a - hysteresis to avoid pessimistic case of alternating creation and destruction - of a VkDeviceMemory. */ - bool m_HasEmptyBlock; - VmaDefragmentator* m_pDefragmentator; - - // Finds and removes given block from vector. - void Remove(VmaDeviceMemoryBlock* pBlock); - - // Performs single step in sorting m_Blocks. They may not be fully sorted - // after this call. - void IncrementallySortBlocks(); - - VkResult CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex); -}; - -struct VmaPool_T -{ -public: - VmaBlockVector m_BlockVector; - - // Takes ownership. - VmaPool_T( - VmaAllocator hAllocator, - const VmaPoolCreateInfo& createInfo); - ~VmaPool_T(); - - VmaBlockVector& GetBlockVector() { return m_BlockVector; } - -#if VMA_STATS_STRING_ENABLED - //void PrintDetailedMap(class VmaStringBuilder& sb); -#endif -}; - -class VmaDefragmentator -{ - const VmaAllocator m_hAllocator; - VmaBlockVector* const m_pBlockVector; - uint32_t m_CurrentFrameIndex; - VkDeviceSize m_BytesMoved; - uint32_t m_AllocationsMoved; - - struct AllocationInfo - { - VmaAllocation m_hAllocation; - VkBool32* m_pChanged; - - AllocationInfo() : - m_hAllocation(VK_NULL_HANDLE), - m_pChanged(VMA_NULL) - { - } - }; - - struct AllocationInfoSizeGreater - { - bool operator()(const AllocationInfo& lhs, const AllocationInfo& rhs) const - { - return lhs.m_hAllocation->GetSize() > rhs.m_hAllocation->GetSize(); - } - }; - - // Used between AddAllocation and Defragment. 
- VmaVector< AllocationInfo, VmaStlAllocator > m_Allocations; - - struct BlockInfo - { - VmaDeviceMemoryBlock* m_pBlock; - bool m_HasNonMovableAllocations; - VmaVector< AllocationInfo, VmaStlAllocator > m_Allocations; - - BlockInfo(const VkAllocationCallbacks* pAllocationCallbacks) : - m_pBlock(VMA_NULL), - m_HasNonMovableAllocations(true), - m_Allocations(pAllocationCallbacks), - m_pMappedDataForDefragmentation(VMA_NULL) - { - } - - void CalcHasNonMovableAllocations() - { - const size_t blockAllocCount = m_pBlock->m_Metadata.GetAllocationCount(); - const size_t defragmentAllocCount = m_Allocations.size(); - m_HasNonMovableAllocations = blockAllocCount != defragmentAllocCount; - } - - void SortAllocationsBySizeDescecnding() - { - VMA_SORT(m_Allocations.begin(), m_Allocations.end(), AllocationInfoSizeGreater()); - } - - VkResult EnsureMapping(VmaAllocator hAllocator, void** ppMappedData); - void Unmap(VmaAllocator hAllocator); - - private: - // Not null if mapped for defragmentation only, not originally mapped. - void* m_pMappedDataForDefragmentation; - }; - - struct BlockPointerLess - { - bool operator()(const BlockInfo* pLhsBlockInfo, const VmaDeviceMemoryBlock* pRhsBlock) const - { - return pLhsBlockInfo->m_pBlock < pRhsBlock; - } - bool operator()(const BlockInfo* pLhsBlockInfo, const BlockInfo* pRhsBlockInfo) const - { - return pLhsBlockInfo->m_pBlock < pRhsBlockInfo->m_pBlock; - } - }; - - // 1. Blocks with some non-movable allocations go first. - // 2. Blocks with smaller sumFreeSize go first. 
- struct BlockInfoCompareMoveDestination - { - bool operator()(const BlockInfo* pLhsBlockInfo, const BlockInfo* pRhsBlockInfo) const - { - if(pLhsBlockInfo->m_HasNonMovableAllocations && !pRhsBlockInfo->m_HasNonMovableAllocations) - { - return true; - } - if(!pLhsBlockInfo->m_HasNonMovableAllocations && pRhsBlockInfo->m_HasNonMovableAllocations) - { - return false; - } - if(pLhsBlockInfo->m_pBlock->m_Metadata.GetSumFreeSize() < pRhsBlockInfo->m_pBlock->m_Metadata.GetSumFreeSize()) - { - return true; - } - return false; - } - }; - - typedef VmaVector< BlockInfo*, VmaStlAllocator > BlockInfoVector; - BlockInfoVector m_Blocks; - - VkResult DefragmentRound( - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove); - - static bool MoveMakesSense( - size_t dstBlockIndex, VkDeviceSize dstOffset, - size_t srcBlockIndex, VkDeviceSize srcOffset); - -public: - VmaDefragmentator( - VmaAllocator hAllocator, - VmaBlockVector* pBlockVector, - uint32_t currentFrameIndex); - - ~VmaDefragmentator(); - - VkDeviceSize GetBytesMoved() const { return m_BytesMoved; } - uint32_t GetAllocationsMoved() const { return m_AllocationsMoved; } - - void AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged); - - VkResult Defragment( - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove); -}; - -// Main allocator object. -struct VmaAllocator_T -{ - bool m_UseMutex; - bool m_UseKhrDedicatedAllocation; - VkDevice m_hDevice; - bool m_AllocationCallbacksSpecified; - VkAllocationCallbacks m_AllocationCallbacks; - VmaDeviceMemoryCallbacks m_DeviceMemoryCallbacks; - - // Number of bytes free out of limit, or VK_WHOLE_SIZE if not limit for that heap. - VkDeviceSize m_HeapSizeLimit[VK_MAX_MEMORY_HEAPS]; - VMA_MUTEX m_HeapSizeLimitMutex; - - VkPhysicalDeviceProperties m_PhysicalDeviceProperties; - VkPhysicalDeviceMemoryProperties m_MemProps; - - // Default pools. - VmaBlockVector* m_pBlockVectors[VK_MAX_MEMORY_TYPES]; - - // Each vector is sorted by memory (handle value). 
- typedef VmaVector< VmaAllocation, VmaStlAllocator > AllocationVectorType; - AllocationVectorType* m_pDedicatedAllocations[VK_MAX_MEMORY_TYPES]; - VMA_MUTEX m_DedicatedAllocationsMutex[VK_MAX_MEMORY_TYPES]; - - VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo); - ~VmaAllocator_T(); - - const VkAllocationCallbacks* GetAllocationCallbacks() const - { - return m_AllocationCallbacksSpecified ? &m_AllocationCallbacks : 0; - } - const VmaVulkanFunctions& GetVulkanFunctions() const - { - return m_VulkanFunctions; - } - - VkDeviceSize GetBufferImageGranularity() const - { - return VMA_MAX( - static_cast(VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY), - m_PhysicalDeviceProperties.limits.bufferImageGranularity); - } - - uint32_t GetMemoryHeapCount() const { return m_MemProps.memoryHeapCount; } - uint32_t GetMemoryTypeCount() const { return m_MemProps.memoryTypeCount; } - - uint32_t MemoryTypeIndexToHeapIndex(uint32_t memTypeIndex) const - { - VMA_ASSERT(memTypeIndex < m_MemProps.memoryTypeCount); - return m_MemProps.memoryTypes[memTypeIndex].heapIndex; - } - - void GetBufferMemoryRequirements( - VkBuffer hBuffer, - VkMemoryRequirements& memReq, - bool& requiresDedicatedAllocation, - bool& prefersDedicatedAllocation) const; - void GetImageMemoryRequirements( - VkImage hImage, - VkMemoryRequirements& memReq, - bool& requiresDedicatedAllocation, - bool& prefersDedicatedAllocation) const; - - // Main allocation function. - VkResult AllocateMemory( - const VkMemoryRequirements& vkMemReq, - bool requiresDedicatedAllocation, - bool prefersDedicatedAllocation, - VkBuffer dedicatedBuffer, - VkImage dedicatedImage, - const VmaAllocationCreateInfo& createInfo, - VmaSuballocationType suballocType, - VmaAllocation* pAllocation); - - // Main deallocation function. 
- void FreeMemory(const VmaAllocation allocation); - - void CalculateStats(VmaStats* pStats); - -#if VMA_STATS_STRING_ENABLED - void PrintDetailedMap(class VmaJsonWriter& json); -#endif - - VkResult Defragment( - VmaAllocation* pAllocations, - size_t allocationCount, - VkBool32* pAllocationsChanged, - const VmaDefragmentationInfo* pDefragmentationInfo, - VmaDefragmentationStats* pDefragmentationStats); - - void GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo); - - VkResult CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool); - void DestroyPool(VmaPool pool); - void GetPoolStats(VmaPool pool, VmaPoolStats* pPoolStats); - - void SetCurrentFrameIndex(uint32_t frameIndex); - - void MakePoolAllocationsLost( - VmaPool hPool, - size_t* pLostAllocationCount); - - void CreateLostAllocation(VmaAllocation* pAllocation); - - VkResult AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory); - void FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, VkDeviceMemory hMemory); - - VkResult Map(VmaAllocation hAllocation, void** ppData); - void Unmap(VmaAllocation hAllocation); - -private: - VkDeviceSize m_PreferredLargeHeapBlockSize; - VkDeviceSize m_PreferredSmallHeapBlockSize; - - VkPhysicalDevice m_PhysicalDevice; - VMA_ATOMIC_UINT32 m_CurrentFrameIndex; - - VMA_MUTEX m_PoolsMutex; - // Protected by m_PoolsMutex. Sorted by pointer value. 
- VmaVector > m_Pools; - - VmaVulkanFunctions m_VulkanFunctions; - - void ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunctions); - - VkDeviceSize CalcPreferredBlockSize(uint32_t memTypeIndex); - - VkResult AllocateMemoryOfType( - const VkMemoryRequirements& vkMemReq, - bool dedicatedAllocation, - VkBuffer dedicatedBuffer, - VkImage dedicatedImage, - const VmaAllocationCreateInfo& createInfo, - uint32_t memTypeIndex, - VmaSuballocationType suballocType, - VmaAllocation* pAllocation); - - // Allocates and registers new VkDeviceMemory specifically for single allocation. - VkResult AllocateDedicatedMemory( - VkDeviceSize size, - VmaSuballocationType suballocType, - uint32_t memTypeIndex, - bool map, - bool isUserDataString, - void* pUserData, - VkBuffer dedicatedBuffer, - VkImage dedicatedImage, - VmaAllocation* pAllocation); - - // Tries to free pMemory as Dedicated Memory. Returns true if found and freed. - void FreeDedicatedMemory(VmaAllocation allocation); -}; - -//////////////////////////////////////////////////////////////////////////////// -// Memory allocation #2 after VmaAllocator_T definition - -static void* VmaMalloc(VmaAllocator hAllocator, size_t size, size_t alignment) -{ - return VmaMalloc(&hAllocator->m_AllocationCallbacks, size, alignment); -} - -static void VmaFree(VmaAllocator hAllocator, void* ptr) -{ - VmaFree(&hAllocator->m_AllocationCallbacks, ptr); -} - -template -static T* VmaAllocate(VmaAllocator hAllocator) -{ - return (T*)VmaMalloc(hAllocator, sizeof(T), VMA_ALIGN_OF(T)); -} - -template -static T* VmaAllocateArray(VmaAllocator hAllocator, size_t count) -{ - return (T*)VmaMalloc(hAllocator, sizeof(T) * count, VMA_ALIGN_OF(T)); -} - -template -static void vma_delete(VmaAllocator hAllocator, T* ptr) -{ - if(ptr != VMA_NULL) - { - ptr->~T(); - VmaFree(hAllocator, ptr); - } -} - -template -static void vma_delete_array(VmaAllocator hAllocator, T* ptr, size_t count) -{ - if(ptr != VMA_NULL) - { - for(size_t i = count; i--; ) - 
ptr[i].~T(); - VmaFree(hAllocator, ptr); - } -} - -//////////////////////////////////////////////////////////////////////////////// -// VmaStringBuilder - -#if VMA_STATS_STRING_ENABLED - -class VmaStringBuilder -{ -public: - VmaStringBuilder(VmaAllocator alloc) : m_Data(VmaStlAllocator(alloc->GetAllocationCallbacks())) { } - size_t GetLength() const { return m_Data.size(); } - const char* GetData() const { return m_Data.data(); } - - void Add(char ch) { m_Data.push_back(ch); } - void Add(const char* pStr); - void AddNewLine() { Add('\n'); } - void AddNumber(uint32_t num); - void AddNumber(uint64_t num); - void AddPointer(const void* ptr); - -private: - VmaVector< char, VmaStlAllocator > m_Data; -}; - -void VmaStringBuilder::Add(const char* pStr) -{ - const size_t strLen = strlen(pStr); - if(strLen > 0) - { - const size_t oldCount = m_Data.size(); - m_Data.resize(oldCount + strLen); - memcpy(m_Data.data() + oldCount, pStr, strLen); - } -} - -void VmaStringBuilder::AddNumber(uint32_t num) -{ - char buf[11]; - VmaUint32ToStr(buf, sizeof(buf), num); - Add(buf); -} - -void VmaStringBuilder::AddNumber(uint64_t num) -{ - char buf[21]; - VmaUint64ToStr(buf, sizeof(buf), num); - Add(buf); -} - -void VmaStringBuilder::AddPointer(const void* ptr) -{ - char buf[21]; - VmaPtrToStr(buf, sizeof(buf), ptr); - Add(buf); -} - -#endif // #if VMA_STATS_STRING_ENABLED - -//////////////////////////////////////////////////////////////////////////////// -// VmaJsonWriter - -#if VMA_STATS_STRING_ENABLED - -class VmaJsonWriter -{ -public: - VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb); - ~VmaJsonWriter(); - - void BeginObject(bool singleLine = false); - void EndObject(); - - void BeginArray(bool singleLine = false); - void EndArray(); - - void WriteString(const char* pStr); - void BeginString(const char* pStr = VMA_NULL); - void ContinueString(const char* pStr); - void ContinueString(uint32_t n); - void ContinueString(uint64_t n); - void 
ContinueString_Pointer(const void* ptr); - void EndString(const char* pStr = VMA_NULL); - - void WriteNumber(uint32_t n); - void WriteNumber(uint64_t n); - void WriteBool(bool b); - void WriteNull(); - -private: - static const char* const INDENT; - - enum COLLECTION_TYPE - { - COLLECTION_TYPE_OBJECT, - COLLECTION_TYPE_ARRAY, - }; - struct StackItem - { - COLLECTION_TYPE type; - uint32_t valueCount; - bool singleLineMode; - }; - - VmaStringBuilder& m_SB; - VmaVector< StackItem, VmaStlAllocator > m_Stack; - bool m_InsideString; - - void BeginValue(bool isString); - void WriteIndent(bool oneLess = false); -}; - -const char* const VmaJsonWriter::INDENT = " "; - -VmaJsonWriter::VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb) : - m_SB(sb), - m_Stack(VmaStlAllocator(pAllocationCallbacks)), - m_InsideString(false) -{ -} - -VmaJsonWriter::~VmaJsonWriter() -{ - VMA_ASSERT(!m_InsideString); - VMA_ASSERT(m_Stack.empty()); -} - -void VmaJsonWriter::BeginObject(bool singleLine) -{ - VMA_ASSERT(!m_InsideString); - - BeginValue(false); - m_SB.Add('{'); - - StackItem item; - item.type = COLLECTION_TYPE_OBJECT; - item.valueCount = 0; - item.singleLineMode = singleLine; - m_Stack.push_back(item); -} - -void VmaJsonWriter::EndObject() -{ - VMA_ASSERT(!m_InsideString); - - WriteIndent(true); - m_SB.Add('}'); - - VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_OBJECT); - m_Stack.pop_back(); -} - -void VmaJsonWriter::BeginArray(bool singleLine) -{ - VMA_ASSERT(!m_InsideString); - - BeginValue(false); - m_SB.Add('['); - - StackItem item; - item.type = COLLECTION_TYPE_ARRAY; - item.valueCount = 0; - item.singleLineMode = singleLine; - m_Stack.push_back(item); -} - -void VmaJsonWriter::EndArray() -{ - VMA_ASSERT(!m_InsideString); - - WriteIndent(true); - m_SB.Add(']'); - - VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_ARRAY); - m_Stack.pop_back(); -} - -void VmaJsonWriter::WriteString(const char* pStr) -{ - 
BeginString(pStr); - EndString(); -} - -void VmaJsonWriter::BeginString(const char* pStr) -{ - VMA_ASSERT(!m_InsideString); - - BeginValue(true); - m_SB.Add('"'); - m_InsideString = true; - if(pStr != VMA_NULL && pStr[0] != '\0') - { - ContinueString(pStr); - } -} - -void VmaJsonWriter::ContinueString(const char* pStr) -{ - VMA_ASSERT(m_InsideString); - - const size_t strLen = strlen(pStr); - for(size_t i = 0; i < strLen; ++i) - { - char ch = pStr[i]; - if(ch == '\'') - { - m_SB.Add("\\\\"); - } - else if(ch == '"') - { - m_SB.Add("\\\""); - } - else if(ch >= 32) - { - m_SB.Add(ch); - } - else switch(ch) - { - case '\b': - m_SB.Add("\\b"); - break; - case '\f': - m_SB.Add("\\f"); - break; - case '\n': - m_SB.Add("\\n"); - break; - case '\r': - m_SB.Add("\\r"); - break; - case '\t': - m_SB.Add("\\t"); - break; - default: - VMA_ASSERT(0 && "Character not currently supported."); - break; - } - } -} - -void VmaJsonWriter::ContinueString(uint32_t n) -{ - VMA_ASSERT(m_InsideString); - m_SB.AddNumber(n); -} - -void VmaJsonWriter::ContinueString(uint64_t n) -{ - VMA_ASSERT(m_InsideString); - m_SB.AddNumber(n); -} - -void VmaJsonWriter::ContinueString_Pointer(const void* ptr) -{ - VMA_ASSERT(m_InsideString); - m_SB.AddPointer(ptr); -} - -void VmaJsonWriter::EndString(const char* pStr) -{ - VMA_ASSERT(m_InsideString); - if(pStr != VMA_NULL && pStr[0] != '\0') - { - ContinueString(pStr); - } - m_SB.Add('"'); - m_InsideString = false; -} - -void VmaJsonWriter::WriteNumber(uint32_t n) -{ - VMA_ASSERT(!m_InsideString); - BeginValue(false); - m_SB.AddNumber(n); -} - -void VmaJsonWriter::WriteNumber(uint64_t n) -{ - VMA_ASSERT(!m_InsideString); - BeginValue(false); - m_SB.AddNumber(n); -} - -void VmaJsonWriter::WriteBool(bool b) -{ - VMA_ASSERT(!m_InsideString); - BeginValue(false); - m_SB.Add(b ? 
"true" : "false"); -} - -void VmaJsonWriter::WriteNull() -{ - VMA_ASSERT(!m_InsideString); - BeginValue(false); - m_SB.Add("null"); -} - -void VmaJsonWriter::BeginValue(bool isString) -{ - if(!m_Stack.empty()) - { - StackItem& currItem = m_Stack.back(); - if(currItem.type == COLLECTION_TYPE_OBJECT && - currItem.valueCount % 2 == 0) - { - VMA_ASSERT(isString); - } - - if(currItem.type == COLLECTION_TYPE_OBJECT && - currItem.valueCount % 2 != 0) - { - m_SB.Add(": "); - } - else if(currItem.valueCount > 0) - { - m_SB.Add(", "); - WriteIndent(); - } - else - { - WriteIndent(); - } - ++currItem.valueCount; - } -} - -void VmaJsonWriter::WriteIndent(bool oneLess) -{ - if(!m_Stack.empty() && !m_Stack.back().singleLineMode) - { - m_SB.AddNewLine(); - - size_t count = m_Stack.size(); - if(count > 0 && oneLess) - { - --count; - } - for(size_t i = 0; i < count; ++i) - { - m_SB.Add(INDENT); - } - } -} - -#endif // #if VMA_STATS_STRING_ENABLED - -//////////////////////////////////////////////////////////////////////////////// - -void VmaAllocation_T::SetUserData(VmaAllocator hAllocator, void* pUserData) -{ - if(IsUserDataString()) - { - VMA_ASSERT(pUserData == VMA_NULL || pUserData != m_pUserData); - - FreeUserDataString(hAllocator); - - if(pUserData != VMA_NULL) - { - const char* const newStrSrc = (char*)pUserData; - const size_t newStrLen = strlen(newStrSrc); - char* const newStrDst = vma_new_array(hAllocator, char, newStrLen + 1); - memcpy(newStrDst, newStrSrc, newStrLen + 1); - m_pUserData = newStrDst; - } - } - else - { - m_pUserData = pUserData; - } -} - -VkDeviceSize VmaAllocation_T::GetOffset() const -{ - switch(m_Type) - { - case ALLOCATION_TYPE_BLOCK: - return m_BlockAllocation.m_Offset; - case ALLOCATION_TYPE_DEDICATED: - return 0; - default: - VMA_ASSERT(0); - return 0; - } -} - -VkDeviceMemory VmaAllocation_T::GetMemory() const -{ - switch(m_Type) - { - case ALLOCATION_TYPE_BLOCK: - return m_BlockAllocation.m_Block->m_hMemory; - case ALLOCATION_TYPE_DEDICATED: - 
return m_DedicatedAllocation.m_hMemory; - default: - VMA_ASSERT(0); - return VK_NULL_HANDLE; - } -} - -uint32_t VmaAllocation_T::GetMemoryTypeIndex() const -{ - switch(m_Type) - { - case ALLOCATION_TYPE_BLOCK: - return m_BlockAllocation.m_Block->m_MemoryTypeIndex; - case ALLOCATION_TYPE_DEDICATED: - return m_DedicatedAllocation.m_MemoryTypeIndex; - default: - VMA_ASSERT(0); - return UINT32_MAX; - } -} - -void* VmaAllocation_T::GetMappedData() const -{ - switch(m_Type) - { - case ALLOCATION_TYPE_BLOCK: - if(m_MapCount != 0) - { - void* pBlockData = m_BlockAllocation.m_Block->m_Mapping.GetMappedData(); - VMA_ASSERT(pBlockData != VMA_NULL); - return (char*)pBlockData + m_BlockAllocation.m_Offset; - } - else - { - return VMA_NULL; - } - break; - case ALLOCATION_TYPE_DEDICATED: - VMA_ASSERT((m_DedicatedAllocation.m_pMappedData != VMA_NULL) == (m_MapCount != 0)); - return m_DedicatedAllocation.m_pMappedData; - default: - VMA_ASSERT(0); - return VMA_NULL; - } -} - -bool VmaAllocation_T::CanBecomeLost() const -{ - switch(m_Type) - { - case ALLOCATION_TYPE_BLOCK: - return m_BlockAllocation.m_CanBecomeLost; - case ALLOCATION_TYPE_DEDICATED: - return false; - default: - VMA_ASSERT(0); - return false; - } -} - -VmaPool VmaAllocation_T::GetPool() const -{ - VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); - return m_BlockAllocation.m_hPool; -} - -bool VmaAllocation_T::MakeLost(uint32_t currentFrameIndex, uint32_t frameInUseCount) -{ - VMA_ASSERT(CanBecomeLost()); - - /* - Warning: This is a carefully designed algorithm. - Do not modify unless you really know what you're doing :) - */ - uint32_t localLastUseFrameIndex = GetLastUseFrameIndex(); - for(;;) - { - if(localLastUseFrameIndex == VMA_FRAME_INDEX_LOST) - { - VMA_ASSERT(0); - return false; - } - else if(localLastUseFrameIndex + frameInUseCount >= currentFrameIndex) - { - return false; - } - else // Last use time earlier than current time. 
- { - if(CompareExchangeLastUseFrameIndex(localLastUseFrameIndex, VMA_FRAME_INDEX_LOST)) - { - // Setting hAllocation.LastUseFrameIndex atomic to VMA_FRAME_INDEX_LOST is enough to mark it as LOST. - // Calling code just needs to unregister this allocation in owning VmaDeviceMemoryBlock. - return true; - } - } - } -} - -void VmaAllocation_T::FreeUserDataString(VmaAllocator hAllocator) -{ - VMA_ASSERT(IsUserDataString()); - if(m_pUserData != VMA_NULL) - { - char* const oldStr = (char*)m_pUserData; - const size_t oldStrLen = strlen(oldStr); - vma_delete_array(hAllocator, oldStr, oldStrLen + 1); - m_pUserData = VMA_NULL; - } -} - -void VmaAllocation_T::BlockAllocMap() -{ - VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); - - if((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) < 0x7F) - { - ++m_MapCount; - } - else - { - VMA_ASSERT(0 && "Allocation mapped too many times simultaneously."); - } -} - -void VmaAllocation_T::BlockAllocUnmap() -{ - VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); - - if((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) != 0) - { - --m_MapCount; - } - else - { - VMA_ASSERT(0 && "Unmapping allocation not previously mapped."); - } -} - -VkResult VmaAllocation_T::DedicatedAllocMap(VmaAllocator hAllocator, void** ppData) -{ - VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); - - if(m_MapCount != 0) - { - if((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) < 0x7F) - { - VMA_ASSERT(m_DedicatedAllocation.m_pMappedData != VMA_NULL); - *ppData = m_DedicatedAllocation.m_pMappedData; - ++m_MapCount; - return VK_SUCCESS; - } - else - { - VMA_ASSERT(0 && "Dedicated allocation mapped too many times simultaneously."); - return VK_ERROR_MEMORY_MAP_FAILED; - } - } - else - { - VkResult result = (*hAllocator->GetVulkanFunctions().vkMapMemory)( - hAllocator->m_hDevice, - m_DedicatedAllocation.m_hMemory, - 0, // offset - VK_WHOLE_SIZE, - 0, // flags - ppData); - if(result == VK_SUCCESS) - { - m_DedicatedAllocation.m_pMappedData = *ppData; - m_MapCount = 1; - } - return 
result; - } -} - -void VmaAllocation_T::DedicatedAllocUnmap(VmaAllocator hAllocator) -{ - VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); - - if((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) != 0) - { - --m_MapCount; - if(m_MapCount == 0) - { - m_DedicatedAllocation.m_pMappedData = VMA_NULL; - (*hAllocator->GetVulkanFunctions().vkUnmapMemory)( - hAllocator->m_hDevice, - m_DedicatedAllocation.m_hMemory); - } - } - else - { - VMA_ASSERT(0 && "Unmapping dedicated allocation not previously mapped."); - } -} - -#if VMA_STATS_STRING_ENABLED - -// Correspond to values of enum VmaSuballocationType. -static const char* VMA_SUBALLOCATION_TYPE_NAMES[] = { - "FREE", - "UNKNOWN", - "BUFFER", - "IMAGE_UNKNOWN", - "IMAGE_LINEAR", - "IMAGE_OPTIMAL", -}; - -static void VmaPrintStatInfo(VmaJsonWriter& json, const VmaStatInfo& stat) -{ - json.BeginObject(); - - json.WriteString("Blocks"); - json.WriteNumber(stat.blockCount); - - json.WriteString("Allocations"); - json.WriteNumber(stat.allocationCount); - - json.WriteString("UnusedRanges"); - json.WriteNumber(stat.unusedRangeCount); - - json.WriteString("UsedBytes"); - json.WriteNumber(stat.usedBytes); - - json.WriteString("UnusedBytes"); - json.WriteNumber(stat.unusedBytes); - - if(stat.allocationCount > 1) - { - json.WriteString("AllocationSize"); - json.BeginObject(true); - json.WriteString("Min"); - json.WriteNumber(stat.allocationSizeMin); - json.WriteString("Avg"); - json.WriteNumber(stat.allocationSizeAvg); - json.WriteString("Max"); - json.WriteNumber(stat.allocationSizeMax); - json.EndObject(); - } - - if(stat.unusedRangeCount > 1) - { - json.WriteString("UnusedRangeSize"); - json.BeginObject(true); - json.WriteString("Min"); - json.WriteNumber(stat.unusedRangeSizeMin); - json.WriteString("Avg"); - json.WriteNumber(stat.unusedRangeSizeAvg); - json.WriteString("Max"); - json.WriteNumber(stat.unusedRangeSizeMax); - json.EndObject(); - } - - json.EndObject(); -} - -#endif // #if VMA_STATS_STRING_ENABLED - -struct 
VmaSuballocationItemSizeLess -{ - bool operator()( - const VmaSuballocationList::iterator lhs, - const VmaSuballocationList::iterator rhs) const - { - return lhs->size < rhs->size; - } - bool operator()( - const VmaSuballocationList::iterator lhs, - VkDeviceSize rhsSize) const - { - return lhs->size < rhsSize; - } -}; - -//////////////////////////////////////////////////////////////////////////////// -// class VmaBlockMetadata - -VmaBlockMetadata::VmaBlockMetadata(VmaAllocator hAllocator) : - m_Size(0), - m_FreeCount(0), - m_SumFreeSize(0), - m_Suballocations(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), - m_FreeSuballocationsBySize(VmaStlAllocator(hAllocator->GetAllocationCallbacks())) -{ -} - -VmaBlockMetadata::~VmaBlockMetadata() -{ -} - -void VmaBlockMetadata::Init(VkDeviceSize size) -{ - m_Size = size; - m_FreeCount = 1; - m_SumFreeSize = size; - - VmaSuballocation suballoc = {}; - suballoc.offset = 0; - suballoc.size = size; - suballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - suballoc.hAllocation = VK_NULL_HANDLE; - - m_Suballocations.push_back(suballoc); - VmaSuballocationList::iterator suballocItem = m_Suballocations.end(); - --suballocItem; - m_FreeSuballocationsBySize.push_back(suballocItem); -} - -bool VmaBlockMetadata::Validate() const -{ - if(m_Suballocations.empty()) - { - return false; - } - - // Expected offset of new suballocation as calculates from previous ones. - VkDeviceSize calculatedOffset = 0; - // Expected number of free suballocations as calculated from traversing their list. - uint32_t calculatedFreeCount = 0; - // Expected sum size of free suballocations as calculated from traversing their list. - VkDeviceSize calculatedSumFreeSize = 0; - // Expected number of free suballocations that should be registered in - // m_FreeSuballocationsBySize calculated from traversing their list. - size_t freeSuballocationsToRegister = 0; - // True if previous visisted suballocation was free. 
- bool prevFree = false; - - for(VmaSuballocationList::const_iterator suballocItem = m_Suballocations.cbegin(); - suballocItem != m_Suballocations.cend(); - ++suballocItem) - { - const VmaSuballocation& subAlloc = *suballocItem; - - // Actual offset of this suballocation doesn't match expected one. - if(subAlloc.offset != calculatedOffset) - { - return false; - } - - const bool currFree = (subAlloc.type == VMA_SUBALLOCATION_TYPE_FREE); - // Two adjacent free suballocations are invalid. They should be merged. - if(prevFree && currFree) - { - return false; - } - prevFree = currFree; - - if(currFree != (subAlloc.hAllocation == VK_NULL_HANDLE)) - { - return false; - } - - if(currFree) - { - calculatedSumFreeSize += subAlloc.size; - ++calculatedFreeCount; - if(subAlloc.size >= VMA_MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) - { - ++freeSuballocationsToRegister; - } - } - - calculatedOffset += subAlloc.size; - } - - // Number of free suballocations registered in m_FreeSuballocationsBySize doesn't - // match expected one. - if(m_FreeSuballocationsBySize.size() != freeSuballocationsToRegister) - { - return false; - } - - VkDeviceSize lastSize = 0; - for(size_t i = 0; i < m_FreeSuballocationsBySize.size(); ++i) - { - VmaSuballocationList::iterator suballocItem = m_FreeSuballocationsBySize[i]; - - // Only free suballocations can be registered in m_FreeSuballocationsBySize. - if(suballocItem->type != VMA_SUBALLOCATION_TYPE_FREE) - { - return false; - } - // They must be sorted by size ascending. - if(suballocItem->size < lastSize) - { - return false; - } - - lastSize = suballocItem->size; - } - - // Check if totals match calculacted values. 
- return - ValidateFreeSuballocationList() && - (calculatedOffset == m_Size) && - (calculatedSumFreeSize == m_SumFreeSize) && - (calculatedFreeCount == m_FreeCount); -} - -VkDeviceSize VmaBlockMetadata::GetUnusedRangeSizeMax() const -{ - if(!m_FreeSuballocationsBySize.empty()) - { - return m_FreeSuballocationsBySize.back()->size; - } - else - { - return 0; - } -} - -bool VmaBlockMetadata::IsEmpty() const -{ - return (m_Suballocations.size() == 1) && (m_FreeCount == 1); -} - -void VmaBlockMetadata::CalcAllocationStatInfo(VmaStatInfo& outInfo) const -{ - outInfo.blockCount = 1; - - const uint32_t rangeCount = (uint32_t)m_Suballocations.size(); - outInfo.allocationCount = rangeCount - m_FreeCount; - outInfo.unusedRangeCount = m_FreeCount; - - outInfo.unusedBytes = m_SumFreeSize; - outInfo.usedBytes = m_Size - outInfo.unusedBytes; - - outInfo.allocationSizeMin = UINT64_MAX; - outInfo.allocationSizeMax = 0; - outInfo.unusedRangeSizeMin = UINT64_MAX; - outInfo.unusedRangeSizeMax = 0; - - for(VmaSuballocationList::const_iterator suballocItem = m_Suballocations.cbegin(); - suballocItem != m_Suballocations.cend(); - ++suballocItem) - { - const VmaSuballocation& suballoc = *suballocItem; - if(suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) - { - outInfo.allocationSizeMin = VMA_MIN(outInfo.allocationSizeMin, suballoc.size); - outInfo.allocationSizeMax = VMA_MAX(outInfo.allocationSizeMax, suballoc.size); - } - else - { - outInfo.unusedRangeSizeMin = VMA_MIN(outInfo.unusedRangeSizeMin, suballoc.size); - outInfo.unusedRangeSizeMax = VMA_MAX(outInfo.unusedRangeSizeMax, suballoc.size); - } - } -} - -void VmaBlockMetadata::AddPoolStats(VmaPoolStats& inoutStats) const -{ - const uint32_t rangeCount = (uint32_t)m_Suballocations.size(); - - inoutStats.size += m_Size; - inoutStats.unusedSize += m_SumFreeSize; - inoutStats.allocationCount += rangeCount - m_FreeCount; - inoutStats.unusedRangeCount += m_FreeCount; - inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, 
GetUnusedRangeSizeMax()); -} - -#if VMA_STATS_STRING_ENABLED - -void VmaBlockMetadata::PrintDetailedMap(class VmaJsonWriter& json) const -{ - json.BeginObject(); - - json.WriteString("TotalBytes"); - json.WriteNumber(m_Size); - - json.WriteString("UnusedBytes"); - json.WriteNumber(m_SumFreeSize); - - json.WriteString("Allocations"); - json.WriteNumber(m_Suballocations.size() - m_FreeCount); - - json.WriteString("UnusedRanges"); - json.WriteNumber(m_FreeCount); - - json.WriteString("Suballocations"); - json.BeginArray(); - size_t i = 0; - for(VmaSuballocationList::const_iterator suballocItem = m_Suballocations.cbegin(); - suballocItem != m_Suballocations.cend(); - ++suballocItem, ++i) - { - json.BeginObject(true); - - json.WriteString("Type"); - json.WriteString(VMA_SUBALLOCATION_TYPE_NAMES[suballocItem->type]); - - json.WriteString("Size"); - json.WriteNumber(suballocItem->size); - - json.WriteString("Offset"); - json.WriteNumber(suballocItem->offset); - - if(suballocItem->type != VMA_SUBALLOCATION_TYPE_FREE) - { - const void* pUserData = suballocItem->hAllocation->GetUserData(); - if(pUserData != VMA_NULL) - { - json.WriteString("UserData"); - if(suballocItem->hAllocation->IsUserDataString()) - { - json.WriteString((const char*)pUserData); - } - else - { - json.BeginString(); - json.ContinueString_Pointer(pUserData); - json.EndString(); - } - } - } - - json.EndObject(); - } - json.EndArray(); - - json.EndObject(); -} - -#endif // #if VMA_STATS_STRING_ENABLED - -/* -How many suitable free suballocations to analyze before choosing best one. -- Set to 1 to use First-Fit algorithm - first suitable free suballocation will - be chosen. -- Set to UINT32_MAX to use Best-Fit/Worst-Fit algorithm - all suitable free - suballocations will be analized and best one will be chosen. -- Any other value is also acceptable. 
-*/ -//static const uint32_t MAX_SUITABLE_SUBALLOCATIONS_TO_CHECK = 8; - -void VmaBlockMetadata::CreateFirstAllocationRequest(VmaAllocationRequest* pAllocationRequest) -{ - VMA_ASSERT(IsEmpty()); - pAllocationRequest->offset = 0; - pAllocationRequest->sumFreeSize = m_SumFreeSize; - pAllocationRequest->sumItemSize = 0; - pAllocationRequest->item = m_Suballocations.begin(); - pAllocationRequest->itemsToMakeLostCount = 0; -} - -bool VmaBlockMetadata::CreateAllocationRequest( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VkDeviceSize bufferImageGranularity, - VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - VmaSuballocationType allocType, - bool canMakeOtherLost, - VmaAllocationRequest* pAllocationRequest) -{ - VMA_ASSERT(allocSize > 0); - VMA_ASSERT(allocType != VMA_SUBALLOCATION_TYPE_FREE); - VMA_ASSERT(pAllocationRequest != VMA_NULL); - VMA_HEAVY_ASSERT(Validate()); - - // There is not enough total free space in this block to fullfill the request: Early return. - if(canMakeOtherLost == false && m_SumFreeSize < allocSize) - { - return false; - } - - // New algorithm, efficiently searching freeSuballocationsBySize. - const size_t freeSuballocCount = m_FreeSuballocationsBySize.size(); - if(freeSuballocCount > 0) - { - if(VMA_BEST_FIT) - { - // Find first free suballocation with size not less than allocSize. 
- VmaSuballocationList::iterator* const it = VmaBinaryFindFirstNotLess( - m_FreeSuballocationsBySize.data(), - m_FreeSuballocationsBySize.data() + freeSuballocCount, - allocSize, - VmaSuballocationItemSizeLess()); - size_t index = it - m_FreeSuballocationsBySize.data(); - for(; index < freeSuballocCount; ++index) - { - if(CheckAllocation( - currentFrameIndex, - frameInUseCount, - bufferImageGranularity, - allocSize, - allocAlignment, - allocType, - m_FreeSuballocationsBySize[index], - false, // canMakeOtherLost - &pAllocationRequest->offset, - &pAllocationRequest->itemsToMakeLostCount, - &pAllocationRequest->sumFreeSize, - &pAllocationRequest->sumItemSize)) - { - pAllocationRequest->item = m_FreeSuballocationsBySize[index]; - return true; - } - } - } - else - { - // Search staring from biggest suballocations. - for(size_t index = freeSuballocCount; index--; ) - { - if(CheckAllocation( - currentFrameIndex, - frameInUseCount, - bufferImageGranularity, - allocSize, - allocAlignment, - allocType, - m_FreeSuballocationsBySize[index], - false, // canMakeOtherLost - &pAllocationRequest->offset, - &pAllocationRequest->itemsToMakeLostCount, - &pAllocationRequest->sumFreeSize, - &pAllocationRequest->sumItemSize)) - { - pAllocationRequest->item = m_FreeSuballocationsBySize[index]; - return true; - } - } - } - } - - if(canMakeOtherLost) - { - // Brute-force algorithm. TODO: Come up with something better. 
- - pAllocationRequest->sumFreeSize = VK_WHOLE_SIZE; - pAllocationRequest->sumItemSize = VK_WHOLE_SIZE; - - VmaAllocationRequest tmpAllocRequest = {}; - for(VmaSuballocationList::iterator suballocIt = m_Suballocations.begin(); - suballocIt != m_Suballocations.end(); - ++suballocIt) - { - if(suballocIt->type == VMA_SUBALLOCATION_TYPE_FREE || - suballocIt->hAllocation->CanBecomeLost()) - { - if(CheckAllocation( - currentFrameIndex, - frameInUseCount, - bufferImageGranularity, - allocSize, - allocAlignment, - allocType, - suballocIt, - canMakeOtherLost, - &tmpAllocRequest.offset, - &tmpAllocRequest.itemsToMakeLostCount, - &tmpAllocRequest.sumFreeSize, - &tmpAllocRequest.sumItemSize)) - { - tmpAllocRequest.item = suballocIt; - - if(tmpAllocRequest.CalcCost() < pAllocationRequest->CalcCost()) - { - *pAllocationRequest = tmpAllocRequest; - } - } - } - } - - if(pAllocationRequest->sumItemSize != VK_WHOLE_SIZE) - { - return true; - } - } - - return false; -} - -bool VmaBlockMetadata::MakeRequestedAllocationsLost( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VmaAllocationRequest* pAllocationRequest) -{ - while(pAllocationRequest->itemsToMakeLostCount > 0) - { - if(pAllocationRequest->item->type == VMA_SUBALLOCATION_TYPE_FREE) - { - ++pAllocationRequest->item; - } - VMA_ASSERT(pAllocationRequest->item != m_Suballocations.end()); - VMA_ASSERT(pAllocationRequest->item->hAllocation != VK_NULL_HANDLE); - VMA_ASSERT(pAllocationRequest->item->hAllocation->CanBecomeLost()); - if(pAllocationRequest->item->hAllocation->MakeLost(currentFrameIndex, frameInUseCount)) - { - pAllocationRequest->item = FreeSuballocation(pAllocationRequest->item); - --pAllocationRequest->itemsToMakeLostCount; - } - else - { - return false; - } - } - - VMA_HEAVY_ASSERT(Validate()); - VMA_ASSERT(pAllocationRequest->item != m_Suballocations.end()); - VMA_ASSERT(pAllocationRequest->item->type == VMA_SUBALLOCATION_TYPE_FREE); - - return true; -} - -uint32_t 
VmaBlockMetadata::MakeAllocationsLost(uint32_t currentFrameIndex, uint32_t frameInUseCount) -{ - uint32_t lostAllocationCount = 0; - for(VmaSuballocationList::iterator it = m_Suballocations.begin(); - it != m_Suballocations.end(); - ++it) - { - if(it->type != VMA_SUBALLOCATION_TYPE_FREE && - it->hAllocation->CanBecomeLost() && - it->hAllocation->MakeLost(currentFrameIndex, frameInUseCount)) - { - it = FreeSuballocation(it); - ++lostAllocationCount; - } - } - return lostAllocationCount; -} - -void VmaBlockMetadata::Alloc( - const VmaAllocationRequest& request, - VmaSuballocationType type, - VkDeviceSize allocSize, - VmaAllocation hAllocation) -{ - VMA_ASSERT(request.item != m_Suballocations.end()); - VmaSuballocation& suballoc = *request.item; - // Given suballocation is a free block. - VMA_ASSERT(suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); - // Given offset is inside this suballocation. - VMA_ASSERT(request.offset >= suballoc.offset); - const VkDeviceSize paddingBegin = request.offset - suballoc.offset; - VMA_ASSERT(suballoc.size >= paddingBegin + allocSize); - const VkDeviceSize paddingEnd = suballoc.size - paddingBegin - allocSize; - - // Unregister this free suballocation from m_FreeSuballocationsBySize and update - // it to become used. - UnregisterFreeSuballocation(request.item); - - suballoc.offset = request.offset; - suballoc.size = allocSize; - suballoc.type = type; - suballoc.hAllocation = hAllocation; - - // If there are any free bytes remaining at the end, insert new free suballocation after current one. 
- if(paddingEnd) - { - VmaSuballocation paddingSuballoc = {}; - paddingSuballoc.offset = request.offset + allocSize; - paddingSuballoc.size = paddingEnd; - paddingSuballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - VmaSuballocationList::iterator next = request.item; - ++next; - const VmaSuballocationList::iterator paddingEndItem = - m_Suballocations.insert(next, paddingSuballoc); - RegisterFreeSuballocation(paddingEndItem); - } - - // If there are any free bytes remaining at the beginning, insert new free suballocation before current one. - if(paddingBegin) - { - VmaSuballocation paddingSuballoc = {}; - paddingSuballoc.offset = request.offset - paddingBegin; - paddingSuballoc.size = paddingBegin; - paddingSuballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - const VmaSuballocationList::iterator paddingBeginItem = - m_Suballocations.insert(request.item, paddingSuballoc); - RegisterFreeSuballocation(paddingBeginItem); - } - - // Update totals. - m_FreeCount = m_FreeCount - 1; - if(paddingBegin > 0) - { - ++m_FreeCount; - } - if(paddingEnd > 0) - { - ++m_FreeCount; - } - m_SumFreeSize -= allocSize; -} - -void VmaBlockMetadata::Free(const VmaAllocation allocation) -{ - for(VmaSuballocationList::iterator suballocItem = m_Suballocations.begin(); - suballocItem != m_Suballocations.end(); - ++suballocItem) - { - VmaSuballocation& suballoc = *suballocItem; - if(suballoc.hAllocation == allocation) - { - FreeSuballocation(suballocItem); - VMA_HEAVY_ASSERT(Validate()); - return; - } - } - VMA_ASSERT(0 && "Not found!"); -} - -bool VmaBlockMetadata::ValidateFreeSuballocationList() const -{ - VkDeviceSize lastSize = 0; - for(size_t i = 0, count = m_FreeSuballocationsBySize.size(); i < count; ++i) - { - const VmaSuballocationList::iterator it = m_FreeSuballocationsBySize[i]; - - if(it->type != VMA_SUBALLOCATION_TYPE_FREE) - { - VMA_ASSERT(0); - return false; - } - if(it->size < VMA_MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) - { - VMA_ASSERT(0); - return false; - } - if(it->size < lastSize) - { 
- VMA_ASSERT(0); - return false; - } - - lastSize = it->size; - } - return true; -} - -bool VmaBlockMetadata::CheckAllocation( - uint32_t currentFrameIndex, - uint32_t frameInUseCount, - VkDeviceSize bufferImageGranularity, - VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - VmaSuballocationType allocType, - VmaSuballocationList::const_iterator suballocItem, - bool canMakeOtherLost, - VkDeviceSize* pOffset, - size_t* itemsToMakeLostCount, - VkDeviceSize* pSumFreeSize, - VkDeviceSize* pSumItemSize) const -{ - VMA_ASSERT(allocSize > 0); - VMA_ASSERT(allocType != VMA_SUBALLOCATION_TYPE_FREE); - VMA_ASSERT(suballocItem != m_Suballocations.cend()); - VMA_ASSERT(pOffset != VMA_NULL); - - *itemsToMakeLostCount = 0; - *pSumFreeSize = 0; - *pSumItemSize = 0; - - if(canMakeOtherLost) - { - if(suballocItem->type == VMA_SUBALLOCATION_TYPE_FREE) - { - *pSumFreeSize = suballocItem->size; - } - else - { - if(suballocItem->hAllocation->CanBecomeLost() && - suballocItem->hAllocation->GetLastUseFrameIndex() + frameInUseCount < currentFrameIndex) - { - ++*itemsToMakeLostCount; - *pSumItemSize = suballocItem->size; - } - else - { - return false; - } - } - - // Remaining size is too small for this request: Early return. - if(m_Size - suballocItem->offset < allocSize) - { - return false; - } - - // Start from offset equal to beginning of this suballocation. - *pOffset = suballocItem->offset; - - // Apply VMA_DEBUG_MARGIN at the beginning. - if((VMA_DEBUG_MARGIN > 0) && suballocItem != m_Suballocations.cbegin()) - { - *pOffset += VMA_DEBUG_MARGIN; - } - - // Apply alignment. - const VkDeviceSize alignment = VMA_MAX(allocAlignment, static_cast(VMA_DEBUG_ALIGNMENT)); - *pOffset = VmaAlignUp(*pOffset, alignment); - - // Check previous suballocations for BufferImageGranularity conflicts. - // Make bigger alignment if necessary. 
- if(bufferImageGranularity > 1) - { - bool bufferImageGranularityConflict = false; - VmaSuballocationList::const_iterator prevSuballocItem = suballocItem; - while(prevSuballocItem != m_Suballocations.cbegin()) - { - --prevSuballocItem; - const VmaSuballocation& prevSuballoc = *prevSuballocItem; - if(VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, *pOffset, bufferImageGranularity)) - { - if(VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) - { - bufferImageGranularityConflict = true; - break; - } - } - else - // Already on previous page. - break; - } - if(bufferImageGranularityConflict) - { - *pOffset = VmaAlignUp(*pOffset, bufferImageGranularity); - } - } - - // Now that we have final *pOffset, check if we are past suballocItem. - // If yes, return false - this function should be called for another suballocItem as starting point. - if(*pOffset >= suballocItem->offset + suballocItem->size) - { - return false; - } - - // Calculate padding at the beginning based on current offset. - const VkDeviceSize paddingBegin = *pOffset - suballocItem->offset; - - // Calculate required margin at the end if this is not last suballocation. - VmaSuballocationList::const_iterator next = suballocItem; - ++next; - const VkDeviceSize requiredEndMargin = - (next != m_Suballocations.cend()) ? VMA_DEBUG_MARGIN : 0; - - const VkDeviceSize totalSize = paddingBegin + allocSize + requiredEndMargin; - // Another early return check. - if(suballocItem->offset + totalSize > m_Size) - { - return false; - } - - // Advance lastSuballocItem until desired size is reached. - // Update itemsToMakeLostCount. 
- VmaSuballocationList::const_iterator lastSuballocItem = suballocItem; - if(totalSize > suballocItem->size) - { - VkDeviceSize remainingSize = totalSize - suballocItem->size; - while(remainingSize > 0) - { - ++lastSuballocItem; - if(lastSuballocItem == m_Suballocations.cend()) - { - return false; - } - if(lastSuballocItem->type == VMA_SUBALLOCATION_TYPE_FREE) - { - *pSumFreeSize += lastSuballocItem->size; - } - else - { - VMA_ASSERT(lastSuballocItem->hAllocation != VK_NULL_HANDLE); - if(lastSuballocItem->hAllocation->CanBecomeLost() && - lastSuballocItem->hAllocation->GetLastUseFrameIndex() + frameInUseCount < currentFrameIndex) - { - ++*itemsToMakeLostCount; - *pSumItemSize += lastSuballocItem->size; - } - else - { - return false; - } - } - remainingSize = (lastSuballocItem->size < remainingSize) ? - remainingSize - lastSuballocItem->size : 0; - } - } - - // Check next suballocations for BufferImageGranularity conflicts. - // If conflict exists, we must mark more allocations lost or fail. - if(bufferImageGranularity > 1) - { - VmaSuballocationList::const_iterator nextSuballocItem = lastSuballocItem; - ++nextSuballocItem; - while(nextSuballocItem != m_Suballocations.cend()) - { - const VmaSuballocation& nextSuballoc = *nextSuballocItem; - if(VmaBlocksOnSamePage(*pOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) - { - if(VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) - { - VMA_ASSERT(nextSuballoc.hAllocation != VK_NULL_HANDLE); - if(nextSuballoc.hAllocation->CanBecomeLost() && - nextSuballoc.hAllocation->GetLastUseFrameIndex() + frameInUseCount < currentFrameIndex) - { - ++*itemsToMakeLostCount; - } - else - { - return false; - } - } - } - else - { - // Already on next page. 
- break; - } - ++nextSuballocItem; - } - } - } - else - { - const VmaSuballocation& suballoc = *suballocItem; - VMA_ASSERT(suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); - - *pSumFreeSize = suballoc.size; - - // Size of this suballocation is too small for this request: Early return. - if(suballoc.size < allocSize) - { - return false; - } - - // Start from offset equal to beginning of this suballocation. - *pOffset = suballoc.offset; - - // Apply VMA_DEBUG_MARGIN at the beginning. - if((VMA_DEBUG_MARGIN > 0) && suballocItem != m_Suballocations.cbegin()) - { - *pOffset += VMA_DEBUG_MARGIN; - } - - // Apply alignment. - const VkDeviceSize alignment = VMA_MAX(allocAlignment, static_cast(VMA_DEBUG_ALIGNMENT)); - *pOffset = VmaAlignUp(*pOffset, alignment); - - // Check previous suballocations for BufferImageGranularity conflicts. - // Make bigger alignment if necessary. - if(bufferImageGranularity > 1) - { - bool bufferImageGranularityConflict = false; - VmaSuballocationList::const_iterator prevSuballocItem = suballocItem; - while(prevSuballocItem != m_Suballocations.cbegin()) - { - --prevSuballocItem; - const VmaSuballocation& prevSuballoc = *prevSuballocItem; - if(VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, *pOffset, bufferImageGranularity)) - { - if(VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) - { - bufferImageGranularityConflict = true; - break; - } - } - else - // Already on previous page. - break; - } - if(bufferImageGranularityConflict) - { - *pOffset = VmaAlignUp(*pOffset, bufferImageGranularity); - } - } - - // Calculate padding at the beginning based on current offset. - const VkDeviceSize paddingBegin = *pOffset - suballoc.offset; - - // Calculate required margin at the end if this is not last suballocation. - VmaSuballocationList::const_iterator next = suballocItem; - ++next; - const VkDeviceSize requiredEndMargin = - (next != m_Suballocations.cend()) ? 
VMA_DEBUG_MARGIN : 0; - - // Fail if requested size plus margin before and after is bigger than size of this suballocation. - if(paddingBegin + allocSize + requiredEndMargin > suballoc.size) - { - return false; - } - - // Check next suballocations for BufferImageGranularity conflicts. - // If conflict exists, allocation cannot be made here. - if(bufferImageGranularity > 1) - { - VmaSuballocationList::const_iterator nextSuballocItem = suballocItem; - ++nextSuballocItem; - while(nextSuballocItem != m_Suballocations.cend()) - { - const VmaSuballocation& nextSuballoc = *nextSuballocItem; - if(VmaBlocksOnSamePage(*pOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) - { - if(VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) - { - return false; - } - } - else - { - // Already on next page. - break; - } - ++nextSuballocItem; - } - } - } - - // All tests passed: Success. pOffset is already filled. - return true; -} - -void VmaBlockMetadata::MergeFreeWithNext(VmaSuballocationList::iterator item) -{ - VMA_ASSERT(item != m_Suballocations.end()); - VMA_ASSERT(item->type == VMA_SUBALLOCATION_TYPE_FREE); - - VmaSuballocationList::iterator nextItem = item; - ++nextItem; - VMA_ASSERT(nextItem != m_Suballocations.end()); - VMA_ASSERT(nextItem->type == VMA_SUBALLOCATION_TYPE_FREE); - - item->size += nextItem->size; - --m_FreeCount; - m_Suballocations.erase(nextItem); -} - -VmaSuballocationList::iterator VmaBlockMetadata::FreeSuballocation(VmaSuballocationList::iterator suballocItem) -{ - // Change this suballocation to be marked as free. - VmaSuballocation& suballoc = *suballocItem; - suballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - suballoc.hAllocation = VK_NULL_HANDLE; - - // Update totals. - ++m_FreeCount; - m_SumFreeSize += suballoc.size; - - // Merge with previous and/or next suballocation if it's also free. 
- bool mergeWithNext = false; - bool mergeWithPrev = false; - - VmaSuballocationList::iterator nextItem = suballocItem; - ++nextItem; - if((nextItem != m_Suballocations.end()) && (nextItem->type == VMA_SUBALLOCATION_TYPE_FREE)) - { - mergeWithNext = true; - } - - VmaSuballocationList::iterator prevItem = suballocItem; - if(suballocItem != m_Suballocations.begin()) - { - --prevItem; - if(prevItem->type == VMA_SUBALLOCATION_TYPE_FREE) - { - mergeWithPrev = true; - } - } - - if(mergeWithNext) - { - UnregisterFreeSuballocation(nextItem); - MergeFreeWithNext(suballocItem); - } - - if(mergeWithPrev) - { - UnregisterFreeSuballocation(prevItem); - MergeFreeWithNext(prevItem); - RegisterFreeSuballocation(prevItem); - return prevItem; - } - else - { - RegisterFreeSuballocation(suballocItem); - return suballocItem; - } -} - -void VmaBlockMetadata::RegisterFreeSuballocation(VmaSuballocationList::iterator item) -{ - VMA_ASSERT(item->type == VMA_SUBALLOCATION_TYPE_FREE); - VMA_ASSERT(item->size > 0); - - // You may want to enable this validation at the beginning or at the end of - // this function, depending on what do you want to check. - VMA_HEAVY_ASSERT(ValidateFreeSuballocationList()); - - if(item->size >= VMA_MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) - { - if(m_FreeSuballocationsBySize.empty()) - { - m_FreeSuballocationsBySize.push_back(item); - } - else - { - VmaVectorInsertSorted(m_FreeSuballocationsBySize, item); - } - } - - //VMA_HEAVY_ASSERT(ValidateFreeSuballocationList()); -} - - -void VmaBlockMetadata::UnregisterFreeSuballocation(VmaSuballocationList::iterator item) -{ - VMA_ASSERT(item->type == VMA_SUBALLOCATION_TYPE_FREE); - VMA_ASSERT(item->size > 0); - - // You may want to enable this validation at the beginning or at the end of - // this function, depending on what do you want to check. 
- VMA_HEAVY_ASSERT(ValidateFreeSuballocationList()); - - if(item->size >= VMA_MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) - { - VmaSuballocationList::iterator* const it = VmaBinaryFindFirstNotLess( - m_FreeSuballocationsBySize.data(), - m_FreeSuballocationsBySize.data() + m_FreeSuballocationsBySize.size(), - item, - VmaSuballocationItemSizeLess()); - for(size_t index = it - m_FreeSuballocationsBySize.data(); - index < m_FreeSuballocationsBySize.size(); - ++index) - { - if(m_FreeSuballocationsBySize[index] == item) - { - VmaVectorRemove(m_FreeSuballocationsBySize, index); - return; - } - VMA_ASSERT((m_FreeSuballocationsBySize[index]->size == item->size) && "Not found."); - } - VMA_ASSERT(0 && "Not found."); - } - - //VMA_HEAVY_ASSERT(ValidateFreeSuballocationList()); -} - -//////////////////////////////////////////////////////////////////////////////// -// class VmaDeviceMemoryMapping - -VmaDeviceMemoryMapping::VmaDeviceMemoryMapping() : - m_MapCount(0), - m_pMappedData(VMA_NULL) -{ -} - -VmaDeviceMemoryMapping::~VmaDeviceMemoryMapping() -{ - VMA_ASSERT(m_MapCount == 0 && "VkDeviceMemory block is being destroyed while it is still mapped."); -} - -VkResult VmaDeviceMemoryMapping::Map(VmaAllocator hAllocator, VkDeviceMemory hMemory, void **ppData) -{ - VmaMutexLock lock(m_Mutex, hAllocator->m_UseMutex); - if(m_MapCount != 0) - { - ++m_MapCount; - VMA_ASSERT(m_pMappedData != VMA_NULL); - if(ppData != VMA_NULL) - { - *ppData = m_pMappedData; - } - return VK_SUCCESS; - } - else - { - VkResult result = (*hAllocator->GetVulkanFunctions().vkMapMemory)( - hAllocator->m_hDevice, - hMemory, - 0, // offset - VK_WHOLE_SIZE, - 0, // flags - &m_pMappedData); - if(result == VK_SUCCESS) - { - if(ppData != VMA_NULL) - { - *ppData = m_pMappedData; - } - m_MapCount = 1; - } - return result; - } -} - -void VmaDeviceMemoryMapping::Unmap(VmaAllocator hAllocator, VkDeviceMemory hMemory) -{ - VmaMutexLock lock(m_Mutex, hAllocator->m_UseMutex); - if(m_MapCount != 0) - { - if(--m_MapCount == 0) 
- { - m_pMappedData = VMA_NULL; - (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, hMemory); - } - } - else - { - VMA_ASSERT(0 && "VkDeviceMemory block is being unmapped while it was not previously mapped."); - } -} - -//////////////////////////////////////////////////////////////////////////////// -// class VmaDeviceMemoryBlock - -VmaDeviceMemoryBlock::VmaDeviceMemoryBlock(VmaAllocator hAllocator) : - m_MemoryTypeIndex(UINT32_MAX), - m_hMemory(VK_NULL_HANDLE), - m_Metadata(hAllocator) -{ -} - -void VmaDeviceMemoryBlock::Init( - uint32_t newMemoryTypeIndex, - VkDeviceMemory newMemory, - VkDeviceSize newSize) -{ - VMA_ASSERT(m_hMemory == VK_NULL_HANDLE); - - m_MemoryTypeIndex = newMemoryTypeIndex; - m_hMemory = newMemory; - - m_Metadata.Init(newSize); -} - -void VmaDeviceMemoryBlock::Destroy(VmaAllocator allocator) -{ - // This is the most important assert in the entire library. - // Hitting it means you have some memory leak - unreleased VmaAllocation objects. - VMA_ASSERT(m_Metadata.IsEmpty() && "Some allocations were not freed before destruction of this memory block!"); - - VMA_ASSERT(m_hMemory != VK_NULL_HANDLE); - allocator->FreeVulkanMemory(m_MemoryTypeIndex, m_Metadata.GetSize(), m_hMemory); - m_hMemory = VK_NULL_HANDLE; -} - -bool VmaDeviceMemoryBlock::Validate() const -{ - if((m_hMemory == VK_NULL_HANDLE) || - (m_Metadata.GetSize() == 0)) - { - return false; - } - - return m_Metadata.Validate(); -} - -VkResult VmaDeviceMemoryBlock::Map(VmaAllocator hAllocator, void** ppData) -{ - return m_Mapping.Map(hAllocator, m_hMemory, ppData); -} - -void VmaDeviceMemoryBlock::Unmap(VmaAllocator hAllocator) -{ - m_Mapping.Unmap(hAllocator, m_hMemory); -} - -static void InitStatInfo(VmaStatInfo& outInfo) -{ - memset(&outInfo, 0, sizeof(outInfo)); - outInfo.allocationSizeMin = UINT64_MAX; - outInfo.unusedRangeSizeMin = UINT64_MAX; -} - -// Adds statistics srcInfo into inoutInfo, like: inoutInfo += srcInfo. 
-static void VmaAddStatInfo(VmaStatInfo& inoutInfo, const VmaStatInfo& srcInfo) -{ - inoutInfo.blockCount += srcInfo.blockCount; - inoutInfo.allocationCount += srcInfo.allocationCount; - inoutInfo.unusedRangeCount += srcInfo.unusedRangeCount; - inoutInfo.usedBytes += srcInfo.usedBytes; - inoutInfo.unusedBytes += srcInfo.unusedBytes; - inoutInfo.allocationSizeMin = VMA_MIN(inoutInfo.allocationSizeMin, srcInfo.allocationSizeMin); - inoutInfo.allocationSizeMax = VMA_MAX(inoutInfo.allocationSizeMax, srcInfo.allocationSizeMax); - inoutInfo.unusedRangeSizeMin = VMA_MIN(inoutInfo.unusedRangeSizeMin, srcInfo.unusedRangeSizeMin); - inoutInfo.unusedRangeSizeMax = VMA_MAX(inoutInfo.unusedRangeSizeMax, srcInfo.unusedRangeSizeMax); -} - -static void VmaPostprocessCalcStatInfo(VmaStatInfo& inoutInfo) -{ - inoutInfo.allocationSizeAvg = (inoutInfo.allocationCount > 0) ? - VmaRoundDiv(inoutInfo.usedBytes, inoutInfo.allocationCount) : 0; - inoutInfo.unusedRangeSizeAvg = (inoutInfo.unusedRangeCount > 0) ? - VmaRoundDiv(inoutInfo.unusedBytes, inoutInfo.unusedRangeCount) : 0; -} - -VmaPool_T::VmaPool_T( - VmaAllocator hAllocator, - const VmaPoolCreateInfo& createInfo) : - m_BlockVector( - hAllocator, - createInfo.memoryTypeIndex, - createInfo.blockSize, - createInfo.minBlockCount, - createInfo.maxBlockCount, - (createInfo.flags & VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT) != 0 ? 
1 : hAllocator->GetBufferImageGranularity(), - createInfo.frameInUseCount, - true) // isCustomPool -{ -} - -VmaPool_T::~VmaPool_T() -{ -} - -#if VMA_STATS_STRING_ENABLED - -#endif // #if VMA_STATS_STRING_ENABLED - -VmaBlockVector::VmaBlockVector( - VmaAllocator hAllocator, - uint32_t memoryTypeIndex, - VkDeviceSize preferredBlockSize, - size_t minBlockCount, - size_t maxBlockCount, - VkDeviceSize bufferImageGranularity, - uint32_t frameInUseCount, - bool isCustomPool) : - m_hAllocator(hAllocator), - m_MemoryTypeIndex(memoryTypeIndex), - m_PreferredBlockSize(preferredBlockSize), - m_MinBlockCount(minBlockCount), - m_MaxBlockCount(maxBlockCount), - m_BufferImageGranularity(bufferImageGranularity), - m_FrameInUseCount(frameInUseCount), - m_IsCustomPool(isCustomPool), - m_Blocks(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), - m_HasEmptyBlock(false), - m_pDefragmentator(VMA_NULL) -{ -} - -VmaBlockVector::~VmaBlockVector() -{ - VMA_ASSERT(m_pDefragmentator == VMA_NULL); - - for(size_t i = m_Blocks.size(); i--; ) - { - m_Blocks[i]->Destroy(m_hAllocator); - vma_delete(m_hAllocator, m_Blocks[i]); - } -} - -VkResult VmaBlockVector::CreateMinBlocks() -{ - for(size_t i = 0; i < m_MinBlockCount; ++i) - { - VkResult res = CreateBlock(m_PreferredBlockSize, VMA_NULL); - if(res != VK_SUCCESS) - { - return res; - } - } - return VK_SUCCESS; -} - -void VmaBlockVector::GetPoolStats(VmaPoolStats* pStats) -{ - pStats->size = 0; - pStats->unusedSize = 0; - pStats->allocationCount = 0; - pStats->unusedRangeCount = 0; - pStats->unusedRangeSizeMax = 0; - - VmaMutexLock lock(m_Mutex, m_hAllocator->m_UseMutex); - - for(uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) - { - const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; - VMA_ASSERT(pBlock); - VMA_HEAVY_ASSERT(pBlock->Validate()); - pBlock->m_Metadata.AddPoolStats(*pStats); - } -} - -static const uint32_t VMA_ALLOCATION_TRY_COUNT = 32; - -VkResult VmaBlockVector::Allocate( - VmaPool hCurrentPool, 
- uint32_t currentFrameIndex, - const VkMemoryRequirements& vkMemReq, - const VmaAllocationCreateInfo& createInfo, - VmaSuballocationType suballocType, - VmaAllocation* pAllocation) -{ - const bool mapped = (createInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; - const bool isUserDataString = (createInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0; - - VmaMutexLock lock(m_Mutex, m_hAllocator->m_UseMutex); - - // 1. Search existing allocations. Try to allocate without making other allocations lost. - // Forward order in m_Blocks - prefer blocks with smallest amount of free space. - for(size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex ) - { - VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; - VMA_ASSERT(pCurrBlock); - VmaAllocationRequest currRequest = {}; - if(pCurrBlock->m_Metadata.CreateAllocationRequest( - currentFrameIndex, - m_FrameInUseCount, - m_BufferImageGranularity, - vkMemReq.size, - vkMemReq.alignment, - suballocType, - false, // canMakeOtherLost - &currRequest)) - { - // Allocate from pCurrBlock. - VMA_ASSERT(currRequest.itemsToMakeLostCount == 0); - - if(mapped) - { - VkResult res = pCurrBlock->Map(m_hAllocator, nullptr); - if(res != VK_SUCCESS) - { - return res; - } - } - - // We no longer have an empty Allocation. 
- if(pCurrBlock->m_Metadata.IsEmpty()) - { - m_HasEmptyBlock = false; - } - - *pAllocation = vma_new(m_hAllocator, VmaAllocation_T)(currentFrameIndex, isUserDataString); - pCurrBlock->m_Metadata.Alloc(currRequest, suballocType, vkMemReq.size, *pAllocation); - (*pAllocation)->InitBlockAllocation( - hCurrentPool, - pCurrBlock, - currRequest.offset, - vkMemReq.alignment, - vkMemReq.size, - suballocType, - mapped, - (createInfo.flags & VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT) != 0); - VMA_HEAVY_ASSERT(pCurrBlock->Validate()); - VMA_DEBUG_LOG(" Returned from existing allocation #%u", (uint32_t)blockIndex); - (*pAllocation)->SetUserData(m_hAllocator, createInfo.pUserData); - return VK_SUCCESS; - } - } - - const bool canCreateNewBlock = - ((createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0) && - (m_Blocks.size() < m_MaxBlockCount); - - // 2. Try to create new block. - if(canCreateNewBlock) - { - // 2.1. Start with full preferredBlockSize. - VkDeviceSize blockSize = m_PreferredBlockSize; - size_t newBlockIndex = 0; - VkResult res = CreateBlock(blockSize, &newBlockIndex); - // Allocating blocks of other sizes is allowed only in default pools. - // In custom pools block size is fixed. - if(res < 0 && m_IsCustomPool == false) - { - // 2.2. Try half the size. - blockSize /= 2; - if(blockSize >= vkMemReq.size) - { - res = CreateBlock(blockSize, &newBlockIndex); - if(res < 0) - { - // 2.3. Try quarter the size. - blockSize /= 2; - if(blockSize >= vkMemReq.size) - { - res = CreateBlock(blockSize, &newBlockIndex); - } - } - } - } - if(res == VK_SUCCESS) - { - VmaDeviceMemoryBlock* const pBlock = m_Blocks[newBlockIndex]; - VMA_ASSERT(pBlock->m_Metadata.GetSize() >= vkMemReq.size); - - if(mapped) - { - res = pBlock->Map(m_hAllocator, nullptr); - if(res != VK_SUCCESS) - { - return res; - } - } - - // Allocate from pBlock. Because it is empty, dstAllocRequest can be trivially filled. 
- VmaAllocationRequest allocRequest; - pBlock->m_Metadata.CreateFirstAllocationRequest(&allocRequest); - *pAllocation = vma_new(m_hAllocator, VmaAllocation_T)(currentFrameIndex, isUserDataString); - pBlock->m_Metadata.Alloc(allocRequest, suballocType, vkMemReq.size, *pAllocation); - (*pAllocation)->InitBlockAllocation( - hCurrentPool, - pBlock, - allocRequest.offset, - vkMemReq.alignment, - vkMemReq.size, - suballocType, - mapped, - (createInfo.flags & VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT) != 0); - VMA_HEAVY_ASSERT(pBlock->Validate()); - VMA_DEBUG_LOG(" Created new allocation Size=%llu", allocInfo.allocationSize); - (*pAllocation)->SetUserData(m_hAllocator, createInfo.pUserData); - return VK_SUCCESS; - } - } - - const bool canMakeOtherLost = (createInfo.flags & VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT) != 0; - - // 3. Try to allocate from existing blocks with making other allocations lost. - if(canMakeOtherLost) - { - uint32_t tryIndex = 0; - for(; tryIndex < VMA_ALLOCATION_TRY_COUNT; ++tryIndex) - { - VmaDeviceMemoryBlock* pBestRequestBlock = VMA_NULL; - VmaAllocationRequest bestRequest = {}; - VkDeviceSize bestRequestCost = VK_WHOLE_SIZE; - - // 1. Search existing allocations. - // Forward order in m_Blocks - prefer blocks with smallest amount of free space. 
- for(size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex ) - { - VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; - VMA_ASSERT(pCurrBlock); - VmaAllocationRequest currRequest = {}; - if(pCurrBlock->m_Metadata.CreateAllocationRequest( - currentFrameIndex, - m_FrameInUseCount, - m_BufferImageGranularity, - vkMemReq.size, - vkMemReq.alignment, - suballocType, - canMakeOtherLost, - &currRequest)) - { - const VkDeviceSize currRequestCost = currRequest.CalcCost(); - if(pBestRequestBlock == VMA_NULL || - currRequestCost < bestRequestCost) - { - pBestRequestBlock = pCurrBlock; - bestRequest = currRequest; - bestRequestCost = currRequestCost; - - if(bestRequestCost == 0) - { - break; - } - } - } - } - - if(pBestRequestBlock != VMA_NULL) - { - if(mapped) - { - VkResult res = pBestRequestBlock->Map(m_hAllocator, nullptr); - if(res != VK_SUCCESS) - { - return res; - } - } - - if(pBestRequestBlock->m_Metadata.MakeRequestedAllocationsLost( - currentFrameIndex, - m_FrameInUseCount, - &bestRequest)) - { - // We no longer have an empty Allocation. - if(pBestRequestBlock->m_Metadata.IsEmpty()) - { - m_HasEmptyBlock = false; - } - // Allocate from this pBlock. - *pAllocation = vma_new(m_hAllocator, VmaAllocation_T)(currentFrameIndex, isUserDataString); - pBestRequestBlock->m_Metadata.Alloc(bestRequest, suballocType, vkMemReq.size, *pAllocation); - (*pAllocation)->InitBlockAllocation( - hCurrentPool, - pBestRequestBlock, - bestRequest.offset, - vkMemReq.alignment, - vkMemReq.size, - suballocType, - mapped, - (createInfo.flags & VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT) != 0); - VMA_HEAVY_ASSERT(pBlock->Validate()); - VMA_DEBUG_LOG(" Returned from existing allocation #%u", (uint32_t)blockIndex); - (*pAllocation)->SetUserData(m_hAllocator, createInfo.pUserData); - return VK_SUCCESS; - } - // else: Some allocations must have been touched while we are here. Next try. - } - else - { - // Could not find place in any of the blocks - break outer loop. 
- break; - } - } - /* Maximum number of tries exceeded - a very unlike event when many other - threads are simultaneously touching allocations making it impossible to make - lost at the same time as we try to allocate. */ - if(tryIndex == VMA_ALLOCATION_TRY_COUNT) - { - return VK_ERROR_TOO_MANY_OBJECTS; - } - } - - return VK_ERROR_OUT_OF_DEVICE_MEMORY; -} - -void VmaBlockVector::Free( - VmaAllocation hAllocation) -{ - VmaDeviceMemoryBlock* pBlockToDelete = VMA_NULL; - - // Scope for lock. - { - VmaMutexLock lock(m_Mutex, m_hAllocator->m_UseMutex); - - VmaDeviceMemoryBlock* pBlock = hAllocation->GetBlock(); - - if(hAllocation->IsPersistentMap()) - { - pBlock->m_Mapping.Unmap(m_hAllocator, pBlock->m_hMemory); - } - - pBlock->m_Metadata.Free(hAllocation); - VMA_HEAVY_ASSERT(pBlock->Validate()); - - VMA_DEBUG_LOG(" Freed from MemoryTypeIndex=%u", memTypeIndex); - - // pBlock became empty after this deallocation. - if(pBlock->m_Metadata.IsEmpty()) - { - // Already has empty Allocation. We don't want to have two, so delete this one. - if(m_HasEmptyBlock && m_Blocks.size() > m_MinBlockCount) - { - pBlockToDelete = pBlock; - Remove(pBlock); - } - // We now have first empty Allocation. - else - { - m_HasEmptyBlock = true; - } - } - // pBlock didn't become empty, but we have another empty block - find and free that one. - // (This is optional, heuristics.) - else if(m_HasEmptyBlock) - { - VmaDeviceMemoryBlock* pLastBlock = m_Blocks.back(); - if(pLastBlock->m_Metadata.IsEmpty() && m_Blocks.size() > m_MinBlockCount) - { - pBlockToDelete = pLastBlock; - m_Blocks.pop_back(); - m_HasEmptyBlock = false; - } - } - - IncrementallySortBlocks(); - } - - // Destruction of a free Allocation. Deferred until this point, outside of mutex - // lock, for performance reason. 
- if(pBlockToDelete != VMA_NULL) - { - VMA_DEBUG_LOG(" Deleted empty allocation"); - pBlockToDelete->Destroy(m_hAllocator); - vma_delete(m_hAllocator, pBlockToDelete); - } -} - -void VmaBlockVector::Remove(VmaDeviceMemoryBlock* pBlock) -{ - for(uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) - { - if(m_Blocks[blockIndex] == pBlock) - { - VmaVectorRemove(m_Blocks, blockIndex); - return; - } - } - VMA_ASSERT(0); -} - -void VmaBlockVector::IncrementallySortBlocks() -{ - // Bubble sort only until first swap. - for(size_t i = 1; i < m_Blocks.size(); ++i) - { - if(m_Blocks[i - 1]->m_Metadata.GetSumFreeSize() > m_Blocks[i]->m_Metadata.GetSumFreeSize()) - { - VMA_SWAP(m_Blocks[i - 1], m_Blocks[i]); - return; - } - } -} - -VkResult VmaBlockVector::CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex) -{ - VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; - allocInfo.memoryTypeIndex = m_MemoryTypeIndex; - allocInfo.allocationSize = blockSize; - VkDeviceMemory mem = VK_NULL_HANDLE; - VkResult res = m_hAllocator->AllocateVulkanMemory(&allocInfo, &mem); - if(res < 0) - { - return res; - } - - // New VkDeviceMemory successfully created. - - // Create new Allocation for it. 
- VmaDeviceMemoryBlock* const pBlock = vma_new(m_hAllocator, VmaDeviceMemoryBlock)(m_hAllocator); - pBlock->Init( - m_MemoryTypeIndex, - mem, - allocInfo.allocationSize); - - m_Blocks.push_back(pBlock); - if(pNewBlockIndex != VMA_NULL) - { - *pNewBlockIndex = m_Blocks.size() - 1; - } - - return VK_SUCCESS; -} - -#if VMA_STATS_STRING_ENABLED - -void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json) -{ - VmaMutexLock lock(m_Mutex, m_hAllocator->m_UseMutex); - - json.BeginObject(); - - if(m_IsCustomPool) - { - json.WriteString("MemoryTypeIndex"); - json.WriteNumber(m_MemoryTypeIndex); - - json.WriteString("BlockSize"); - json.WriteNumber(m_PreferredBlockSize); - - json.WriteString("BlockCount"); - json.BeginObject(true); - if(m_MinBlockCount > 0) - { - json.WriteString("Min"); - json.WriteNumber(m_MinBlockCount); - } - if(m_MaxBlockCount < SIZE_MAX) - { - json.WriteString("Max"); - json.WriteNumber(m_MaxBlockCount); - } - json.WriteString("Cur"); - json.WriteNumber(m_Blocks.size()); - json.EndObject(); - - if(m_FrameInUseCount > 0) - { - json.WriteString("FrameInUseCount"); - json.WriteNumber(m_FrameInUseCount); - } - } - else - { - json.WriteString("PreferredBlockSize"); - json.WriteNumber(m_PreferredBlockSize); - } - - json.WriteString("Blocks"); - json.BeginArray(); - for(size_t i = 0; i < m_Blocks.size(); ++i) - { - m_Blocks[i]->m_Metadata.PrintDetailedMap(json); - } - json.EndArray(); - - json.EndObject(); -} - -#endif // #if VMA_STATS_STRING_ENABLED - -VmaDefragmentator* VmaBlockVector::EnsureDefragmentator( - VmaAllocator hAllocator, - uint32_t currentFrameIndex) -{ - if(m_pDefragmentator == VMA_NULL) - { - m_pDefragmentator = vma_new(m_hAllocator, VmaDefragmentator)( - hAllocator, - this, - currentFrameIndex); - } - - return m_pDefragmentator; -} - -VkResult VmaBlockVector::Defragment( - VmaDefragmentationStats* pDefragmentationStats, - VkDeviceSize& maxBytesToMove, - uint32_t& maxAllocationsToMove) -{ - if(m_pDefragmentator == VMA_NULL) - { - return 
VK_SUCCESS; - } - - VmaMutexLock lock(m_Mutex, m_hAllocator->m_UseMutex); - - // Defragment. - VkResult result = m_pDefragmentator->Defragment(maxBytesToMove, maxAllocationsToMove); - - // Accumulate statistics. - if(pDefragmentationStats != VMA_NULL) - { - const VkDeviceSize bytesMoved = m_pDefragmentator->GetBytesMoved(); - const uint32_t allocationsMoved = m_pDefragmentator->GetAllocationsMoved(); - pDefragmentationStats->bytesMoved += bytesMoved; - pDefragmentationStats->allocationsMoved += allocationsMoved; - VMA_ASSERT(bytesMoved <= maxBytesToMove); - VMA_ASSERT(allocationsMoved <= maxAllocationsToMove); - maxBytesToMove -= bytesMoved; - maxAllocationsToMove -= allocationsMoved; - } - - // Free empty blocks. - m_HasEmptyBlock = false; - for(size_t blockIndex = m_Blocks.size(); blockIndex--; ) - { - VmaDeviceMemoryBlock* pBlock = m_Blocks[blockIndex]; - if(pBlock->m_Metadata.IsEmpty()) - { - if(m_Blocks.size() > m_MinBlockCount) - { - if(pDefragmentationStats != VMA_NULL) - { - ++pDefragmentationStats->deviceMemoryBlocksFreed; - pDefragmentationStats->bytesFreed += pBlock->m_Metadata.GetSize(); - } - - VmaVectorRemove(m_Blocks, blockIndex); - pBlock->Destroy(m_hAllocator); - vma_delete(m_hAllocator, pBlock); - } - else - { - m_HasEmptyBlock = true; - } - } - } - - return result; -} - -void VmaBlockVector::DestroyDefragmentator() -{ - if(m_pDefragmentator != VMA_NULL) - { - vma_delete(m_hAllocator, m_pDefragmentator); - m_pDefragmentator = VMA_NULL; - } -} - -void VmaBlockVector::MakePoolAllocationsLost( - uint32_t currentFrameIndex, - size_t* pLostAllocationCount) -{ - VmaMutexLock lock(m_Mutex, m_hAllocator->m_UseMutex); - - for(uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) - { - VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; - VMA_ASSERT(pBlock); - pBlock->m_Metadata.MakeAllocationsLost(currentFrameIndex, m_FrameInUseCount); - } -} - -void VmaBlockVector::AddStats(VmaStats* pStats) -{ - const uint32_t memTypeIndex = 
m_MemoryTypeIndex; - const uint32_t memHeapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(memTypeIndex); - - VmaMutexLock lock(m_Mutex, m_hAllocator->m_UseMutex); - - for(uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) - { - const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; - VMA_ASSERT(pBlock); - VMA_HEAVY_ASSERT(pBlock->Validate()); - VmaStatInfo allocationStatInfo; - pBlock->m_Metadata.CalcAllocationStatInfo(allocationStatInfo); - VmaAddStatInfo(pStats->total, allocationStatInfo); - VmaAddStatInfo(pStats->memoryType[memTypeIndex], allocationStatInfo); - VmaAddStatInfo(pStats->memoryHeap[memHeapIndex], allocationStatInfo); - } -} - -//////////////////////////////////////////////////////////////////////////////// -// VmaDefragmentator members definition - -VmaDefragmentator::VmaDefragmentator( - VmaAllocator hAllocator, - VmaBlockVector* pBlockVector, - uint32_t currentFrameIndex) : - m_hAllocator(hAllocator), - m_pBlockVector(pBlockVector), - m_CurrentFrameIndex(currentFrameIndex), - m_BytesMoved(0), - m_AllocationsMoved(0), - m_Allocations(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), - m_Blocks(VmaStlAllocator(hAllocator->GetAllocationCallbacks())) -{ -} - -VmaDefragmentator::~VmaDefragmentator() -{ - for(size_t i = m_Blocks.size(); i--; ) - { - vma_delete(m_hAllocator, m_Blocks[i]); - } -} - -void VmaDefragmentator::AddAllocation(VmaAllocation hAlloc, VkBool32* pChanged) -{ - AllocationInfo allocInfo; - allocInfo.m_hAllocation = hAlloc; - allocInfo.m_pChanged = pChanged; - m_Allocations.push_back(allocInfo); -} - -VkResult VmaDefragmentator::BlockInfo::EnsureMapping(VmaAllocator hAllocator, void** ppMappedData) -{ - // It has already been mapped for defragmentation. - if(m_pMappedDataForDefragmentation) - { - *ppMappedData = m_pMappedDataForDefragmentation; - return VK_SUCCESS; - } - - // It is originally mapped. 
- if(m_pBlock->m_Mapping.GetMappedData()) - { - *ppMappedData = m_pBlock->m_Mapping.GetMappedData(); - return VK_SUCCESS; - } - - // Map on first usage. - VkResult res = m_pBlock->Map(hAllocator, &m_pMappedDataForDefragmentation); - *ppMappedData = m_pMappedDataForDefragmentation; - return res; -} - -void VmaDefragmentator::BlockInfo::Unmap(VmaAllocator hAllocator) -{ - if(m_pMappedDataForDefragmentation != VMA_NULL) - { - m_pBlock->Unmap(hAllocator); - } -} - -VkResult VmaDefragmentator::DefragmentRound( - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove) -{ - if(m_Blocks.empty()) - { - return VK_SUCCESS; - } - - size_t srcBlockIndex = m_Blocks.size() - 1; - size_t srcAllocIndex = SIZE_MAX; - for(;;) - { - // 1. Find next allocation to move. - // 1.1. Start from last to first m_Blocks - they are sorted from most "destination" to most "source". - // 1.2. Then start from last to first m_Allocations - they are sorted from largest to smallest. - while(srcAllocIndex >= m_Blocks[srcBlockIndex]->m_Allocations.size()) - { - if(m_Blocks[srcBlockIndex]->m_Allocations.empty()) - { - // Finished: no more allocations to process. - if(srcBlockIndex == 0) - { - return VK_SUCCESS; - } - else - { - --srcBlockIndex; - srcAllocIndex = SIZE_MAX; - } - } - else - { - srcAllocIndex = m_Blocks[srcBlockIndex]->m_Allocations.size() - 1; - } - } - - BlockInfo* pSrcBlockInfo = m_Blocks[srcBlockIndex]; - AllocationInfo& allocInfo = pSrcBlockInfo->m_Allocations[srcAllocIndex]; - - const VkDeviceSize size = allocInfo.m_hAllocation->GetSize(); - const VkDeviceSize srcOffset = allocInfo.m_hAllocation->GetOffset(); - const VkDeviceSize alignment = allocInfo.m_hAllocation->GetAlignment(); - const VmaSuballocationType suballocType = allocInfo.m_hAllocation->GetSuballocationType(); - - // 2. Try to find new place for this allocation in preceding or current block. 
- for(size_t dstBlockIndex = 0; dstBlockIndex <= srcBlockIndex; ++dstBlockIndex) - { - BlockInfo* pDstBlockInfo = m_Blocks[dstBlockIndex]; - VmaAllocationRequest dstAllocRequest; - if(pDstBlockInfo->m_pBlock->m_Metadata.CreateAllocationRequest( - m_CurrentFrameIndex, - m_pBlockVector->GetFrameInUseCount(), - m_pBlockVector->GetBufferImageGranularity(), - size, - alignment, - suballocType, - false, // canMakeOtherLost - &dstAllocRequest) && - MoveMakesSense( - dstBlockIndex, dstAllocRequest.offset, srcBlockIndex, srcOffset)) - { - VMA_ASSERT(dstAllocRequest.itemsToMakeLostCount == 0); - - // Reached limit on number of allocations or bytes to move. - if((m_AllocationsMoved + 1 > maxAllocationsToMove) || - (m_BytesMoved + size > maxBytesToMove)) - { - return VK_INCOMPLETE; - } - - void* pDstMappedData = VMA_NULL; - VkResult res = pDstBlockInfo->EnsureMapping(m_hAllocator, &pDstMappedData); - if(res != VK_SUCCESS) - { - return res; - } - - void* pSrcMappedData = VMA_NULL; - res = pSrcBlockInfo->EnsureMapping(m_hAllocator, &pSrcMappedData); - if(res != VK_SUCCESS) - { - return res; - } - - // THE PLACE WHERE ACTUAL DATA COPY HAPPENS. - memcpy( - reinterpret_cast(pDstMappedData) + dstAllocRequest.offset, - reinterpret_cast(pSrcMappedData) + srcOffset, - static_cast(size)); - - pDstBlockInfo->m_pBlock->m_Metadata.Alloc(dstAllocRequest, suballocType, size, allocInfo.m_hAllocation); - pSrcBlockInfo->m_pBlock->m_Metadata.Free(allocInfo.m_hAllocation); - - allocInfo.m_hAllocation->ChangeBlockAllocation(pDstBlockInfo->m_pBlock, dstAllocRequest.offset); - - if(allocInfo.m_pChanged != VMA_NULL) - { - *allocInfo.m_pChanged = VK_TRUE; - } - - ++m_AllocationsMoved; - m_BytesMoved += size; - - VmaVectorRemove(pSrcBlockInfo->m_Allocations, srcAllocIndex); - - break; - } - } - - // If not processed, this allocInfo remains in pBlockInfo->m_Allocations for next round. 
- - if(srcAllocIndex > 0) - { - --srcAllocIndex; - } - else - { - if(srcBlockIndex > 0) - { - --srcBlockIndex; - srcAllocIndex = SIZE_MAX; - } - else - { - return VK_SUCCESS; - } - } - } -} - -VkResult VmaDefragmentator::Defragment( - VkDeviceSize maxBytesToMove, - uint32_t maxAllocationsToMove) -{ - if(m_Allocations.empty()) - { - return VK_SUCCESS; - } - - // Create block info for each block. - const size_t blockCount = m_pBlockVector->m_Blocks.size(); - for(size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) - { - BlockInfo* pBlockInfo = vma_new(m_hAllocator, BlockInfo)(m_hAllocator->GetAllocationCallbacks()); - pBlockInfo->m_pBlock = m_pBlockVector->m_Blocks[blockIndex]; - m_Blocks.push_back(pBlockInfo); - } - - // Sort them by m_pBlock pointer value. - VMA_SORT(m_Blocks.begin(), m_Blocks.end(), BlockPointerLess()); - - // Move allocation infos from m_Allocations to appropriate m_Blocks[memTypeIndex].m_Allocations. - for(size_t blockIndex = 0, allocCount = m_Allocations.size(); blockIndex < allocCount; ++blockIndex) - { - AllocationInfo& allocInfo = m_Allocations[blockIndex]; - // Now as we are inside VmaBlockVector::m_Mutex, we can make final check if this allocation was not lost. - if(allocInfo.m_hAllocation->GetLastUseFrameIndex() != VMA_FRAME_INDEX_LOST) - { - VmaDeviceMemoryBlock* pBlock = allocInfo.m_hAllocation->GetBlock(); - BlockInfoVector::iterator it = VmaBinaryFindFirstNotLess(m_Blocks.begin(), m_Blocks.end(), pBlock, BlockPointerLess()); - if(it != m_Blocks.end() && (*it)->m_pBlock == pBlock) - { - (*it)->m_Allocations.push_back(allocInfo); - } - else - { - VMA_ASSERT(0); - } - } - } - m_Allocations.clear(); - - for(size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) - { - BlockInfo* pBlockInfo = m_Blocks[blockIndex]; - pBlockInfo->CalcHasNonMovableAllocations(); - pBlockInfo->SortAllocationsBySizeDescecnding(); - } - - // Sort m_Blocks this time by the main criterium, from most "destination" to most "source" blocks. 
- VMA_SORT(m_Blocks.begin(), m_Blocks.end(), BlockInfoCompareMoveDestination()); - - // Execute defragmentation rounds (the main part). - VkResult result = VK_SUCCESS; - for(size_t round = 0; (round < 2) && (result == VK_SUCCESS); ++round) - { - result = DefragmentRound(maxBytesToMove, maxAllocationsToMove); - } - - // Unmap blocks that were mapped for defragmentation. - for(size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex) - { - m_Blocks[blockIndex]->Unmap(m_hAllocator); - } - - return result; -} - -bool VmaDefragmentator::MoveMakesSense( - size_t dstBlockIndex, VkDeviceSize dstOffset, - size_t srcBlockIndex, VkDeviceSize srcOffset) -{ - if(dstBlockIndex < srcBlockIndex) - { - return true; - } - if(dstBlockIndex > srcBlockIndex) - { - return false; - } - if(dstOffset < srcOffset) - { - return true; - } - return false; -} - -//////////////////////////////////////////////////////////////////////////////// -// VmaAllocator_T - -VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : - m_UseMutex((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT) == 0), - m_UseKhrDedicatedAllocation((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0), - m_PhysicalDevice(pCreateInfo->physicalDevice), - m_hDevice(pCreateInfo->device), - m_AllocationCallbacksSpecified(pCreateInfo->pAllocationCallbacks != VMA_NULL), - m_AllocationCallbacks(pCreateInfo->pAllocationCallbacks ? 
- *pCreateInfo->pAllocationCallbacks : VmaEmptyAllocationCallbacks), - m_PreferredLargeHeapBlockSize(0), - m_PreferredSmallHeapBlockSize(0), - m_CurrentFrameIndex(0), - m_Pools(VmaStlAllocator(GetAllocationCallbacks())) -{ - VMA_ASSERT(pCreateInfo->physicalDevice && pCreateInfo->device); - - memset(&m_DeviceMemoryCallbacks, 0 ,sizeof(m_DeviceMemoryCallbacks)); - memset(&m_MemProps, 0, sizeof(m_MemProps)); - memset(&m_PhysicalDeviceProperties, 0, sizeof(m_PhysicalDeviceProperties)); - - memset(&m_pBlockVectors, 0, sizeof(m_pBlockVectors)); - memset(&m_pDedicatedAllocations, 0, sizeof(m_pDedicatedAllocations)); - - for(uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i) - { - m_HeapSizeLimit[i] = VK_WHOLE_SIZE; - } - - if(pCreateInfo->pDeviceMemoryCallbacks != VMA_NULL) - { - m_DeviceMemoryCallbacks.pfnAllocate = pCreateInfo->pDeviceMemoryCallbacks->pfnAllocate; - m_DeviceMemoryCallbacks.pfnFree = pCreateInfo->pDeviceMemoryCallbacks->pfnFree; - } - - ImportVulkanFunctions(pCreateInfo->pVulkanFunctions); - - (*m_VulkanFunctions.vkGetPhysicalDeviceProperties)(m_PhysicalDevice, &m_PhysicalDeviceProperties); - (*m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties)(m_PhysicalDevice, &m_MemProps); - - m_PreferredLargeHeapBlockSize = (pCreateInfo->preferredLargeHeapBlockSize != 0) ? - pCreateInfo->preferredLargeHeapBlockSize : static_cast(VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE); - m_PreferredSmallHeapBlockSize = (pCreateInfo->preferredSmallHeapBlockSize != 0) ? 
- pCreateInfo->preferredSmallHeapBlockSize : static_cast(VMA_DEFAULT_SMALL_HEAP_BLOCK_SIZE); - - if(pCreateInfo->pHeapSizeLimit != VMA_NULL) - { - for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex) - { - const VkDeviceSize limit = pCreateInfo->pHeapSizeLimit[heapIndex]; - if(limit != VK_WHOLE_SIZE) - { - m_HeapSizeLimit[heapIndex] = limit; - if(limit < m_MemProps.memoryHeaps[heapIndex].size) - { - m_MemProps.memoryHeaps[heapIndex].size = limit; - } - } - } - } - - for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) - { - const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(memTypeIndex); - - m_pBlockVectors[memTypeIndex] = vma_new(this, VmaBlockVector)( - this, - memTypeIndex, - preferredBlockSize, - 0, - SIZE_MAX, - GetBufferImageGranularity(), - pCreateInfo->frameInUseCount, - false); // isCustomPool - // No need to call m_pBlockVectors[memTypeIndex][blockVectorTypeIndex]->CreateMinBlocks here, - // becase minBlockCount is 0. 
- m_pDedicatedAllocations[memTypeIndex] = vma_new(this, AllocationVectorType)(VmaStlAllocator(GetAllocationCallbacks())); - } -} - -VmaAllocator_T::~VmaAllocator_T() -{ - VMA_ASSERT(m_Pools.empty()); - - for(size_t i = GetMemoryTypeCount(); i--; ) - { - vma_delete(this, m_pDedicatedAllocations[i]); - vma_delete(this, m_pBlockVectors[i]); - } -} - -void VmaAllocator_T::ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunctions) -{ -#if VMA_STATIC_VULKAN_FUNCTIONS == 1 - m_VulkanFunctions.vkGetPhysicalDeviceProperties = &vkGetPhysicalDeviceProperties; - m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties = &vkGetPhysicalDeviceMemoryProperties; - m_VulkanFunctions.vkAllocateMemory = &vkAllocateMemory; - m_VulkanFunctions.vkFreeMemory = &vkFreeMemory; - m_VulkanFunctions.vkMapMemory = &vkMapMemory; - m_VulkanFunctions.vkUnmapMemory = &vkUnmapMemory; - m_VulkanFunctions.vkBindBufferMemory = &vkBindBufferMemory; - m_VulkanFunctions.vkBindImageMemory = &vkBindImageMemory; - m_VulkanFunctions.vkGetBufferMemoryRequirements = &vkGetBufferMemoryRequirements; - m_VulkanFunctions.vkGetImageMemoryRequirements = &vkGetImageMemoryRequirements; - m_VulkanFunctions.vkCreateBuffer = &vkCreateBuffer; - m_VulkanFunctions.vkDestroyBuffer = &vkDestroyBuffer; - m_VulkanFunctions.vkCreateImage = &vkCreateImage; - m_VulkanFunctions.vkDestroyImage = &vkDestroyImage; - if(m_UseKhrDedicatedAllocation) - { - m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR = - (PFN_vkGetBufferMemoryRequirements2KHR)vkGetDeviceProcAddr(m_hDevice, "vkGetBufferMemoryRequirements2KHR"); - m_VulkanFunctions.vkGetImageMemoryRequirements2KHR = - (PFN_vkGetImageMemoryRequirements2KHR)vkGetDeviceProcAddr(m_hDevice, "vkGetImageMemoryRequirements2KHR"); - } -#endif // #if VMA_STATIC_VULKAN_FUNCTIONS == 1 - -#define VMA_COPY_IF_NOT_NULL(funcName) \ - if(pVulkanFunctions->funcName != VMA_NULL) m_VulkanFunctions.funcName = pVulkanFunctions->funcName; - - if(pVulkanFunctions != VMA_NULL) - { - 
VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceProperties); - VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties); - VMA_COPY_IF_NOT_NULL(vkAllocateMemory); - VMA_COPY_IF_NOT_NULL(vkFreeMemory); - VMA_COPY_IF_NOT_NULL(vkMapMemory); - VMA_COPY_IF_NOT_NULL(vkUnmapMemory); - VMA_COPY_IF_NOT_NULL(vkBindBufferMemory); - VMA_COPY_IF_NOT_NULL(vkBindImageMemory); - VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements); - VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements); - VMA_COPY_IF_NOT_NULL(vkCreateBuffer); - VMA_COPY_IF_NOT_NULL(vkDestroyBuffer); - VMA_COPY_IF_NOT_NULL(vkCreateImage); - VMA_COPY_IF_NOT_NULL(vkDestroyImage); - VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements2KHR); - VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements2KHR); - } - -#undef VMA_COPY_IF_NOT_NULL - - // If these asserts are hit, you must either #define VMA_STATIC_VULKAN_FUNCTIONS 1 - // or pass valid pointers as VmaAllocatorCreateInfo::pVulkanFunctions. - VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceProperties != VMA_NULL); - VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties != VMA_NULL); - VMA_ASSERT(m_VulkanFunctions.vkAllocateMemory != VMA_NULL); - VMA_ASSERT(m_VulkanFunctions.vkFreeMemory != VMA_NULL); - VMA_ASSERT(m_VulkanFunctions.vkMapMemory != VMA_NULL); - VMA_ASSERT(m_VulkanFunctions.vkUnmapMemory != VMA_NULL); - VMA_ASSERT(m_VulkanFunctions.vkBindBufferMemory != VMA_NULL); - VMA_ASSERT(m_VulkanFunctions.vkBindImageMemory != VMA_NULL); - VMA_ASSERT(m_VulkanFunctions.vkGetBufferMemoryRequirements != VMA_NULL); - VMA_ASSERT(m_VulkanFunctions.vkGetImageMemoryRequirements != VMA_NULL); - VMA_ASSERT(m_VulkanFunctions.vkCreateBuffer != VMA_NULL); - VMA_ASSERT(m_VulkanFunctions.vkDestroyBuffer != VMA_NULL); - VMA_ASSERT(m_VulkanFunctions.vkCreateImage != VMA_NULL); - VMA_ASSERT(m_VulkanFunctions.vkDestroyImage != VMA_NULL); - if(m_UseKhrDedicatedAllocation) - { - VMA_ASSERT(m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR != VMA_NULL); - 
VMA_ASSERT(m_VulkanFunctions.vkGetImageMemoryRequirements2KHR != VMA_NULL); - } -} - -VkDeviceSize VmaAllocator_T::CalcPreferredBlockSize(uint32_t memTypeIndex) -{ - const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); - const VkDeviceSize heapSize = m_MemProps.memoryHeaps[heapIndex].size; - return (heapSize <= VMA_SMALL_HEAP_MAX_SIZE) ? - m_PreferredSmallHeapBlockSize : m_PreferredLargeHeapBlockSize; -} - -VkResult VmaAllocator_T::AllocateMemoryOfType( - const VkMemoryRequirements& vkMemReq, - bool dedicatedAllocation, - VkBuffer dedicatedBuffer, - VkImage dedicatedImage, - const VmaAllocationCreateInfo& createInfo, - uint32_t memTypeIndex, - VmaSuballocationType suballocType, - VmaAllocation* pAllocation) -{ - VMA_ASSERT(pAllocation != VMA_NULL); - VMA_DEBUG_LOG(" AllocateMemory: MemoryTypeIndex=%u, Size=%llu", memTypeIndex, vkMemReq.size); - - VmaAllocationCreateInfo finalCreateInfo = createInfo; - - // If memory type is not HOST_VISIBLE, disable MAPPED. - if((finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0 && - (m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) - { - finalCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_MAPPED_BIT; - } - - VmaBlockVector* const blockVector = m_pBlockVectors[memTypeIndex]; - VMA_ASSERT(blockVector); - - const VkDeviceSize preferredBlockSize = blockVector->GetPreferredBlockSize(); - bool preferDedicatedMemory = - VMA_DEBUG_ALWAYS_DEDICATED_MEMORY || - dedicatedAllocation || - // Heuristics: Allocate dedicated memory if requested size if greater than half of preferred block size. 
- vkMemReq.size > preferredBlockSize / 2; - - if(preferDedicatedMemory && - (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0 && - finalCreateInfo.pool == VK_NULL_HANDLE) - { - finalCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; - } - - if((finalCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0) - { - if((finalCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) - { - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - else - { - return AllocateDedicatedMemory( - vkMemReq.size, - suballocType, - memTypeIndex, - (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, - (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, - finalCreateInfo.pUserData, - dedicatedBuffer, - dedicatedImage, - pAllocation); - } - } - else - { - VkResult res = blockVector->Allocate( - VK_NULL_HANDLE, // hCurrentPool - m_CurrentFrameIndex.load(), - vkMemReq, - finalCreateInfo, - suballocType, - pAllocation); - if(res == VK_SUCCESS) - { - return res; - } - - // 5. Try dedicated memory. - if((finalCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) - { - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - else - { - res = AllocateDedicatedMemory( - vkMemReq.size, - suballocType, - memTypeIndex, - (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, - (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, - finalCreateInfo.pUserData, - dedicatedBuffer, - dedicatedImage, - pAllocation); - if(res == VK_SUCCESS) - { - // Succeeded: AllocateDedicatedMemory function already filld pMemory, nothing more to do here. - VMA_DEBUG_LOG(" Allocated as DedicatedMemory"); - return VK_SUCCESS; - } - else - { - // Everything failed: Return error code. 
- VMA_DEBUG_LOG(" vkAllocateMemory FAILED"); - return res; - } - } - } -} - -VkResult VmaAllocator_T::AllocateDedicatedMemory( - VkDeviceSize size, - VmaSuballocationType suballocType, - uint32_t memTypeIndex, - bool map, - bool isUserDataString, - void* pUserData, - VkBuffer dedicatedBuffer, - VkImage dedicatedImage, - VmaAllocation* pAllocation) -{ - VMA_ASSERT(pAllocation); - - VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; - allocInfo.memoryTypeIndex = memTypeIndex; - allocInfo.allocationSize = size; - - VkMemoryDedicatedAllocateInfoKHR dedicatedAllocInfo = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR }; - if(m_UseKhrDedicatedAllocation) - { - if(dedicatedBuffer != VK_NULL_HANDLE) - { - VMA_ASSERT(dedicatedImage == VK_NULL_HANDLE); - dedicatedAllocInfo.buffer = dedicatedBuffer; - allocInfo.pNext = &dedicatedAllocInfo; - } - else if(dedicatedImage != VK_NULL_HANDLE) - { - dedicatedAllocInfo.image = dedicatedImage; - allocInfo.pNext = &dedicatedAllocInfo; - } - } - - // Allocate VkDeviceMemory. - VkDeviceMemory hMemory = VK_NULL_HANDLE; - VkResult res = AllocateVulkanMemory(&allocInfo, &hMemory); - if(res < 0) - { - VMA_DEBUG_LOG(" vkAllocateMemory FAILED"); - return res; - } - - void* pMappedData = nullptr; - if(map) - { - res = (*m_VulkanFunctions.vkMapMemory)( - m_hDevice, - hMemory, - 0, - VK_WHOLE_SIZE, - 0, - &pMappedData); - if(res < 0) - { - VMA_DEBUG_LOG(" vkMapMemory FAILED"); - FreeVulkanMemory(memTypeIndex, size, hMemory); - return res; - } - } - - *pAllocation = vma_new(this, VmaAllocation_T)(m_CurrentFrameIndex.load(), isUserDataString); - (*pAllocation)->InitDedicatedAllocation(memTypeIndex, hMemory, suballocType, pMappedData, size); - (*pAllocation)->SetUserData(this, pUserData); - - // Register it in m_pDedicatedAllocations. 
- { - VmaMutexLock lock(m_DedicatedAllocationsMutex[memTypeIndex], m_UseMutex); - AllocationVectorType* pDedicatedAllocations = m_pDedicatedAllocations[memTypeIndex]; - VMA_ASSERT(pDedicatedAllocations); - VmaVectorInsertSorted(*pDedicatedAllocations, *pAllocation); - } - - VMA_DEBUG_LOG(" Allocated DedicatedMemory MemoryTypeIndex=#%u", memTypeIndex); - - return VK_SUCCESS; -} - -void VmaAllocator_T::GetBufferMemoryRequirements( - VkBuffer hBuffer, - VkMemoryRequirements& memReq, - bool& requiresDedicatedAllocation, - bool& prefersDedicatedAllocation) const -{ - if(m_UseKhrDedicatedAllocation) - { - VkBufferMemoryRequirementsInfo2KHR memReqInfo = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2_KHR }; - memReqInfo.buffer = hBuffer; - - VkMemoryDedicatedRequirementsKHR memDedicatedReq = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR }; - - VkMemoryRequirements2KHR memReq2 = { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR }; - memReq2.pNext = &memDedicatedReq; - - (*m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR)(m_hDevice, &memReqInfo, &memReq2); - - memReq = memReq2.memoryRequirements; - requiresDedicatedAllocation = (memDedicatedReq.requiresDedicatedAllocation != VK_FALSE); - prefersDedicatedAllocation = (memDedicatedReq.prefersDedicatedAllocation != VK_FALSE); - } - else - { - (*m_VulkanFunctions.vkGetBufferMemoryRequirements)(m_hDevice, hBuffer, &memReq); - requiresDedicatedAllocation = false; - prefersDedicatedAllocation = false; - } -} - -void VmaAllocator_T::GetImageMemoryRequirements( - VkImage hImage, - VkMemoryRequirements& memReq, - bool& requiresDedicatedAllocation, - bool& prefersDedicatedAllocation) const -{ - if(m_UseKhrDedicatedAllocation) - { - VkImageMemoryRequirementsInfo2KHR memReqInfo = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR }; - memReqInfo.image = hImage; - - VkMemoryDedicatedRequirementsKHR memDedicatedReq = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR }; - - VkMemoryRequirements2KHR memReq2 = { 
VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR }; - memReq2.pNext = &memDedicatedReq; - - (*m_VulkanFunctions.vkGetImageMemoryRequirements2KHR)(m_hDevice, &memReqInfo, &memReq2); - - memReq = memReq2.memoryRequirements; - requiresDedicatedAllocation = (memDedicatedReq.requiresDedicatedAllocation != VK_FALSE); - prefersDedicatedAllocation = (memDedicatedReq.prefersDedicatedAllocation != VK_FALSE); - } - else - { - (*m_VulkanFunctions.vkGetImageMemoryRequirements)(m_hDevice, hImage, &memReq); - requiresDedicatedAllocation = false; - prefersDedicatedAllocation = false; - } -} - -VkResult VmaAllocator_T::AllocateMemory( - const VkMemoryRequirements& vkMemReq, - bool requiresDedicatedAllocation, - bool prefersDedicatedAllocation, - VkBuffer dedicatedBuffer, - VkImage dedicatedImage, - const VmaAllocationCreateInfo& createInfo, - VmaSuballocationType suballocType, - VmaAllocation* pAllocation) -{ - if((createInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0 && - (createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) - { - VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT together with VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT makes no sense."); - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - if((createInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0 && - (createInfo.flags & VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT) != 0) - { - VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_MAPPED_BIT together with VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT is invalid."); - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - if(requiresDedicatedAllocation) - { - if((createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) - { - VMA_ASSERT(0 && "VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT specified while dedicated allocation is required."); - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - if(createInfo.pool != VK_NULL_HANDLE) - { - VMA_ASSERT(0 && "Pool specified while dedicated allocation is required."); - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } 
- } - if((createInfo.pool != VK_NULL_HANDLE) && - ((createInfo.flags & (VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT)) != 0)) - { - VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT when pool != null is invalid."); - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - - if(createInfo.pool != VK_NULL_HANDLE) - { - return createInfo.pool->m_BlockVector.Allocate( - createInfo.pool, - m_CurrentFrameIndex.load(), - vkMemReq, - createInfo, - suballocType, - pAllocation); - } - else - { - // Bit mask of memory Vulkan types acceptable for this allocation. - uint32_t memoryTypeBits = vkMemReq.memoryTypeBits; - uint32_t memTypeIndex = UINT32_MAX; - VkResult res = vmaFindMemoryTypeIndex(this, memoryTypeBits, &createInfo, &memTypeIndex); - if(res == VK_SUCCESS) - { - res = AllocateMemoryOfType( - vkMemReq, - requiresDedicatedAllocation || prefersDedicatedAllocation, - dedicatedBuffer, - dedicatedImage, - createInfo, - memTypeIndex, - suballocType, - pAllocation); - // Succeeded on first try. - if(res == VK_SUCCESS) - { - return res; - } - // Allocation from this memory type failed. Try other compatible memory types. - else - { - for(;;) - { - // Remove old memTypeIndex from list of possibilities. - memoryTypeBits &= ~(1u << memTypeIndex); - // Find alternative memTypeIndex. - res = vmaFindMemoryTypeIndex(this, memoryTypeBits, &createInfo, &memTypeIndex); - if(res == VK_SUCCESS) - { - res = AllocateMemoryOfType( - vkMemReq, - requiresDedicatedAllocation || prefersDedicatedAllocation, - dedicatedBuffer, - dedicatedImage, - createInfo, - memTypeIndex, - suballocType, - pAllocation); - // Allocation from this alternative memory type succeeded. - if(res == VK_SUCCESS) - { - return res; - } - // else: Allocation from this memory type failed. Try next one - next loop iteration. - } - // No other matching memory type index could be found. - else - { - // Not returning res, which is VK_ERROR_FEATURE_NOT_PRESENT, because we already failed to allocate once. 
- return VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - } - } - } - // Can't find any single memory type maching requirements. res is VK_ERROR_FEATURE_NOT_PRESENT. - else - return res; - } -} - -void VmaAllocator_T::FreeMemory(const VmaAllocation allocation) -{ - VMA_ASSERT(allocation); - - if(allocation->CanBecomeLost() == false || - allocation->GetLastUseFrameIndex() != VMA_FRAME_INDEX_LOST) - { - switch(allocation->GetType()) - { - case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: - { - VmaBlockVector* pBlockVector = VMA_NULL; - VmaPool hPool = allocation->GetPool(); - if(hPool != VK_NULL_HANDLE) - { - pBlockVector = &hPool->m_BlockVector; - } - else - { - const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); - pBlockVector = m_pBlockVectors[memTypeIndex]; - } - pBlockVector->Free(allocation); - } - break; - case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: - FreeDedicatedMemory(allocation); - break; - default: - VMA_ASSERT(0); - } - } - - allocation->SetUserData(this, VMA_NULL); - vma_delete(this, allocation); -} - -void VmaAllocator_T::CalculateStats(VmaStats* pStats) -{ - // Initialize. - InitStatInfo(pStats->total); - for(size_t i = 0; i < VK_MAX_MEMORY_TYPES; ++i) - InitStatInfo(pStats->memoryType[i]); - for(size_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i) - InitStatInfo(pStats->memoryHeap[i]); - - // Process default pools. - for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) - { - const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); - VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex]; - VMA_ASSERT(pBlockVector); - pBlockVector->AddStats(pStats); - } - - // Process custom pools. - { - VmaMutexLock lock(m_PoolsMutex, m_UseMutex); - for(size_t poolIndex = 0, poolCount = m_Pools.size(); poolIndex < poolCount; ++poolIndex) - { - m_Pools[poolIndex]->GetBlockVector().AddStats(pStats); - } - } - - // Process dedicated allocations. 
- for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) - { - const uint32_t memHeapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); - VmaMutexLock dedicatedAllocationsLock(m_DedicatedAllocationsMutex[memTypeIndex], m_UseMutex); - AllocationVectorType* const pDedicatedAllocVector = m_pDedicatedAllocations[memTypeIndex]; - VMA_ASSERT(pDedicatedAllocVector); - for(size_t allocIndex = 0, allocCount = pDedicatedAllocVector->size(); allocIndex < allocCount; ++allocIndex) - { - VmaStatInfo allocationStatInfo; - (*pDedicatedAllocVector)[allocIndex]->DedicatedAllocCalcStatsInfo(allocationStatInfo); - VmaAddStatInfo(pStats->total, allocationStatInfo); - VmaAddStatInfo(pStats->memoryType[memTypeIndex], allocationStatInfo); - VmaAddStatInfo(pStats->memoryHeap[memHeapIndex], allocationStatInfo); - } - } - - // Postprocess. - VmaPostprocessCalcStatInfo(pStats->total); - for(size_t i = 0; i < GetMemoryTypeCount(); ++i) - VmaPostprocessCalcStatInfo(pStats->memoryType[i]); - for(size_t i = 0; i < GetMemoryHeapCount(); ++i) - VmaPostprocessCalcStatInfo(pStats->memoryHeap[i]); -} - -static const uint32_t VMA_VENDOR_ID_AMD = 4098; - -VkResult VmaAllocator_T::Defragment( - VmaAllocation* pAllocations, - size_t allocationCount, - VkBool32* pAllocationsChanged, - const VmaDefragmentationInfo* pDefragmentationInfo, - VmaDefragmentationStats* pDefragmentationStats) -{ - if(pAllocationsChanged != VMA_NULL) - { - memset(pAllocationsChanged, 0, sizeof(*pAllocationsChanged)); - } - if(pDefragmentationStats != VMA_NULL) - { - memset(pDefragmentationStats, 0, sizeof(*pDefragmentationStats)); - } - - const uint32_t currentFrameIndex = m_CurrentFrameIndex.load(); - - VmaMutexLock poolsLock(m_PoolsMutex, m_UseMutex); - - const size_t poolCount = m_Pools.size(); - - // Dispatch pAllocations among defragmentators. Create them in BlockVectors when necessary. 
- for(size_t allocIndex = 0; allocIndex < allocationCount; ++allocIndex) - { - VmaAllocation hAlloc = pAllocations[allocIndex]; - VMA_ASSERT(hAlloc); - const uint32_t memTypeIndex = hAlloc->GetMemoryTypeIndex(); - // DedicatedAlloc cannot be defragmented. - if((hAlloc->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK) && - // Only HOST_VISIBLE memory types can be defragmented. - ((m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) && - // Lost allocation cannot be defragmented. - (hAlloc->GetLastUseFrameIndex() != VMA_FRAME_INDEX_LOST)) - { - VmaBlockVector* pAllocBlockVector = nullptr; - - const VmaPool hAllocPool = hAlloc->GetPool(); - // This allocation belongs to custom pool. - if(hAllocPool != VK_NULL_HANDLE) - { - pAllocBlockVector = &hAllocPool->GetBlockVector(); - } - // This allocation belongs to general pool. - else - { - pAllocBlockVector = m_pBlockVectors[memTypeIndex]; - } - - VmaDefragmentator* const pDefragmentator = pAllocBlockVector->EnsureDefragmentator(this, currentFrameIndex); - - VkBool32* const pChanged = (pAllocationsChanged != VMA_NULL) ? - &pAllocationsChanged[allocIndex] : VMA_NULL; - pDefragmentator->AddAllocation(hAlloc, pChanged); - } - } - - VkResult result = VK_SUCCESS; - - // ======== Main processing. - - VkDeviceSize maxBytesToMove = SIZE_MAX; - uint32_t maxAllocationsToMove = UINT32_MAX; - if(pDefragmentationInfo != VMA_NULL) - { - maxBytesToMove = pDefragmentationInfo->maxBytesToMove; - maxAllocationsToMove = pDefragmentationInfo->maxAllocationsToMove; - } - - // Process standard memory. - for(uint32_t memTypeIndex = 0; - (memTypeIndex < GetMemoryTypeCount()) && (result == VK_SUCCESS); - ++memTypeIndex) - { - // Only HOST_VISIBLE memory types can be defragmented. 
- if((m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) - { - result = m_pBlockVectors[memTypeIndex]->Defragment( - pDefragmentationStats, - maxBytesToMove, - maxAllocationsToMove); - } - } - - // Process custom pools. - for(size_t poolIndex = 0; (poolIndex < poolCount) && (result == VK_SUCCESS); ++poolIndex) - { - result = m_Pools[poolIndex]->GetBlockVector().Defragment( - pDefragmentationStats, - maxBytesToMove, - maxAllocationsToMove); - } - - // ======== Destroy defragmentators. - - // Process custom pools. - for(size_t poolIndex = poolCount; poolIndex--; ) - { - m_Pools[poolIndex]->GetBlockVector().DestroyDefragmentator(); - } - - // Process standard memory. - for(uint32_t memTypeIndex = GetMemoryTypeCount(); memTypeIndex--; ) - { - if((m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) - { - m_pBlockVectors[memTypeIndex]->DestroyDefragmentator(); - } - } - - return result; -} - -void VmaAllocator_T::GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo) -{ - if(hAllocation->CanBecomeLost()) - { - /* - Warning: This is a carefully designed algorithm. 
- Do not modify unless you really know what you're doing :) - */ - uint32_t localCurrFrameIndex = m_CurrentFrameIndex.load(); - uint32_t localLastUseFrameIndex = hAllocation->GetLastUseFrameIndex(); - for(;;) - { - if(localLastUseFrameIndex == VMA_FRAME_INDEX_LOST) - { - pAllocationInfo->memoryType = UINT32_MAX; - pAllocationInfo->deviceMemory = VK_NULL_HANDLE; - pAllocationInfo->offset = 0; - pAllocationInfo->size = hAllocation->GetSize(); - pAllocationInfo->pMappedData = VMA_NULL; - pAllocationInfo->pUserData = hAllocation->GetUserData(); - return; - } - else if(localLastUseFrameIndex == localCurrFrameIndex) - { - pAllocationInfo->memoryType = hAllocation->GetMemoryTypeIndex(); - pAllocationInfo->deviceMemory = hAllocation->GetMemory(); - pAllocationInfo->offset = hAllocation->GetOffset(); - pAllocationInfo->size = hAllocation->GetSize(); - pAllocationInfo->pMappedData = VMA_NULL; - pAllocationInfo->pUserData = hAllocation->GetUserData(); - return; - } - else // Last use time earlier than current time. 
- { - if(hAllocation->CompareExchangeLastUseFrameIndex(localLastUseFrameIndex, localCurrFrameIndex)) - { - localLastUseFrameIndex = localCurrFrameIndex; - } - } - } - } - else - { - pAllocationInfo->memoryType = hAllocation->GetMemoryTypeIndex(); - pAllocationInfo->deviceMemory = hAllocation->GetMemory(); - pAllocationInfo->offset = hAllocation->GetOffset(); - pAllocationInfo->size = hAllocation->GetSize(); - pAllocationInfo->pMappedData = hAllocation->GetMappedData(); - pAllocationInfo->pUserData = hAllocation->GetUserData(); - } -} - -VkResult VmaAllocator_T::CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool) -{ - VMA_DEBUG_LOG(" CreatePool: MemoryTypeIndex=%u", pCreateInfo->memoryTypeIndex); - - VmaPoolCreateInfo newCreateInfo = *pCreateInfo; - - if(newCreateInfo.maxBlockCount == 0) - { - newCreateInfo.maxBlockCount = SIZE_MAX; - } - if(newCreateInfo.blockSize == 0) - { - newCreateInfo.blockSize = CalcPreferredBlockSize(newCreateInfo.memoryTypeIndex); - } - - *pPool = vma_new(this, VmaPool_T)(this, newCreateInfo); - - VkResult res = (*pPool)->m_BlockVector.CreateMinBlocks(); - if(res != VK_SUCCESS) - { - vma_delete(this, *pPool); - *pPool = VMA_NULL; - return res; - } - - // Add to m_Pools. - { - VmaMutexLock lock(m_PoolsMutex, m_UseMutex); - VmaVectorInsertSorted(m_Pools, *pPool); - } - - return VK_SUCCESS; -} - -void VmaAllocator_T::DestroyPool(VmaPool pool) -{ - // Remove from m_Pools. 
- { - VmaMutexLock lock(m_PoolsMutex, m_UseMutex); - bool success = VmaVectorRemoveSorted(m_Pools, pool); - VMA_ASSERT(success && "Pool not found in Allocator."); - } - - vma_delete(this, pool); -} - -void VmaAllocator_T::GetPoolStats(VmaPool pool, VmaPoolStats* pPoolStats) -{ - pool->m_BlockVector.GetPoolStats(pPoolStats); -} - -void VmaAllocator_T::SetCurrentFrameIndex(uint32_t frameIndex) -{ - m_CurrentFrameIndex.store(frameIndex); -} - -void VmaAllocator_T::MakePoolAllocationsLost( - VmaPool hPool, - size_t* pLostAllocationCount) -{ - hPool->m_BlockVector.MakePoolAllocationsLost( - m_CurrentFrameIndex.load(), - pLostAllocationCount); -} - -void VmaAllocator_T::CreateLostAllocation(VmaAllocation* pAllocation) -{ - *pAllocation = vma_new(this, VmaAllocation_T)(VMA_FRAME_INDEX_LOST, false); - (*pAllocation)->InitLost(); -} - -VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory) -{ - const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(pAllocateInfo->memoryTypeIndex); - - VkResult res; - if(m_HeapSizeLimit[heapIndex] != VK_WHOLE_SIZE) - { - VmaMutexLock lock(m_HeapSizeLimitMutex, m_UseMutex); - if(m_HeapSizeLimit[heapIndex] >= pAllocateInfo->allocationSize) - { - res = (*m_VulkanFunctions.vkAllocateMemory)(m_hDevice, pAllocateInfo, GetAllocationCallbacks(), pMemory); - if(res == VK_SUCCESS) - { - m_HeapSizeLimit[heapIndex] -= pAllocateInfo->allocationSize; - } - } - else - { - res = VK_ERROR_OUT_OF_DEVICE_MEMORY; - } - } - else - { - res = (*m_VulkanFunctions.vkAllocateMemory)(m_hDevice, pAllocateInfo, GetAllocationCallbacks(), pMemory); - } - - if(res == VK_SUCCESS && m_DeviceMemoryCallbacks.pfnAllocate != VMA_NULL) - { - (*m_DeviceMemoryCallbacks.pfnAllocate)(this, pAllocateInfo->memoryTypeIndex, *pMemory, pAllocateInfo->allocationSize); - } - - return res; -} - -void VmaAllocator_T::FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, VkDeviceMemory hMemory) -{ - if(m_DeviceMemoryCallbacks.pfnFree 
!= VMA_NULL) - { - (*m_DeviceMemoryCallbacks.pfnFree)(this, memoryType, hMemory, size); - } - - (*m_VulkanFunctions.vkFreeMemory)(m_hDevice, hMemory, GetAllocationCallbacks()); - - const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memoryType); - if(m_HeapSizeLimit[heapIndex] != VK_WHOLE_SIZE) - { - VmaMutexLock lock(m_HeapSizeLimitMutex, m_UseMutex); - m_HeapSizeLimit[heapIndex] += size; - } -} - -VkResult VmaAllocator_T::Map(VmaAllocation hAllocation, void** ppData) -{ - if(hAllocation->CanBecomeLost()) - { - return VK_ERROR_MEMORY_MAP_FAILED; - } - - switch(hAllocation->GetType()) - { - case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: - { - VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock(); - char *pBytes = nullptr; - VkResult res = pBlock->Map(this, (void**)&pBytes); - if(res == VK_SUCCESS) - { - *ppData = pBytes + (ptrdiff_t)hAllocation->GetOffset(); - hAllocation->BlockAllocMap(); - } - return res; - } - case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: - return hAllocation->DedicatedAllocMap(this, ppData); - default: - VMA_ASSERT(0); - return VK_ERROR_MEMORY_MAP_FAILED; - } -} - -void VmaAllocator_T::Unmap(VmaAllocation hAllocation) -{ - switch(hAllocation->GetType()) - { - case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: - { - VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock(); - hAllocation->BlockAllocUnmap(); - pBlock->Unmap(this); - } - break; - case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: - hAllocation->DedicatedAllocUnmap(this); - break; - default: - VMA_ASSERT(0); - } -} - -void VmaAllocator_T::FreeDedicatedMemory(VmaAllocation allocation) -{ - VMA_ASSERT(allocation && allocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); - - const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); - { - VmaMutexLock lock(m_DedicatedAllocationsMutex[memTypeIndex], m_UseMutex); - AllocationVectorType* const pDedicatedAllocations = m_pDedicatedAllocations[memTypeIndex]; - VMA_ASSERT(pDedicatedAllocations); - bool success = 
VmaVectorRemoveSorted(*pDedicatedAllocations, allocation); - VMA_ASSERT(success); - } - - VkDeviceMemory hMemory = allocation->GetMemory(); - - if(allocation->GetMappedData() != VMA_NULL) - { - (*m_VulkanFunctions.vkUnmapMemory)(m_hDevice, hMemory); - } - - FreeVulkanMemory(memTypeIndex, allocation->GetSize(), hMemory); - - VMA_DEBUG_LOG(" Freed DedicatedMemory MemoryTypeIndex=%u", memTypeIndex); -} - -#if VMA_STATS_STRING_ENABLED - -void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json) -{ - bool dedicatedAllocationsStarted = false; - for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) - { - VmaMutexLock dedicatedAllocationsLock(m_DedicatedAllocationsMutex[memTypeIndex], m_UseMutex); - AllocationVectorType* const pDedicatedAllocVector = m_pDedicatedAllocations[memTypeIndex]; - VMA_ASSERT(pDedicatedAllocVector); - if(pDedicatedAllocVector->empty() == false) - { - if(dedicatedAllocationsStarted == false) - { - dedicatedAllocationsStarted = true; - json.WriteString("DedicatedAllocations"); - json.BeginObject(); - } - - json.BeginString("Type "); - json.ContinueString(memTypeIndex); - json.EndString(); - - json.BeginArray(); - - for(size_t i = 0; i < pDedicatedAllocVector->size(); ++i) - { - const VmaAllocation hAlloc = (*pDedicatedAllocVector)[i]; - json.BeginObject(true); - - json.WriteString("Type"); - json.WriteString(VMA_SUBALLOCATION_TYPE_NAMES[hAlloc->GetSuballocationType()]); - - json.WriteString("Size"); - json.WriteNumber(hAlloc->GetSize()); - - const void* pUserData = hAlloc->GetUserData(); - if(pUserData != VMA_NULL) - { - json.WriteString("UserData"); - if(hAlloc->IsUserDataString()) - { - json.WriteString((const char*)pUserData); - } - else - { - json.BeginString(); - json.ContinueString_Pointer(pUserData); - json.EndString(); - } - } - - json.EndObject(); - } - - json.EndArray(); - } - } - if(dedicatedAllocationsStarted) - { - json.EndObject(); - } - - { - bool allocationsStarted = false; - for(uint32_t memTypeIndex = 
0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) - { - if(m_pBlockVectors[memTypeIndex]->IsEmpty() == false) - { - if(allocationsStarted == false) - { - allocationsStarted = true; - json.WriteString("DefaultPools"); - json.BeginObject(); - } - - json.BeginString("Type "); - json.ContinueString(memTypeIndex); - json.EndString(); - - m_pBlockVectors[memTypeIndex]->PrintDetailedMap(json); - } - } - if(allocationsStarted) - { - json.EndObject(); - } - } - - { - VmaMutexLock lock(m_PoolsMutex, m_UseMutex); - const size_t poolCount = m_Pools.size(); - if(poolCount > 0) - { - json.WriteString("Pools"); - json.BeginArray(); - for(size_t poolIndex = 0; poolIndex < poolCount; ++poolIndex) - { - m_Pools[poolIndex]->m_BlockVector.PrintDetailedMap(json); - } - json.EndArray(); - } - } -} - -#endif // #if VMA_STATS_STRING_ENABLED - -static VkResult AllocateMemoryForImage( - VmaAllocator allocator, - VkImage image, - const VmaAllocationCreateInfo* pAllocationCreateInfo, - VmaSuballocationType suballocType, - VmaAllocation* pAllocation) -{ - VMA_ASSERT(allocator && (image != VK_NULL_HANDLE) && pAllocationCreateInfo && pAllocation); - - VkMemoryRequirements vkMemReq = {}; - bool requiresDedicatedAllocation = false; - bool prefersDedicatedAllocation = false; - allocator->GetImageMemoryRequirements(image, vkMemReq, - requiresDedicatedAllocation, prefersDedicatedAllocation); - - return allocator->AllocateMemory( - vkMemReq, - requiresDedicatedAllocation, - prefersDedicatedAllocation, - VK_NULL_HANDLE, // dedicatedBuffer - image, // dedicatedImage - *pAllocationCreateInfo, - suballocType, - pAllocation); -} - -//////////////////////////////////////////////////////////////////////////////// -// Public interface - -VkResult vmaCreateAllocator( - const VmaAllocatorCreateInfo* pCreateInfo, - VmaAllocator* pAllocator) -{ - VMA_ASSERT(pCreateInfo && pAllocator); - VMA_DEBUG_LOG("vmaCreateAllocator"); - *pAllocator = vma_new(pCreateInfo->pAllocationCallbacks, 
VmaAllocator_T)(pCreateInfo); - return VK_SUCCESS; -} - -void vmaDestroyAllocator( - VmaAllocator allocator) -{ - if(allocator != VK_NULL_HANDLE) - { - VMA_DEBUG_LOG("vmaDestroyAllocator"); - VkAllocationCallbacks allocationCallbacks = allocator->m_AllocationCallbacks; - vma_delete(&allocationCallbacks, allocator); - } -} - -void vmaGetPhysicalDeviceProperties( - VmaAllocator allocator, - const VkPhysicalDeviceProperties **ppPhysicalDeviceProperties) -{ - VMA_ASSERT(allocator && ppPhysicalDeviceProperties); - *ppPhysicalDeviceProperties = &allocator->m_PhysicalDeviceProperties; -} - -void vmaGetMemoryProperties( - VmaAllocator allocator, - const VkPhysicalDeviceMemoryProperties** ppPhysicalDeviceMemoryProperties) -{ - VMA_ASSERT(allocator && ppPhysicalDeviceMemoryProperties); - *ppPhysicalDeviceMemoryProperties = &allocator->m_MemProps; -} - -void vmaGetMemoryTypeProperties( - VmaAllocator allocator, - uint32_t memoryTypeIndex, - VkMemoryPropertyFlags* pFlags) -{ - VMA_ASSERT(allocator && pFlags); - VMA_ASSERT(memoryTypeIndex < allocator->GetMemoryTypeCount()); - *pFlags = allocator->m_MemProps.memoryTypes[memoryTypeIndex].propertyFlags; -} - -void vmaSetCurrentFrameIndex( - VmaAllocator allocator, - uint32_t frameIndex) -{ - VMA_ASSERT(allocator); - VMA_ASSERT(frameIndex != VMA_FRAME_INDEX_LOST); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - allocator->SetCurrentFrameIndex(frameIndex); -} - -void vmaCalculateStats( - VmaAllocator allocator, - VmaStats* pStats) -{ - VMA_ASSERT(allocator && pStats); - VMA_DEBUG_GLOBAL_MUTEX_LOCK - allocator->CalculateStats(pStats); -} - -#if VMA_STATS_STRING_ENABLED - -void vmaBuildStatsString( - VmaAllocator allocator, - char** ppStatsString, - VkBool32 detailedMap) -{ - VMA_ASSERT(allocator && ppStatsString); - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - VmaStringBuilder sb(allocator); - { - VmaJsonWriter json(allocator->GetAllocationCallbacks(), sb); - json.BeginObject(); - - VmaStats stats; - allocator->CalculateStats(&stats); - - 
json.WriteString("Total"); - VmaPrintStatInfo(json, stats.total); - - for(uint32_t heapIndex = 0; heapIndex < allocator->GetMemoryHeapCount(); ++heapIndex) - { - json.BeginString("Heap "); - json.ContinueString(heapIndex); - json.EndString(); - json.BeginObject(); - - json.WriteString("Size"); - json.WriteNumber(allocator->m_MemProps.memoryHeaps[heapIndex].size); - - json.WriteString("Flags"); - json.BeginArray(true); - if((allocator->m_MemProps.memoryHeaps[heapIndex].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) - { - json.WriteString("DEVICE_LOCAL"); - } - json.EndArray(); - - if(stats.memoryHeap[heapIndex].blockCount > 0) - { - json.WriteString("Stats"); - VmaPrintStatInfo(json, stats.memoryHeap[heapIndex]); - } - - for(uint32_t typeIndex = 0; typeIndex < allocator->GetMemoryTypeCount(); ++typeIndex) - { - if(allocator->MemoryTypeIndexToHeapIndex(typeIndex) == heapIndex) - { - json.BeginString("Type "); - json.ContinueString(typeIndex); - json.EndString(); - - json.BeginObject(); - - json.WriteString("Flags"); - json.BeginArray(true); - VkMemoryPropertyFlags flags = allocator->m_MemProps.memoryTypes[typeIndex].propertyFlags; - if((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) - { - json.WriteString("DEVICE_LOCAL"); - } - if((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) - { - json.WriteString("HOST_VISIBLE"); - } - if((flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0) - { - json.WriteString("HOST_COHERENT"); - } - if((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0) - { - json.WriteString("HOST_CACHED"); - } - if((flags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) != 0) - { - json.WriteString("LAZILY_ALLOCATED"); - } - json.EndArray(); - - if(stats.memoryType[typeIndex].blockCount > 0) - { - json.WriteString("Stats"); - VmaPrintStatInfo(json, stats.memoryType[typeIndex]); - } - - json.EndObject(); - } - } - - json.EndObject(); - } - if(detailedMap == VK_TRUE) - { - allocator->PrintDetailedMap(json); - } - - json.EndObject(); - } - - const size_t len 
= sb.GetLength(); - char* const pChars = vma_new_array(allocator, char, len + 1); - if(len > 0) - { - memcpy(pChars, sb.GetData(), len); - } - pChars[len] = '\0'; - *ppStatsString = pChars; -} - -void vmaFreeStatsString( - VmaAllocator allocator, - char* pStatsString) -{ - if(pStatsString != VMA_NULL) - { - VMA_ASSERT(allocator); - size_t len = strlen(pStatsString); - vma_delete_array(allocator, pStatsString, len + 1); - } -} - -#endif // #if VMA_STATS_STRING_ENABLED - -/* -This function is not protected by any mutex because it just reads immutable data. -*/ -VkResult vmaFindMemoryTypeIndex( - VmaAllocator allocator, - uint32_t memoryTypeBits, - const VmaAllocationCreateInfo* pAllocationCreateInfo, - uint32_t* pMemoryTypeIndex) -{ - VMA_ASSERT(allocator != VK_NULL_HANDLE); - VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); - VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); - - if(pAllocationCreateInfo->memoryTypeBits != 0) - { - memoryTypeBits &= pAllocationCreateInfo->memoryTypeBits; - } - - uint32_t requiredFlags = pAllocationCreateInfo->requiredFlags; - uint32_t preferredFlags = pAllocationCreateInfo->preferredFlags; - - // Convert usage to requiredFlags and preferredFlags. 
- switch(pAllocationCreateInfo->usage) - { - case VMA_MEMORY_USAGE_UNKNOWN: - break; - case VMA_MEMORY_USAGE_GPU_ONLY: - preferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - break; - case VMA_MEMORY_USAGE_CPU_ONLY: - requiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - break; - case VMA_MEMORY_USAGE_CPU_TO_GPU: - requiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - preferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - break; - case VMA_MEMORY_USAGE_GPU_TO_CPU: - requiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - preferredFlags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - break; - default: - break; - } - - *pMemoryTypeIndex = UINT32_MAX; - uint32_t minCost = UINT32_MAX; - for(uint32_t memTypeIndex = 0, memTypeBit = 1; - memTypeIndex < allocator->GetMemoryTypeCount(); - ++memTypeIndex, memTypeBit <<= 1) - { - // This memory type is acceptable according to memoryTypeBits bitmask. - if((memTypeBit & memoryTypeBits) != 0) - { - const VkMemoryPropertyFlags currFlags = - allocator->m_MemProps.memoryTypes[memTypeIndex].propertyFlags; - // This memory type contains requiredFlags. - if((requiredFlags & ~currFlags) == 0) - { - // Calculate cost as number of bits from preferredFlags not present in this memory type. - uint32_t currCost = VmaCountBitsSet(preferredFlags & ~currFlags); - // Remember memory type with lowest cost. - if(currCost < minCost) - { - *pMemoryTypeIndex = memTypeIndex; - if(currCost == 0) - { - return VK_SUCCESS; - } - minCost = currCost; - } - } - } - } - return (*pMemoryTypeIndex != UINT32_MAX) ? 
VK_SUCCESS : VK_ERROR_FEATURE_NOT_PRESENT; -} - -VkResult vmaCreatePool( - VmaAllocator allocator, - const VmaPoolCreateInfo* pCreateInfo, - VmaPool* pPool) -{ - VMA_ASSERT(allocator && pCreateInfo && pPool); - - VMA_DEBUG_LOG("vmaCreatePool"); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - return allocator->CreatePool(pCreateInfo, pPool); -} - -void vmaDestroyPool( - VmaAllocator allocator, - VmaPool pool) -{ - VMA_ASSERT(allocator); - - if(pool == VK_NULL_HANDLE) - { - return; - } - - VMA_DEBUG_LOG("vmaDestroyPool"); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - allocator->DestroyPool(pool); -} - -void vmaGetPoolStats( - VmaAllocator allocator, - VmaPool pool, - VmaPoolStats* pPoolStats) -{ - VMA_ASSERT(allocator && pool && pPoolStats); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - allocator->GetPoolStats(pool, pPoolStats); -} - -void vmaMakePoolAllocationsLost( - VmaAllocator allocator, - VmaPool pool, - size_t* pLostAllocationCount) -{ - VMA_ASSERT(allocator && pool); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - allocator->MakePoolAllocationsLost(pool, pLostAllocationCount); -} - -VkResult vmaAllocateMemory( - VmaAllocator allocator, - const VkMemoryRequirements* pVkMemoryRequirements, - const VmaAllocationCreateInfo* pCreateInfo, - VmaAllocation* pAllocation, - VmaAllocationInfo* pAllocationInfo) -{ - VMA_ASSERT(allocator && pVkMemoryRequirements && pCreateInfo && pAllocation); - - VMA_DEBUG_LOG("vmaAllocateMemory"); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - VkResult result = allocator->AllocateMemory( - *pVkMemoryRequirements, - false, // requiresDedicatedAllocation - false, // prefersDedicatedAllocation - VK_NULL_HANDLE, // dedicatedBuffer - VK_NULL_HANDLE, // dedicatedImage - *pCreateInfo, - VMA_SUBALLOCATION_TYPE_UNKNOWN, - pAllocation); - - if(pAllocationInfo && result == VK_SUCCESS) - { - allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); - } - - return result; -} - -VkResult vmaAllocateMemoryForBuffer( - VmaAllocator allocator, - VkBuffer buffer, - const VmaAllocationCreateInfo* 
pCreateInfo, - VmaAllocation* pAllocation, - VmaAllocationInfo* pAllocationInfo) -{ - VMA_ASSERT(allocator && buffer != VK_NULL_HANDLE && pCreateInfo && pAllocation); - - VMA_DEBUG_LOG("vmaAllocateMemoryForBuffer"); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - VkMemoryRequirements vkMemReq = {}; - bool requiresDedicatedAllocation = false; - bool prefersDedicatedAllocation = false; - allocator->GetBufferMemoryRequirements(buffer, vkMemReq, - requiresDedicatedAllocation, - prefersDedicatedAllocation); - - VkResult result = allocator->AllocateMemory( - vkMemReq, - requiresDedicatedAllocation, - prefersDedicatedAllocation, - buffer, // dedicatedBuffer - VK_NULL_HANDLE, // dedicatedImage - *pCreateInfo, - VMA_SUBALLOCATION_TYPE_BUFFER, - pAllocation); - - if(pAllocationInfo && result == VK_SUCCESS) - { - allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); - } - - return result; -} - -VkResult vmaAllocateMemoryForImage( - VmaAllocator allocator, - VkImage image, - const VmaAllocationCreateInfo* pCreateInfo, - VmaAllocation* pAllocation, - VmaAllocationInfo* pAllocationInfo) -{ - VMA_ASSERT(allocator && image != VK_NULL_HANDLE && pCreateInfo && pAllocation); - - VMA_DEBUG_LOG("vmaAllocateMemoryForImage"); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - VkResult result = AllocateMemoryForImage( - allocator, - image, - pCreateInfo, - VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN, - pAllocation); - - if(pAllocationInfo && result == VK_SUCCESS) - { - allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); - } - - return result; -} - -void vmaFreeMemory( - VmaAllocator allocator, - VmaAllocation allocation) -{ - VMA_ASSERT(allocator && allocation); - - VMA_DEBUG_LOG("vmaFreeMemory"); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - allocator->FreeMemory(allocation); -} - -void vmaGetAllocationInfo( - VmaAllocator allocator, - VmaAllocation allocation, - VmaAllocationInfo* pAllocationInfo) -{ - VMA_ASSERT(allocator && allocation && pAllocationInfo); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - 
allocator->GetAllocationInfo(allocation, pAllocationInfo); -} - -void vmaSetAllocationUserData( - VmaAllocator allocator, - VmaAllocation allocation, - void* pUserData) -{ - VMA_ASSERT(allocator && allocation); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - allocation->SetUserData(allocator, pUserData); -} - -void vmaCreateLostAllocation( - VmaAllocator allocator, - VmaAllocation* pAllocation) -{ - VMA_ASSERT(allocator && pAllocation); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK; - - allocator->CreateLostAllocation(pAllocation); -} - -VkResult vmaMapMemory( - VmaAllocator allocator, - VmaAllocation allocation, - void** ppData) -{ - VMA_ASSERT(allocator && allocation && ppData); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - return allocator->Map(allocation, ppData); -} - -void vmaUnmapMemory( - VmaAllocator allocator, - VmaAllocation allocation) -{ - VMA_ASSERT(allocator && allocation); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - allocator->Unmap(allocation); -} - -VkResult vmaDefragment( - VmaAllocator allocator, - VmaAllocation* pAllocations, - size_t allocationCount, - VkBool32* pAllocationsChanged, - const VmaDefragmentationInfo *pDefragmentationInfo, - VmaDefragmentationStats* pDefragmentationStats) -{ - VMA_ASSERT(allocator && pAllocations); - - VMA_DEBUG_LOG("vmaDefragment"); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - return allocator->Defragment(pAllocations, allocationCount, pAllocationsChanged, pDefragmentationInfo, pDefragmentationStats); -} - -VkResult vmaCreateBuffer( - VmaAllocator allocator, - const VkBufferCreateInfo* pBufferCreateInfo, - const VmaAllocationCreateInfo* pAllocationCreateInfo, - VkBuffer* pBuffer, - VmaAllocation* pAllocation, - VmaAllocationInfo* pAllocationInfo) -{ - VMA_ASSERT(allocator && pBufferCreateInfo && pAllocationCreateInfo && pBuffer && pAllocation); - - VMA_DEBUG_LOG("vmaCreateBuffer"); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - *pBuffer = VK_NULL_HANDLE; - *pAllocation = VK_NULL_HANDLE; - - // 1. Create VkBuffer. 
- VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( - allocator->m_hDevice, - pBufferCreateInfo, - allocator->GetAllocationCallbacks(), - pBuffer); - if(res >= 0) - { - // 2. vkGetBufferMemoryRequirements. - VkMemoryRequirements vkMemReq = {}; - bool requiresDedicatedAllocation = false; - bool prefersDedicatedAllocation = false; - allocator->GetBufferMemoryRequirements(*pBuffer, vkMemReq, - requiresDedicatedAllocation, prefersDedicatedAllocation); - - // Make sure alignment requirements for specific buffer usages reported - // in Physical Device Properties are included in alignment reported by memory requirements. - if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) != 0) - { - VMA_ASSERT(vkMemReq.alignment % - allocator->m_PhysicalDeviceProperties.limits.minTexelBufferOffsetAlignment == 0); - } - if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) != 0) - { - VMA_ASSERT(vkMemReq.alignment % - allocator->m_PhysicalDeviceProperties.limits.minUniformBufferOffsetAlignment == 0); - } - if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) != 0) - { - VMA_ASSERT(vkMemReq.alignment % - allocator->m_PhysicalDeviceProperties.limits.minStorageBufferOffsetAlignment == 0); - } - - // 3. Allocate memory using allocator. - res = allocator->AllocateMemory( - vkMemReq, - requiresDedicatedAllocation, - prefersDedicatedAllocation, - *pBuffer, // dedicatedBuffer - VK_NULL_HANDLE, // dedicatedImage - *pAllocationCreateInfo, - VMA_SUBALLOCATION_TYPE_BUFFER, - pAllocation); - if(res >= 0) - { - // 3. Bind buffer with memory. - res = (*allocator->GetVulkanFunctions().vkBindBufferMemory)( - allocator->m_hDevice, - *pBuffer, - (*pAllocation)->GetMemory(), - (*pAllocation)->GetOffset()); - if(res >= 0) - { - // All steps succeeded. 
- if(pAllocationInfo != VMA_NULL) - { - allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); - } - return VK_SUCCESS; - } - allocator->FreeMemory(*pAllocation); - *pAllocation = VK_NULL_HANDLE; - (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); - *pBuffer = VK_NULL_HANDLE; - return res; - } - (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); - *pBuffer = VK_NULL_HANDLE; - return res; - } - return res; -} - -void vmaDestroyBuffer( - VmaAllocator allocator, - VkBuffer buffer, - VmaAllocation allocation) -{ - if(buffer != VK_NULL_HANDLE) - { - VMA_ASSERT(allocator); - - VMA_DEBUG_LOG("vmaDestroyBuffer"); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, buffer, allocator->GetAllocationCallbacks()); - - allocator->FreeMemory(allocation); - } -} - -VkResult vmaCreateImage( - VmaAllocator allocator, - const VkImageCreateInfo* pImageCreateInfo, - const VmaAllocationCreateInfo* pAllocationCreateInfo, - VkImage* pImage, - VmaAllocation* pAllocation, - VmaAllocationInfo* pAllocationInfo) -{ - VMA_ASSERT(allocator && pImageCreateInfo && pAllocationCreateInfo && pImage && pAllocation); - - VMA_DEBUG_LOG("vmaCreateImage"); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - *pImage = VK_NULL_HANDLE; - *pAllocation = VK_NULL_HANDLE; - - // 1. Create VkImage. - VkResult res = (*allocator->GetVulkanFunctions().vkCreateImage)( - allocator->m_hDevice, - pImageCreateInfo, - allocator->GetAllocationCallbacks(), - pImage); - if(res >= 0) - { - VmaSuballocationType suballocType = pImageCreateInfo->tiling == VK_IMAGE_TILING_OPTIMAL ? - VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL : - VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR; - - // 2. Allocate memory using allocator. - res = AllocateMemoryForImage(allocator, *pImage, pAllocationCreateInfo, suballocType, pAllocation); - if(res >= 0) - { - // 3. 
Bind image with memory. - res = (*allocator->GetVulkanFunctions().vkBindImageMemory)( - allocator->m_hDevice, - *pImage, - (*pAllocation)->GetMemory(), - (*pAllocation)->GetOffset()); - if(res >= 0) - { - // All steps succeeded. - if(pAllocationInfo != VMA_NULL) - { - allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); - } - return VK_SUCCESS; - } - allocator->FreeMemory(*pAllocation); - *pAllocation = VK_NULL_HANDLE; - (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); - *pImage = VK_NULL_HANDLE; - return res; - } - (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); - *pImage = VK_NULL_HANDLE; - return res; - } - return res; -} - -void vmaDestroyImage( - VmaAllocator allocator, - VkImage image, - VmaAllocation allocation) -{ - if(image != VK_NULL_HANDLE) - { - VMA_ASSERT(allocator); - - VMA_DEBUG_LOG("vmaDestroyImage"); - - VMA_DEBUG_GLOBAL_MUTEX_LOCK - - (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, image, allocator->GetAllocationCallbacks()); - - allocator->FreeMemory(allocation); - } -} - -#endif // #ifdef VMA_IMPLEMENTATION diff --git a/third_party/vulkan/vk_platform.h b/third_party/vulkan/vk_platform.h index 728929924..048322d93 100644 --- a/third_party/vulkan/vk_platform.h +++ b/third_party/vulkan/vk_platform.h @@ -2,19 +2,9 @@ // File: vk_platform.h // /* -** Copyright (c) 2014-2017 The Khronos Group Inc. +** Copyright (c) 2014-2020 The Khronos Group Inc. ** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. 
-** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. +** SPDX-License-Identifier: Apache-2.0 */ diff --git a/third_party/vulkan/vulkan.h b/third_party/vulkan/vulkan.h index d05c8490a..b7716ec8e 100644 --- a/third_party/vulkan/vulkan.h +++ b/third_party/vulkan/vulkan.h @@ -2,19 +2,9 @@ #define VULKAN_H_ 1 /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2020 The Khronos Group Inc. ** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. 
+** SPDX-License-Identifier: Apache-2.0 */ #include "vk_platform.h" @@ -24,6 +14,10 @@ #include "vulkan_android.h" #endif +#ifdef VK_USE_PLATFORM_FUCHSIA +#include +#include "vulkan_fuchsia.h" +#endif #ifdef VK_USE_PLATFORM_IOS_MVK #include "vulkan_ios.h" @@ -34,13 +28,10 @@ #include "vulkan_macos.h" #endif - -#ifdef VK_USE_PLATFORM_MIR_KHR -#include -#include "vulkan_mir.h" +#ifdef VK_USE_PLATFORM_METAL_EXT +#include "vulkan_metal.h" #endif - #ifdef VK_USE_PLATFORM_VI_NN #include "vulkan_vi.h" #endif @@ -70,10 +61,27 @@ #endif +#ifdef VK_USE_PLATFORM_DIRECTFB_EXT +#include +#include "vulkan_directfb.h" +#endif + + #ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT #include #include #include "vulkan_xlib_xrandr.h" #endif + +#ifdef VK_USE_PLATFORM_GGP +#include +#include "vulkan_ggp.h" +#endif + + +#ifdef VK_ENABLE_BETA_EXTENSIONS +#include "vulkan_beta.h" +#endif + #endif // VULKAN_H_ diff --git a/third_party/vulkan/vulkan_android.h b/third_party/vulkan/vulkan_android.h index 07aaeda28..50ef85f13 100644 --- a/third_party/vulkan/vulkan_android.h +++ b/third_party/vulkan/vulkan_android.h @@ -1,24 +1,10 @@ #ifndef VULKAN_ANDROID_H_ #define VULKAN_ANDROID_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2020 The Khronos Group Inc. ** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. 
+** SPDX-License-Identifier: Apache-2.0 */ /* @@ -27,14 +13,17 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + + #define VK_KHR_android_surface 1 struct ANativeWindow; - #define VK_KHR_ANDROID_SURFACE_SPEC_VERSION 6 #define VK_KHR_ANDROID_SURFACE_EXTENSION_NAME "VK_KHR_android_surface" - typedef VkFlags VkAndroidSurfaceCreateFlagsKHR; - typedef struct VkAndroidSurfaceCreateInfoKHR { VkStructureType sType; const void* pNext; @@ -42,7 +31,6 @@ typedef struct VkAndroidSurfaceCreateInfoKHR { struct ANativeWindow* window; } VkAndroidSurfaceCreateInfoKHR; - typedef VkResult (VKAPI_PTR *PFN_vkCreateAndroidSurfaceKHR)(VkInstance instance, const VkAndroidSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); #ifndef VK_NO_PROTOTYPES @@ -53,12 +41,11 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateAndroidSurfaceKHR( VkSurfaceKHR* pSurface); #endif + #define VK_ANDROID_external_memory_android_hardware_buffer 1 struct AHardwareBuffer; - #define VK_ANDROID_EXTERNAL_MEMORY_ANDROID_HARDWARE_BUFFER_SPEC_VERSION 3 #define VK_ANDROID_EXTERNAL_MEMORY_ANDROID_HARDWARE_BUFFER_EXTENSION_NAME "VK_ANDROID_external_memory_android_hardware_buffer" - typedef struct VkAndroidHardwareBufferUsageANDROID { VkStructureType sType; void* pNext; @@ -103,7 +90,6 @@ typedef struct VkExternalFormatANDROID { uint64_t externalFormat; } VkExternalFormatANDROID; - typedef VkResult (VKAPI_PTR *PFN_vkGetAndroidHardwareBufferPropertiesANDROID)(VkDevice device, const struct AHardwareBuffer* buffer, VkAndroidHardwareBufferPropertiesANDROID* pProperties); typedef VkResult (VKAPI_PTR *PFN_vkGetMemoryAndroidHardwareBufferANDROID)(VkDevice device, const VkMemoryGetAndroidHardwareBufferInfoANDROID* pInfo, struct AHardwareBuffer** pBuffer); diff --git a/third_party/vulkan/vulkan_beta.h b/third_party/vulkan/vulkan_beta.h new file mode 100644 index 000000000..2904234a7 --- /dev/null +++ b/third_party/vulkan/vulkan_beta.h @@ -0,0 +1,428 @@ +#ifndef VULKAN_BETA_H_ 
+#define VULKAN_BETA_H_ 1 + +/* +** Copyright (c) 2015-2020 The Khronos Group Inc. +** +** SPDX-License-Identifier: Apache-2.0 +*/ + +/* +** This header is generated from the Khronos Vulkan XML API Registry. +** +*/ + + +#ifdef __cplusplus +extern "C" { +#endif + + + +#define VK_KHR_deferred_host_operations 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDeferredOperationKHR) +#define VK_KHR_DEFERRED_HOST_OPERATIONS_SPEC_VERSION 3 +#define VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME "VK_KHR_deferred_host_operations" +typedef struct VkDeferredOperationInfoKHR { + VkStructureType sType; + const void* pNext; + VkDeferredOperationKHR operationHandle; +} VkDeferredOperationInfoKHR; + +typedef VkResult (VKAPI_PTR *PFN_vkCreateDeferredOperationKHR)(VkDevice device, const VkAllocationCallbacks* pAllocator, VkDeferredOperationKHR* pDeferredOperation); +typedef void (VKAPI_PTR *PFN_vkDestroyDeferredOperationKHR)(VkDevice device, VkDeferredOperationKHR operation, const VkAllocationCallbacks* pAllocator); +typedef uint32_t (VKAPI_PTR *PFN_vkGetDeferredOperationMaxConcurrencyKHR)(VkDevice device, VkDeferredOperationKHR operation); +typedef VkResult (VKAPI_PTR *PFN_vkGetDeferredOperationResultKHR)(VkDevice device, VkDeferredOperationKHR operation); +typedef VkResult (VKAPI_PTR *PFN_vkDeferredOperationJoinKHR)(VkDevice device, VkDeferredOperationKHR operation); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateDeferredOperationKHR( + VkDevice device, + const VkAllocationCallbacks* pAllocator, + VkDeferredOperationKHR* pDeferredOperation); + +VKAPI_ATTR void VKAPI_CALL vkDestroyDeferredOperationKHR( + VkDevice device, + VkDeferredOperationKHR operation, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR uint32_t VKAPI_CALL vkGetDeferredOperationMaxConcurrencyKHR( + VkDevice device, + VkDeferredOperationKHR operation); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetDeferredOperationResultKHR( + VkDevice device, + VkDeferredOperationKHR operation); + +VKAPI_ATTR VkResult 
VKAPI_CALL vkDeferredOperationJoinKHR( + VkDevice device, + VkDeferredOperationKHR operation); +#endif + + +#define VK_KHR_pipeline_library 1 +#define VK_KHR_PIPELINE_LIBRARY_SPEC_VERSION 1 +#define VK_KHR_PIPELINE_LIBRARY_EXTENSION_NAME "VK_KHR_pipeline_library" +typedef struct VkPipelineLibraryCreateInfoKHR { + VkStructureType sType; + const void* pNext; + uint32_t libraryCount; + const VkPipeline* pLibraries; +} VkPipelineLibraryCreateInfoKHR; + + + +#define VK_KHR_ray_tracing 1 +#define VK_KHR_RAY_TRACING_SPEC_VERSION 8 +#define VK_KHR_RAY_TRACING_EXTENSION_NAME "VK_KHR_ray_tracing" + +typedef enum VkAccelerationStructureBuildTypeKHR { + VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR = 0, + VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR = 1, + VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_OR_DEVICE_KHR = 2, + VK_ACCELERATION_STRUCTURE_BUILD_TYPE_MAX_ENUM_KHR = 0x7FFFFFFF +} VkAccelerationStructureBuildTypeKHR; +typedef union VkDeviceOrHostAddressKHR { + VkDeviceAddress deviceAddress; + void* hostAddress; +} VkDeviceOrHostAddressKHR; + +typedef union VkDeviceOrHostAddressConstKHR { + VkDeviceAddress deviceAddress; + const void* hostAddress; +} VkDeviceOrHostAddressConstKHR; + +typedef struct VkAccelerationStructureBuildOffsetInfoKHR { + uint32_t primitiveCount; + uint32_t primitiveOffset; + uint32_t firstVertex; + uint32_t transformOffset; +} VkAccelerationStructureBuildOffsetInfoKHR; + +typedef struct VkRayTracingShaderGroupCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkRayTracingShaderGroupTypeKHR type; + uint32_t generalShader; + uint32_t closestHitShader; + uint32_t anyHitShader; + uint32_t intersectionShader; + const void* pShaderGroupCaptureReplayHandle; +} VkRayTracingShaderGroupCreateInfoKHR; + +typedef struct VkRayTracingPipelineInterfaceCreateInfoKHR { + VkStructureType sType; + const void* pNext; + uint32_t maxPayloadSize; + uint32_t maxAttributeSize; + uint32_t maxCallableSize; +} VkRayTracingPipelineInterfaceCreateInfoKHR; + +typedef 
struct VkRayTracingPipelineCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkPipelineCreateFlags flags; + uint32_t stageCount; + const VkPipelineShaderStageCreateInfo* pStages; + uint32_t groupCount; + const VkRayTracingShaderGroupCreateInfoKHR* pGroups; + uint32_t maxRecursionDepth; + VkPipelineLibraryCreateInfoKHR libraries; + const VkRayTracingPipelineInterfaceCreateInfoKHR* pLibraryInterface; + VkPipelineLayout layout; + VkPipeline basePipelineHandle; + int32_t basePipelineIndex; +} VkRayTracingPipelineCreateInfoKHR; + +typedef struct VkAccelerationStructureGeometryTrianglesDataKHR { + VkStructureType sType; + const void* pNext; + VkFormat vertexFormat; + VkDeviceOrHostAddressConstKHR vertexData; + VkDeviceSize vertexStride; + VkIndexType indexType; + VkDeviceOrHostAddressConstKHR indexData; + VkDeviceOrHostAddressConstKHR transformData; +} VkAccelerationStructureGeometryTrianglesDataKHR; + +typedef struct VkAccelerationStructureGeometryAabbsDataKHR { + VkStructureType sType; + const void* pNext; + VkDeviceOrHostAddressConstKHR data; + VkDeviceSize stride; +} VkAccelerationStructureGeometryAabbsDataKHR; + +typedef struct VkAccelerationStructureGeometryInstancesDataKHR { + VkStructureType sType; + const void* pNext; + VkBool32 arrayOfPointers; + VkDeviceOrHostAddressConstKHR data; +} VkAccelerationStructureGeometryInstancesDataKHR; + +typedef union VkAccelerationStructureGeometryDataKHR { + VkAccelerationStructureGeometryTrianglesDataKHR triangles; + VkAccelerationStructureGeometryAabbsDataKHR aabbs; + VkAccelerationStructureGeometryInstancesDataKHR instances; +} VkAccelerationStructureGeometryDataKHR; + +typedef struct VkAccelerationStructureGeometryKHR { + VkStructureType sType; + const void* pNext; + VkGeometryTypeKHR geometryType; + VkAccelerationStructureGeometryDataKHR geometry; + VkGeometryFlagsKHR flags; +} VkAccelerationStructureGeometryKHR; + +typedef struct VkAccelerationStructureBuildGeometryInfoKHR { + VkStructureType sType; + const 
void* pNext; + VkAccelerationStructureTypeKHR type; + VkBuildAccelerationStructureFlagsKHR flags; + VkBool32 update; + VkAccelerationStructureKHR srcAccelerationStructure; + VkAccelerationStructureKHR dstAccelerationStructure; + VkBool32 geometryArrayOfPointers; + uint32_t geometryCount; + const VkAccelerationStructureGeometryKHR* const* ppGeometries; + VkDeviceOrHostAddressKHR scratchData; +} VkAccelerationStructureBuildGeometryInfoKHR; + +typedef struct VkAccelerationStructureCreateGeometryTypeInfoKHR { + VkStructureType sType; + const void* pNext; + VkGeometryTypeKHR geometryType; + uint32_t maxPrimitiveCount; + VkIndexType indexType; + uint32_t maxVertexCount; + VkFormat vertexFormat; + VkBool32 allowsTransforms; +} VkAccelerationStructureCreateGeometryTypeInfoKHR; + +typedef struct VkAccelerationStructureCreateInfoKHR { + VkStructureType sType; + const void* pNext; + VkDeviceSize compactedSize; + VkAccelerationStructureTypeKHR type; + VkBuildAccelerationStructureFlagsKHR flags; + uint32_t maxGeometryCount; + const VkAccelerationStructureCreateGeometryTypeInfoKHR* pGeometryInfos; + VkDeviceAddress deviceAddress; +} VkAccelerationStructureCreateInfoKHR; + +typedef struct VkAccelerationStructureMemoryRequirementsInfoKHR { + VkStructureType sType; + const void* pNext; + VkAccelerationStructureMemoryRequirementsTypeKHR type; + VkAccelerationStructureBuildTypeKHR buildType; + VkAccelerationStructureKHR accelerationStructure; +} VkAccelerationStructureMemoryRequirementsInfoKHR; + +typedef struct VkPhysicalDeviceRayTracingFeaturesKHR { + VkStructureType sType; + void* pNext; + VkBool32 rayTracing; + VkBool32 rayTracingShaderGroupHandleCaptureReplay; + VkBool32 rayTracingShaderGroupHandleCaptureReplayMixed; + VkBool32 rayTracingAccelerationStructureCaptureReplay; + VkBool32 rayTracingIndirectTraceRays; + VkBool32 rayTracingIndirectAccelerationStructureBuild; + VkBool32 rayTracingHostAccelerationStructureCommands; + VkBool32 rayQuery; + VkBool32 
rayTracingPrimitiveCulling; +} VkPhysicalDeviceRayTracingFeaturesKHR; + +typedef struct VkPhysicalDeviceRayTracingPropertiesKHR { + VkStructureType sType; + void* pNext; + uint32_t shaderGroupHandleSize; + uint32_t maxRecursionDepth; + uint32_t maxShaderGroupStride; + uint32_t shaderGroupBaseAlignment; + uint64_t maxGeometryCount; + uint64_t maxInstanceCount; + uint64_t maxPrimitiveCount; + uint32_t maxDescriptorSetAccelerationStructures; + uint32_t shaderGroupHandleCaptureReplaySize; +} VkPhysicalDeviceRayTracingPropertiesKHR; + +typedef struct VkAccelerationStructureDeviceAddressInfoKHR { + VkStructureType sType; + const void* pNext; + VkAccelerationStructureKHR accelerationStructure; +} VkAccelerationStructureDeviceAddressInfoKHR; + +typedef struct VkAccelerationStructureVersionKHR { + VkStructureType sType; + const void* pNext; + const uint8_t* versionData; +} VkAccelerationStructureVersionKHR; + +typedef struct VkStridedBufferRegionKHR { + VkBuffer buffer; + VkDeviceSize offset; + VkDeviceSize stride; + VkDeviceSize size; +} VkStridedBufferRegionKHR; + +typedef struct VkTraceRaysIndirectCommandKHR { + uint32_t width; + uint32_t height; + uint32_t depth; +} VkTraceRaysIndirectCommandKHR; + +typedef struct VkCopyAccelerationStructureToMemoryInfoKHR { + VkStructureType sType; + const void* pNext; + VkAccelerationStructureKHR src; + VkDeviceOrHostAddressKHR dst; + VkCopyAccelerationStructureModeKHR mode; +} VkCopyAccelerationStructureToMemoryInfoKHR; + +typedef struct VkCopyMemoryToAccelerationStructureInfoKHR { + VkStructureType sType; + const void* pNext; + VkDeviceOrHostAddressConstKHR src; + VkAccelerationStructureKHR dst; + VkCopyAccelerationStructureModeKHR mode; +} VkCopyMemoryToAccelerationStructureInfoKHR; + +typedef struct VkCopyAccelerationStructureInfoKHR { + VkStructureType sType; + const void* pNext; + VkAccelerationStructureKHR src; + VkAccelerationStructureKHR dst; + VkCopyAccelerationStructureModeKHR mode; +} VkCopyAccelerationStructureInfoKHR; + 
+typedef VkResult (VKAPI_PTR *PFN_vkCreateAccelerationStructureKHR)(VkDevice device, const VkAccelerationStructureCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkAccelerationStructureKHR* pAccelerationStructure); +typedef void (VKAPI_PTR *PFN_vkGetAccelerationStructureMemoryRequirementsKHR)(VkDevice device, const VkAccelerationStructureMemoryRequirementsInfoKHR* pInfo, VkMemoryRequirements2* pMemoryRequirements); +typedef void (VKAPI_PTR *PFN_vkCmdBuildAccelerationStructureKHR)(VkCommandBuffer commandBuffer, uint32_t infoCount, const VkAccelerationStructureBuildGeometryInfoKHR* pInfos, const VkAccelerationStructureBuildOffsetInfoKHR* const* ppOffsetInfos); +typedef void (VKAPI_PTR *PFN_vkCmdBuildAccelerationStructureIndirectKHR)(VkCommandBuffer commandBuffer, const VkAccelerationStructureBuildGeometryInfoKHR* pInfo, VkBuffer indirectBuffer, VkDeviceSize indirectOffset, uint32_t indirectStride); +typedef VkResult (VKAPI_PTR *PFN_vkBuildAccelerationStructureKHR)(VkDevice device, uint32_t infoCount, const VkAccelerationStructureBuildGeometryInfoKHR* pInfos, const VkAccelerationStructureBuildOffsetInfoKHR* const* ppOffsetInfos); +typedef VkResult (VKAPI_PTR *PFN_vkCopyAccelerationStructureKHR)(VkDevice device, const VkCopyAccelerationStructureInfoKHR* pInfo); +typedef VkResult (VKAPI_PTR *PFN_vkCopyAccelerationStructureToMemoryKHR)(VkDevice device, const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo); +typedef VkResult (VKAPI_PTR *PFN_vkCopyMemoryToAccelerationStructureKHR)(VkDevice device, const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo); +typedef VkResult (VKAPI_PTR *PFN_vkWriteAccelerationStructuresPropertiesKHR)(VkDevice device, uint32_t accelerationStructureCount, const VkAccelerationStructureKHR* pAccelerationStructures, VkQueryType queryType, size_t dataSize, void* pData, size_t stride); +typedef void (VKAPI_PTR *PFN_vkCmdCopyAccelerationStructureKHR)(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR* 
pInfo); +typedef void (VKAPI_PTR *PFN_vkCmdCopyAccelerationStructureToMemoryKHR)(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo); +typedef void (VKAPI_PTR *PFN_vkCmdCopyMemoryToAccelerationStructureKHR)(VkCommandBuffer commandBuffer, const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo); +typedef void (VKAPI_PTR *PFN_vkCmdTraceRaysKHR)(VkCommandBuffer commandBuffer, const VkStridedBufferRegionKHR* pRaygenShaderBindingTable, const VkStridedBufferRegionKHR* pMissShaderBindingTable, const VkStridedBufferRegionKHR* pHitShaderBindingTable, const VkStridedBufferRegionKHR* pCallableShaderBindingTable, uint32_t width, uint32_t height, uint32_t depth); +typedef VkResult (VKAPI_PTR *PFN_vkCreateRayTracingPipelinesKHR)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkRayTracingPipelineCreateInfoKHR* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); +typedef VkDeviceAddress (VKAPI_PTR *PFN_vkGetAccelerationStructureDeviceAddressKHR)(VkDevice device, const VkAccelerationStructureDeviceAddressInfoKHR* pInfo); +typedef VkResult (VKAPI_PTR *PFN_vkGetRayTracingCaptureReplayShaderGroupHandlesKHR)(VkDevice device, VkPipeline pipeline, uint32_t firstGroup, uint32_t groupCount, size_t dataSize, void* pData); +typedef void (VKAPI_PTR *PFN_vkCmdTraceRaysIndirectKHR)(VkCommandBuffer commandBuffer, const VkStridedBufferRegionKHR* pRaygenShaderBindingTable, const VkStridedBufferRegionKHR* pMissShaderBindingTable, const VkStridedBufferRegionKHR* pHitShaderBindingTable, const VkStridedBufferRegionKHR* pCallableShaderBindingTable, VkBuffer buffer, VkDeviceSize offset); +typedef VkResult (VKAPI_PTR *PFN_vkGetDeviceAccelerationStructureCompatibilityKHR)(VkDevice device, const VkAccelerationStructureVersionKHR* version); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateAccelerationStructureKHR( + VkDevice device, + const VkAccelerationStructureCreateInfoKHR* pCreateInfo, + 
const VkAllocationCallbacks* pAllocator, + VkAccelerationStructureKHR* pAccelerationStructure); + +VKAPI_ATTR void VKAPI_CALL vkGetAccelerationStructureMemoryRequirementsKHR( + VkDevice device, + const VkAccelerationStructureMemoryRequirementsInfoKHR* pInfo, + VkMemoryRequirements2* pMemoryRequirements); + +VKAPI_ATTR void VKAPI_CALL vkCmdBuildAccelerationStructureKHR( + VkCommandBuffer commandBuffer, + uint32_t infoCount, + const VkAccelerationStructureBuildGeometryInfoKHR* pInfos, + const VkAccelerationStructureBuildOffsetInfoKHR* const* ppOffsetInfos); + +VKAPI_ATTR void VKAPI_CALL vkCmdBuildAccelerationStructureIndirectKHR( + VkCommandBuffer commandBuffer, + const VkAccelerationStructureBuildGeometryInfoKHR* pInfo, + VkBuffer indirectBuffer, + VkDeviceSize indirectOffset, + uint32_t indirectStride); + +VKAPI_ATTR VkResult VKAPI_CALL vkBuildAccelerationStructureKHR( + VkDevice device, + uint32_t infoCount, + const VkAccelerationStructureBuildGeometryInfoKHR* pInfos, + const VkAccelerationStructureBuildOffsetInfoKHR* const* ppOffsetInfos); + +VKAPI_ATTR VkResult VKAPI_CALL vkCopyAccelerationStructureKHR( + VkDevice device, + const VkCopyAccelerationStructureInfoKHR* pInfo); + +VKAPI_ATTR VkResult VKAPI_CALL vkCopyAccelerationStructureToMemoryKHR( + VkDevice device, + const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo); + +VKAPI_ATTR VkResult VKAPI_CALL vkCopyMemoryToAccelerationStructureKHR( + VkDevice device, + const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo); + +VKAPI_ATTR VkResult VKAPI_CALL vkWriteAccelerationStructuresPropertiesKHR( + VkDevice device, + uint32_t accelerationStructureCount, + const VkAccelerationStructureKHR* pAccelerationStructures, + VkQueryType queryType, + size_t dataSize, + void* pData, + size_t stride); + +VKAPI_ATTR void VKAPI_CALL vkCmdCopyAccelerationStructureKHR( + VkCommandBuffer commandBuffer, + const VkCopyAccelerationStructureInfoKHR* pInfo); + +VKAPI_ATTR void VKAPI_CALL vkCmdCopyAccelerationStructureToMemoryKHR( + 
VkCommandBuffer commandBuffer, + const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo); + +VKAPI_ATTR void VKAPI_CALL vkCmdCopyMemoryToAccelerationStructureKHR( + VkCommandBuffer commandBuffer, + const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo); + +VKAPI_ATTR void VKAPI_CALL vkCmdTraceRaysKHR( + VkCommandBuffer commandBuffer, + const VkStridedBufferRegionKHR* pRaygenShaderBindingTable, + const VkStridedBufferRegionKHR* pMissShaderBindingTable, + const VkStridedBufferRegionKHR* pHitShaderBindingTable, + const VkStridedBufferRegionKHR* pCallableShaderBindingTable, + uint32_t width, + uint32_t height, + uint32_t depth); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateRayTracingPipelinesKHR( + VkDevice device, + VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkRayTracingPipelineCreateInfoKHR* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines); + +VKAPI_ATTR VkDeviceAddress VKAPI_CALL vkGetAccelerationStructureDeviceAddressKHR( + VkDevice device, + const VkAccelerationStructureDeviceAddressInfoKHR* pInfo); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetRayTracingCaptureReplayShaderGroupHandlesKHR( + VkDevice device, + VkPipeline pipeline, + uint32_t firstGroup, + uint32_t groupCount, + size_t dataSize, + void* pData); + +VKAPI_ATTR void VKAPI_CALL vkCmdTraceRaysIndirectKHR( + VkCommandBuffer commandBuffer, + const VkStridedBufferRegionKHR* pRaygenShaderBindingTable, + const VkStridedBufferRegionKHR* pMissShaderBindingTable, + const VkStridedBufferRegionKHR* pHitShaderBindingTable, + const VkStridedBufferRegionKHR* pCallableShaderBindingTable, + VkBuffer buffer, + VkDeviceSize offset); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetDeviceAccelerationStructureCompatibilityKHR( + VkDevice device, + const VkAccelerationStructureVersionKHR* version); +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/vulkan/vulkan_core.h b/third_party/vulkan/vulkan_core.h index 9fefb4382..ae2140997 100644 --- 
a/third_party/vulkan/vulkan_core.h +++ b/third_party/vulkan/vulkan_core.h @@ -1,24 +1,10 @@ #ifndef VULKAN_CORE_H_ #define VULKAN_CORE_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2020 The Khronos Group Inc. ** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. +** SPDX-License-Identifier: Apache-2.0 */ /* @@ -27,29 +13,15 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + + #define VK_VERSION_1_0 1 #include "vk_platform.h" -#define VK_MAKE_VERSION(major, minor, patch) \ - (((major) << 22) | ((minor) << 12) | (patch)) - -// DEPRECATED: This define has been removed. Specific version defines (e.g. VK_API_VERSION_1_0), or the VK_MAKE_VERSION macro, should be used instead. 
-//#define VK_API_VERSION VK_MAKE_VERSION(1, 0, 0) // Patch version should always be set to 0 - -// Vulkan 1.0 version number -#define VK_API_VERSION_1_0 VK_MAKE_VERSION(1, 0, 0)// Patch version should always be set to 0 - -#define VK_VERSION_MAJOR(version) ((uint32_t)(version) >> 22) -#define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff) -#define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff) -// Version of this file -#define VK_HEADER_VERSION 74 - - -#define VK_NULL_HANDLE 0 - - - #define VK_DEFINE_HANDLE(object) typedef struct object##_T* object; @@ -60,14 +32,35 @@ extern "C" { #define VK_DEFINE_NON_DISPATCHABLE_HANDLE(object) typedef uint64_t object; #endif #endif - +#define VK_MAKE_VERSION(major, minor, patch) \ + ((((uint32_t)(major)) << 22) | (((uint32_t)(minor)) << 12) | ((uint32_t)(patch))) + +// DEPRECATED: This define has been removed. Specific version defines (e.g. VK_API_VERSION_1_0), or the VK_MAKE_VERSION macro, should be used instead. +//#define VK_API_VERSION VK_MAKE_VERSION(1, 0, 0) // Patch version should always be set to 0 + +// Vulkan 1.0 version number +#define VK_API_VERSION_1_0 VK_MAKE_VERSION(1, 0, 0)// Patch version should always be set to 0 + +// Version of this file +#define VK_HEADER_VERSION 148 + +// Complete version of this file +#define VK_HEADER_VERSION_COMPLETE VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION) + +#define VK_VERSION_MAJOR(version) ((uint32_t)(version) >> 22) +#define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff) +#define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff) + +#define VK_NULL_HANDLE 0 -typedef uint32_t VkFlags; typedef uint32_t VkBool32; +typedef uint64_t VkDeviceAddress; typedef uint64_t VkDeviceSize; +typedef uint32_t VkFlags; typedef uint32_t VkSampleMask; - +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkBuffer) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkImage) VK_DEFINE_HANDLE(VkInstance) VK_DEFINE_HANDLE(VkPhysicalDevice) VK_DEFINE_HANDLE(VkDevice) @@ -76,8 +69,6 @@ 
VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSemaphore) VK_DEFINE_HANDLE(VkCommandBuffer) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkFence) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDeviceMemory) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkBuffer) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkImage) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkEvent) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkQueryPool) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkBufferView) @@ -85,40 +76,30 @@ VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkImageView) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkShaderModule) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipelineCache) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipelineLayout) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkRenderPass) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipeline) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkRenderPass) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorSetLayout) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSampler) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorPool) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorSet) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorPool) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkFramebuffer) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkCommandPool) - -#define VK_LOD_CLAMP_NONE 1000.0f -#define VK_REMAINING_MIP_LEVELS (~0U) -#define VK_REMAINING_ARRAY_LAYERS (~0U) -#define VK_WHOLE_SIZE (~0ULL) #define VK_ATTACHMENT_UNUSED (~0U) -#define VK_TRUE 1 #define VK_FALSE 0 +#define VK_LOD_CLAMP_NONE 1000.0f #define VK_QUEUE_FAMILY_IGNORED (~0U) +#define VK_REMAINING_ARRAY_LAYERS (~0U) +#define VK_REMAINING_MIP_LEVELS (~0U) #define VK_SUBPASS_EXTERNAL (~0U) -#define VK_MAX_PHYSICAL_DEVICE_NAME_SIZE 256 -#define VK_UUID_SIZE 16 +#define VK_TRUE 1 +#define VK_WHOLE_SIZE (~0ULL) #define VK_MAX_MEMORY_TYPES 32 #define VK_MAX_MEMORY_HEAPS 16 +#define VK_MAX_PHYSICAL_DEVICE_NAME_SIZE 256 +#define VK_UUID_SIZE 16 #define VK_MAX_EXTENSION_NAME_SIZE 256 #define VK_MAX_DESCRIPTION_SIZE 256 - -typedef enum VkPipelineCacheHeaderVersion { - VK_PIPELINE_CACHE_HEADER_VERSION_ONE = 1, - VK_PIPELINE_CACHE_HEADER_VERSION_BEGIN_RANGE = 
VK_PIPELINE_CACHE_HEADER_VERSION_ONE, - VK_PIPELINE_CACHE_HEADER_VERSION_END_RANGE = VK_PIPELINE_CACHE_HEADER_VERSION_ONE, - VK_PIPELINE_CACHE_HEADER_VERSION_RANGE_SIZE = (VK_PIPELINE_CACHE_HEADER_VERSION_ONE - VK_PIPELINE_CACHE_HEADER_VERSION_ONE + 1), - VK_PIPELINE_CACHE_HEADER_VERSION_MAX_ENUM = 0x7FFFFFFF -} VkPipelineCacheHeaderVersion; - typedef enum VkResult { VK_SUCCESS = 0, VK_NOT_READY = 1, @@ -138,8 +119,11 @@ typedef enum VkResult { VK_ERROR_TOO_MANY_OBJECTS = -10, VK_ERROR_FORMAT_NOT_SUPPORTED = -11, VK_ERROR_FRAGMENTED_POOL = -12, + VK_ERROR_UNKNOWN = -13, VK_ERROR_OUT_OF_POOL_MEMORY = -1000069000, VK_ERROR_INVALID_EXTERNAL_HANDLE = -1000072003, + VK_ERROR_FRAGMENTATION = -1000161000, + VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS = -1000257000, VK_ERROR_SURFACE_LOST_KHR = -1000000000, VK_ERROR_NATIVE_WINDOW_IN_USE_KHR = -1000000001, VK_SUBOPTIMAL_KHR = 1000001003, @@ -147,13 +131,21 @@ typedef enum VkResult { VK_ERROR_INCOMPATIBLE_DISPLAY_KHR = -1000003001, VK_ERROR_VALIDATION_FAILED_EXT = -1000011001, VK_ERROR_INVALID_SHADER_NV = -1000012000, - VK_ERROR_FRAGMENTATION_EXT = -1000161000, + VK_ERROR_INCOMPATIBLE_VERSION_KHR = -1000150000, + VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT = -1000158000, VK_ERROR_NOT_PERMITTED_EXT = -1000174001, + VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT = -1000255000, + VK_THREAD_IDLE_KHR = 1000268000, + VK_THREAD_DONE_KHR = 1000268001, + VK_OPERATION_DEFERRED_KHR = 1000268002, + VK_OPERATION_NOT_DEFERRED_KHR = 1000268003, + VK_PIPELINE_COMPILE_REQUIRED_EXT = 1000297000, VK_ERROR_OUT_OF_POOL_MEMORY_KHR = VK_ERROR_OUT_OF_POOL_MEMORY, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR = VK_ERROR_INVALID_EXTERNAL_HANDLE, - VK_RESULT_BEGIN_RANGE = VK_ERROR_FRAGMENTED_POOL, - VK_RESULT_END_RANGE = VK_INCOMPLETE, - VK_RESULT_RANGE_SIZE = (VK_INCOMPLETE - VK_ERROR_FRAGMENTED_POOL + 1), + VK_ERROR_FRAGMENTATION_EXT = VK_ERROR_FRAGMENTATION, + VK_ERROR_INVALID_DEVICE_ADDRESS_EXT = VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS, + 
VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR = VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS, + VK_ERROR_PIPELINE_COMPILE_REQUIRED_EXT = VK_PIPELINE_COMPILE_REQUIRED_EXT, VK_RESULT_MAX_ENUM = 0x7FFFFFFF } VkResult; @@ -243,7 +235,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO = 1000053000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES = 1000053001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES = 1000053002, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES = 1000120000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES = 1000120000, VK_STRUCTURE_TYPE_PROTECTED_SUBMIT_INFO = 1000145000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES = 1000145001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES = 1000145002, @@ -271,7 +263,57 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES = 1000076001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES = 1000168000, VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_SUPPORT = 1000168001, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES = 1000063000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES = 1000063000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES = 49, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES = 50, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES = 51, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES = 52, + VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO = 1000147000, + VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2 = 1000109000, + VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2 = 1000109001, + VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2 = 1000109002, + VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2 = 1000109003, + VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2 = 1000109004, + VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO = 1000109005, + VK_STRUCTURE_TYPE_SUBPASS_END_INFO = 1000109006, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES = 
1000177000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES = 1000196000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES = 1000180000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES = 1000082000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES = 1000197000, + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO = 1000161000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES = 1000161001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES = 1000161002, + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO = 1000161003, + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT = 1000161004, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES = 1000199000, + VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE = 1000199001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES = 1000221000, + VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO = 1000246000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES = 1000130000, + VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO = 1000130001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES = 1000211000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES = 1000108000, + VK_STRUCTURE_TYPE_FRAMEBUFFER_ATTACHMENTS_CREATE_INFO = 1000108001, + VK_STRUCTURE_TYPE_FRAMEBUFFER_ATTACHMENT_IMAGE_INFO = 1000108002, + VK_STRUCTURE_TYPE_RENDER_PASS_ATTACHMENT_BEGIN_INFO = 1000108003, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES = 1000253000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES = 1000175000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES = 1000241000, + VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_STENCIL_LAYOUT = 1000241001, + VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT = 1000241002, + 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES = 1000261000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES = 1000207000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES = 1000207001, + VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO = 1000207002, + VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO = 1000207003, + VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO = 1000207004, + VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO = 1000207005, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES = 1000257000, + VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO = 1000244001, + VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO = 1000257002, + VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO = 1000257003, + VK_STRUCTURE_TYPE_DEVICE_MEMORY_OPAQUE_CAPTURE_ADDRESS_INFO = 1000257004, VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR = 1000001000, VK_STRUCTURE_TYPE_PRESENT_INFO_KHR = 1000001001, VK_STRUCTURE_TYPE_DEVICE_GROUP_PRESENT_CAPABILITIES_KHR = 1000060007, @@ -286,7 +328,6 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR = 1000004000, VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR = 1000005000, VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR = 1000006000, - VK_STRUCTURE_TYPE_MIR_SURFACE_CREATE_INFO_KHR = 1000007000, VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR = 1000008000, VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR = 1000009000, VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT = 1000011000, @@ -297,7 +338,14 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_IMAGE_CREATE_INFO_NV = 1000026000, VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_BUFFER_CREATE_INFO_NV = 1000026001, VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV = 1000026002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT = 1000028000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT = 1000028001, + 
VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT = 1000028002, + VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX = 1000030000, + VK_STRUCTURE_TYPE_IMAGE_VIEW_ADDRESS_PROPERTIES_NVX = 1000030001, VK_STRUCTURE_TYPE_TEXTURE_LOD_GATHER_FORMAT_PROPERTIES_AMD = 1000041000, + VK_STRUCTURE_TYPE_STREAM_DESCRIPTOR_SURFACE_CREATE_INFO_GGP = 1000049000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CORNER_SAMPLED_IMAGE_FEATURES_NV = 1000050000, VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_NV = 1000056000, VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_NV = 1000056001, VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_NV = 1000057000, @@ -305,6 +353,9 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_WIN32_KEYED_MUTEX_ACQUIRE_RELEASE_INFO_NV = 1000058000, VK_STRUCTURE_TYPE_VALIDATION_FLAGS_EXT = 1000061000, VK_STRUCTURE_TYPE_VI_SURFACE_CREATE_INFO_NN = 1000062000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXTURE_COMPRESSION_ASTC_HDR_FEATURES_EXT = 1000066000, + VK_STRUCTURE_TYPE_IMAGE_VIEW_ASTC_DECODE_MODE_EXT = 1000067000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ASTC_DECODE_FEATURES_EXT = 1000067001, VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR = 1000073000, VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR = 1000073001, VK_STRUCTURE_TYPE_MEMORY_WIN32_HANDLE_PROPERTIES_KHR = 1000073002, @@ -320,13 +371,10 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR = 1000079000, VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR = 1000079001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR = 1000080000, + VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_CONDITIONAL_RENDERING_INFO_EXT = 1000081000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT = 1000081001, + VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT = 1000081002, VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR = 1000084000, - VK_STRUCTURE_TYPE_OBJECT_TABLE_CREATE_INFO_NVX = 1000086000, - VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_NVX = 
1000086001, - VK_STRUCTURE_TYPE_CMD_PROCESS_COMMANDS_INFO_NVX = 1000086002, - VK_STRUCTURE_TYPE_CMD_RESERVE_SPACE_FOR_COMMANDS_INFO_NVX = 1000086003, - VK_STRUCTURE_TYPE_DEVICE_GENERATED_COMMANDS_LIMITS_NVX = 1000086004, - VK_STRUCTURE_TYPE_DEVICE_GENERATED_COMMANDS_FEATURES_NVX = 1000086005, VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_W_SCALING_STATE_CREATE_INFO_NV = 1000087000, VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_EXT = 1000090000, VK_STRUCTURE_TYPE_DISPLAY_POWER_INFO_EXT = 1000091000, @@ -340,6 +388,8 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT = 1000099001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT = 1000101000, VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT = 1000101001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT = 1000102000, + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT = 1000102001, VK_STRUCTURE_TYPE_HDR_METADATA_EXT = 1000105000, VK_STRUCTURE_TYPE_SHARED_PRESENT_SURFACE_CAPABILITIES_KHR = 1000111000, VK_STRUCTURE_TYPE_IMPORT_FENCE_WIN32_HANDLE_INFO_KHR = 1000114000, @@ -347,9 +397,21 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_FENCE_GET_WIN32_HANDLE_INFO_KHR = 1000114002, VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR = 1000115000, VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR = 1000115001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR = 1000116000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR = 1000116001, + VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR = 1000116002, + VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR = 1000116003, + VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR = 1000116004, + VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_KHR = 1000116005, + VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_DESCRIPTION_KHR = 1000116006, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SURFACE_INFO_2_KHR = 1000119000, VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_KHR = 
1000119001, VK_STRUCTURE_TYPE_SURFACE_FORMAT_2_KHR = 1000119002, + VK_STRUCTURE_TYPE_DISPLAY_PROPERTIES_2_KHR = 1000121000, + VK_STRUCTURE_TYPE_DISPLAY_PLANE_PROPERTIES_2_KHR = 1000121001, + VK_STRUCTURE_TYPE_DISPLAY_MODE_PROPERTIES_2_KHR = 1000121002, + VK_STRUCTURE_TYPE_DISPLAY_PLANE_INFO_2_KHR = 1000121003, + VK_STRUCTURE_TYPE_DISPLAY_PLANE_CAPABILITIES_2_KHR = 1000121004, VK_STRUCTURE_TYPE_IOS_SURFACE_CREATE_INFO_MVK = 1000122000, VK_STRUCTURE_TYPE_MACOS_SURFACE_CREATE_INFO_MVK = 1000123000, VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT = 1000128000, @@ -363,33 +425,176 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID = 1000129003, VK_STRUCTURE_TYPE_MEMORY_GET_ANDROID_HARDWARE_BUFFER_INFO_ANDROID = 1000129004, VK_STRUCTURE_TYPE_EXTERNAL_FORMAT_ANDROID = 1000129005, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT = 1000130000, - VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT = 1000130001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT = 1000138000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT = 1000138001, + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT = 1000138002, + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT = 1000138003, VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT = 1000143000, VK_STRUCTURE_TYPE_RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT = 1000143001, VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT = 1000143002, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT = 1000143003, VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT = 1000143004, - VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR = 1000147000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BLEND_OPERATION_ADVANCED_FEATURES_EXT = 1000148000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BLEND_OPERATION_ADVANCED_PROPERTIES_EXT = 1000148001, VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_ADVANCED_STATE_CREATE_INFO_EXT = 
1000148002, VK_STRUCTURE_TYPE_PIPELINE_COVERAGE_TO_COLOR_STATE_CREATE_INFO_NV = 1000149000, + VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_KHR = 1000165006, + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR = 1000165007, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR = 1000150000, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_GEOMETRY_TYPE_INFO_KHR = 1000150001, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR = 1000150002, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR = 1000150003, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR = 1000150004, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR = 1000150005, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR = 1000150006, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_KHR = 1000150008, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_VERSION_KHR = 1000150009, + VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR = 1000150010, + VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR = 1000150011, + VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR = 1000150012, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_FEATURES_KHR = 1000150013, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PROPERTIES_KHR = 1000150014, + VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR = 1000150015, + VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR = 1000150016, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR = 1000150017, + VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_INTERFACE_CREATE_INFO_KHR = 1000150018, VK_STRUCTURE_TYPE_PIPELINE_COVERAGE_MODULATION_STATE_CREATE_INFO_NV = 1000152000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SM_BUILTINS_FEATURES_NV = 1000154000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SM_BUILTINS_PROPERTIES_NV = 1000154001, + VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT = 1000158000, + 
VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT = 1000158001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT = 1000158002, + VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT = 1000158003, + VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT = 1000158004, + VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT = 1000158005, VK_STRUCTURE_TYPE_VALIDATION_CACHE_CREATE_INFO_EXT = 1000160000, VK_STRUCTURE_TYPE_SHADER_MODULE_VALIDATION_CACHE_CREATE_INFO_EXT = 1000160001, - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT = 1000161000, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT = 1000161001, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT = 1000161002, - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO_EXT = 1000161003, - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT_EXT = 1000161004, + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SHADING_RATE_IMAGE_STATE_CREATE_INFO_NV = 1000164000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADING_RATE_IMAGE_FEATURES_NV = 1000164001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADING_RATE_IMAGE_PROPERTIES_NV = 1000164002, + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_COARSE_SAMPLE_ORDER_STATE_CREATE_INFO_NV = 1000164005, + VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_NV = 1000165000, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NV = 1000165001, + VK_STRUCTURE_TYPE_GEOMETRY_NV = 1000165003, + VK_STRUCTURE_TYPE_GEOMETRY_TRIANGLES_NV = 1000165004, + VK_STRUCTURE_TYPE_GEOMETRY_AABB_NV = 1000165005, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV = 1000165008, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PROPERTIES_NV = 1000165009, + VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV = 1000165011, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV = 1000165012, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_REPRESENTATIVE_FRAGMENT_TEST_FEATURES_NV 
= 1000166000, + VK_STRUCTURE_TYPE_PIPELINE_REPRESENTATIVE_FRAGMENT_TEST_STATE_CREATE_INFO_NV = 1000166001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_IMAGE_FORMAT_INFO_EXT = 1000170000, + VK_STRUCTURE_TYPE_FILTER_CUBIC_IMAGE_VIEW_IMAGE_FORMAT_PROPERTIES_EXT = 1000170001, VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT = 1000174000, VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT = 1000178000, VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT = 1000178001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT = 1000178002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR = 1000181000, + VK_STRUCTURE_TYPE_PIPELINE_COMPILER_CONTROL_CREATE_INFO_AMD = 1000183000, + VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT = 1000184000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD = 1000185000, + VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD = 1000189000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT = 1000190000, VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT = 1000190001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT = 1000190002, + VK_STRUCTURE_TYPE_PRESENT_FRAME_TOKEN_GGP = 1000191000, + VK_STRUCTURE_TYPE_PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT = 1000192000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV = 1000201000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV = 1000202000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV = 1000202001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_FEATURES_NV = 1000203000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_FOOTPRINT_FEATURES_NV = 1000204000, + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_EXCLUSIVE_SCISSOR_STATE_CREATE_INFO_NV = 1000205000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXCLUSIVE_SCISSOR_FEATURES_NV = 1000205002, + VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV = 1000206000, + 
VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_NV = 1000206001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_FUNCTIONS_2_FEATURES_INTEL = 1000209000, + VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_QUERY_CREATE_INFO_INTEL = 1000210000, + VK_STRUCTURE_TYPE_INITIALIZE_PERFORMANCE_API_INFO_INTEL = 1000210001, + VK_STRUCTURE_TYPE_PERFORMANCE_MARKER_INFO_INTEL = 1000210002, + VK_STRUCTURE_TYPE_PERFORMANCE_STREAM_MARKER_INFO_INTEL = 1000210003, + VK_STRUCTURE_TYPE_PERFORMANCE_OVERRIDE_INFO_INTEL = 1000210004, + VK_STRUCTURE_TYPE_PERFORMANCE_CONFIGURATION_ACQUIRE_INFO_INTEL = 1000210005, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT = 1000212000, + VK_STRUCTURE_TYPE_DISPLAY_NATIVE_HDR_SURFACE_CAPABILITIES_AMD = 1000213000, + VK_STRUCTURE_TYPE_SWAPCHAIN_DISPLAY_NATIVE_HDR_CREATE_INFO_AMD = 1000213001, + VK_STRUCTURE_TYPE_IMAGEPIPE_SURFACE_CREATE_INFO_FUCHSIA = 1000214000, + VK_STRUCTURE_TYPE_METAL_SURFACE_CREATE_INFO_EXT = 1000217000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_FEATURES_EXT = 1000218000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_PROPERTIES_EXT = 1000218001, + VK_STRUCTURE_TYPE_RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT = 1000218002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT = 1000225000, + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT = 1000225001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT = 1000225002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD = 1000227000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD = 1000229000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT = 1000237000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT = 1000238000, + VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT = 1000238001, + VK_STRUCTURE_TYPE_SURFACE_PROTECTED_CAPABILITIES_KHR = 1000239000, + 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEDICATED_ALLOCATION_IMAGE_ALIASING_FEATURES_NV = 1000240000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT = 1000244000, + VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT = 1000244002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TOOL_PROPERTIES_EXT = 1000245000, + VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT = 1000247000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_NV = 1000249000, + VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_NV = 1000249001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_NV = 1000249002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COVERAGE_REDUCTION_MODE_FEATURES_NV = 1000250000, + VK_STRUCTURE_TYPE_PIPELINE_COVERAGE_REDUCTION_STATE_CREATE_INFO_NV = 1000250001, + VK_STRUCTURE_TYPE_FRAMEBUFFER_MIXED_SAMPLES_COMBINATION_NV = 1000250002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT = 1000251000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT = 1000252000, + VK_STRUCTURE_TYPE_SURFACE_FULL_SCREEN_EXCLUSIVE_INFO_EXT = 1000255000, + VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_FULL_SCREEN_EXCLUSIVE_EXT = 1000255002, + VK_STRUCTURE_TYPE_SURFACE_FULL_SCREEN_EXCLUSIVE_WIN32_INFO_EXT = 1000255001, + VK_STRUCTURE_TYPE_HEADLESS_SURFACE_CREATE_INFO_EXT = 1000256000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT = 1000259000, + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT = 1000259001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT = 1000259002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT = 1000260000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT = 1000265000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT = 1000267000, + VK_STRUCTURE_TYPE_DEFERRED_OPERATION_INFO_KHR = 1000268000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR = 1000269000, + 
VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR = 1000269001, + VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR = 1000269002, + VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR = 1000269003, + VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR = 1000269004, + VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INTERNAL_REPRESENTATION_KHR = 1000269005, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT = 1000276000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_PROPERTIES_NV = 1000277000, + VK_STRUCTURE_TYPE_GRAPHICS_SHADER_GROUP_CREATE_INFO_NV = 1000277001, + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_SHADER_GROUPS_CREATE_INFO_NV = 1000277002, + VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_TOKEN_NV = 1000277003, + VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_NV = 1000277004, + VK_STRUCTURE_TYPE_GENERATED_COMMANDS_INFO_NV = 1000277005, + VK_STRUCTURE_TYPE_GENERATED_COMMANDS_MEMORY_REQUIREMENTS_INFO_NV = 1000277006, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_NV = 1000277007, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT = 1000281000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT = 1000281001, + VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDER_PASS_TRANSFORM_INFO_QCOM = 1000282000, + VK_STRUCTURE_TYPE_RENDER_PASS_TRANSFORM_BEGIN_INFO_QCOM = 1000282001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT = 1000286000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT = 1000286001, + VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT = 1000287000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT = 1000287001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT = 1000287002, + VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR = 1000290000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT = 1000295000, + VK_STRUCTURE_TYPE_DEVICE_PRIVATE_DATA_CREATE_INFO_EXT = 1000295001, + 
VK_STRUCTURE_TYPE_PRIVATE_DATA_SLOT_CREATE_INFO_EXT = 1000295002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT = 1000297000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DIAGNOSTICS_CONFIG_FEATURES_NV = 1000300000, + VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV = 1000300001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_2_FEATURES_EXT = 1000332000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_2_PROPERTIES_EXT = 1000332001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT = 1000335000, + VK_STRUCTURE_TYPE_DIRECTFB_SURFACE_CREATE_INFO_EXT = 1000346000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES, + VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT, VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES, @@ -422,8 +627,22 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO, VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES_KHR = VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES, VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES, 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES, VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, + VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES2_EXT = VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_EXT, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES, + VK_STRUCTURE_TYPE_FRAMEBUFFER_ATTACHMENTS_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_FRAMEBUFFER_ATTACHMENTS_CREATE_INFO, + VK_STRUCTURE_TYPE_FRAMEBUFFER_ATTACHMENT_IMAGE_INFO_KHR = VK_STRUCTURE_TYPE_FRAMEBUFFER_ATTACHMENT_IMAGE_INFO, + VK_STRUCTURE_TYPE_RENDER_PASS_ATTACHMENT_BEGIN_INFO_KHR = VK_STRUCTURE_TYPE_RENDER_PASS_ATTACHMENT_BEGIN_INFO, + VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2, + VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, + VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2, + VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, + VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2, + VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO_KHR = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO, + VK_STRUCTURE_TYPE_SUBPASS_END_INFO_KHR = VK_STRUCTURE_TYPE_SUBPASS_END_INFO, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_FENCE_INFO_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_FENCE_INFO, VK_STRUCTURE_TYPE_EXTERNAL_FENCE_PROPERTIES_KHR = VK_STRUCTURE_TYPE_EXTERNAL_FENCE_PROPERTIES, VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO, @@ -431,14 +650,18 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_RENDER_PASS_INPUT_ATTACHMENT_ASPECT_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_RENDER_PASS_INPUT_ATTACHMENT_ASPECT_CREATE_INFO, VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO_KHR = 
VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR, VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES, + VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO, VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2_KHR = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, VK_STRUCTURE_TYPE_IMAGE_SPARSE_MEMORY_REQUIREMENTS_INFO_2_KHR = VK_STRUCTURE_TYPE_IMAGE_SPARSE_MEMORY_REQUIREMENTS_INFO_2, VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, VK_STRUCTURE_TYPE_SPARSE_IMAGE_MEMORY_REQUIREMENTS_2_KHR = VK_STRUCTURE_TYPE_SPARSE_IMAGE_MEMORY_REQUIREMENTS_2, + VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO, VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO, VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO_KHR = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO, VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO_KHR = 
VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO, @@ -447,31 +670,148 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES_KHR = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES, VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO, + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES, + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO_EXT = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO, + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT_EXT = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT, + VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV = VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_KHR, + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_NV = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES, VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_SUPPORT_KHR = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_SUPPORT, - VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO, - VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO, - VK_STRUCTURE_TYPE_RANGE_SIZE = (VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1), + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES_KHR = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES, + VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES, + VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, + VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, + VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, + VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO_KHR = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO, + VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO_INTEL = VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_QUERY_CREATE_INFO_INTEL, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES_KHR = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES, + VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_STENCIL_LAYOUT, + VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT_KHR = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_ADDRESS_FEATURES_EXT = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT, + VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, + VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO_EXT = VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES, + VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, + VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO, + VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO_KHR = VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO, + VK_STRUCTURE_TYPE_DEVICE_MEMORY_OPAQUE_CAPTURE_ADDRESS_INFO_KHR = VK_STRUCTURE_TYPE_DEVICE_MEMORY_OPAQUE_CAPTURE_ADDRESS_INFO, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES, VK_STRUCTURE_TYPE_MAX_ENUM = 0x7FFFFFFF } VkStructureType; +typedef enum VkImageLayout { + VK_IMAGE_LAYOUT_UNDEFINED = 0, + VK_IMAGE_LAYOUT_GENERAL = 1, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL = 2, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL = 3, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL = 4, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL = 5, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL = 6, + 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL = 7, + VK_IMAGE_LAYOUT_PREINITIALIZED = 8, + VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL = 1000117000, + VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL = 1000117001, + VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL = 1000241000, + VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL = 1000241001, + VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL = 1000241002, + VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL = 1000241003, + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR = 1000001002, + VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR = 1000111000, + VK_IMAGE_LAYOUT_SHADING_RATE_OPTIMAL_NV = 1000164003, + VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT = 1000218000, + VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL_KHR = VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL_KHR = VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR = VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL_KHR = VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR = VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL_KHR = VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_MAX_ENUM = 0x7FFFFFFF +} VkImageLayout; + +typedef enum VkObjectType { + VK_OBJECT_TYPE_UNKNOWN = 0, + VK_OBJECT_TYPE_INSTANCE = 1, + VK_OBJECT_TYPE_PHYSICAL_DEVICE = 2, + VK_OBJECT_TYPE_DEVICE = 3, + VK_OBJECT_TYPE_QUEUE = 4, + VK_OBJECT_TYPE_SEMAPHORE = 5, + VK_OBJECT_TYPE_COMMAND_BUFFER = 6, + VK_OBJECT_TYPE_FENCE = 7, + VK_OBJECT_TYPE_DEVICE_MEMORY = 8, + VK_OBJECT_TYPE_BUFFER = 9, + VK_OBJECT_TYPE_IMAGE = 10, + VK_OBJECT_TYPE_EVENT = 11, + VK_OBJECT_TYPE_QUERY_POOL = 12, + VK_OBJECT_TYPE_BUFFER_VIEW = 13, + VK_OBJECT_TYPE_IMAGE_VIEW = 14, + VK_OBJECT_TYPE_SHADER_MODULE = 15, + VK_OBJECT_TYPE_PIPELINE_CACHE = 16, + VK_OBJECT_TYPE_PIPELINE_LAYOUT = 17, + 
VK_OBJECT_TYPE_RENDER_PASS = 18, + VK_OBJECT_TYPE_PIPELINE = 19, + VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT = 20, + VK_OBJECT_TYPE_SAMPLER = 21, + VK_OBJECT_TYPE_DESCRIPTOR_POOL = 22, + VK_OBJECT_TYPE_DESCRIPTOR_SET = 23, + VK_OBJECT_TYPE_FRAMEBUFFER = 24, + VK_OBJECT_TYPE_COMMAND_POOL = 25, + VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION = 1000156000, + VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE = 1000085000, + VK_OBJECT_TYPE_SURFACE_KHR = 1000000000, + VK_OBJECT_TYPE_SWAPCHAIN_KHR = 1000001000, + VK_OBJECT_TYPE_DISPLAY_KHR = 1000002000, + VK_OBJECT_TYPE_DISPLAY_MODE_KHR = 1000002001, + VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT = 1000011000, + VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT = 1000128000, + VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR = 1000165000, + VK_OBJECT_TYPE_VALIDATION_CACHE_EXT = 1000160000, + VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL = 1000210000, + VK_OBJECT_TYPE_DEFERRED_OPERATION_KHR = 1000268000, + VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV = 1000277000, + VK_OBJECT_TYPE_PRIVATE_DATA_SLOT_EXT = 1000295000, + VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR = VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE, + VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_KHR = VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION, + VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV = VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR, + VK_OBJECT_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkObjectType; + +typedef enum VkVendorId { + VK_VENDOR_ID_VIV = 0x10001, + VK_VENDOR_ID_VSI = 0x10002, + VK_VENDOR_ID_KAZAN = 0x10003, + VK_VENDOR_ID_CODEPLAY = 0x10004, + VK_VENDOR_ID_MESA = 0x10005, + VK_VENDOR_ID_MAX_ENUM = 0x7FFFFFFF +} VkVendorId; + +typedef enum VkPipelineCacheHeaderVersion { + VK_PIPELINE_CACHE_HEADER_VERSION_ONE = 1, + VK_PIPELINE_CACHE_HEADER_VERSION_MAX_ENUM = 0x7FFFFFFF +} VkPipelineCacheHeaderVersion; + typedef enum VkSystemAllocationScope { VK_SYSTEM_ALLOCATION_SCOPE_COMMAND = 0, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT = 1, VK_SYSTEM_ALLOCATION_SCOPE_CACHE = 2, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE = 3, 
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE = 4, - VK_SYSTEM_ALLOCATION_SCOPE_BEGIN_RANGE = VK_SYSTEM_ALLOCATION_SCOPE_COMMAND, - VK_SYSTEM_ALLOCATION_SCOPE_END_RANGE = VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE, - VK_SYSTEM_ALLOCATION_SCOPE_RANGE_SIZE = (VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE - VK_SYSTEM_ALLOCATION_SCOPE_COMMAND + 1), VK_SYSTEM_ALLOCATION_SCOPE_MAX_ENUM = 0x7FFFFFFF } VkSystemAllocationScope; typedef enum VkInternalAllocationType { VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE = 0, - VK_INTERNAL_ALLOCATION_TYPE_BEGIN_RANGE = VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE, - VK_INTERNAL_ALLOCATION_TYPE_END_RANGE = VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE, - VK_INTERNAL_ALLOCATION_TYPE_RANGE_SIZE = (VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE - VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE + 1), VK_INTERNAL_ALLOCATION_TYPE_MAX_ENUM = 0x7FFFFFFF } VkInternalAllocationType; @@ -703,6 +1043,20 @@ typedef enum VkFormat { VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG = 1000054005, VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG = 1000054006, VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG = 1000054007, + VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK_EXT = 1000066000, + VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK_EXT = 1000066001, + VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK_EXT = 1000066002, + VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK_EXT = 1000066003, + VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK_EXT = 1000066004, + VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK_EXT = 1000066005, + VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK_EXT = 1000066006, + VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK_EXT = 1000066007, + VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK_EXT = 1000066008, + VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK_EXT = 1000066009, + VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK_EXT = 1000066010, + VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK_EXT = 1000066011, + VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK_EXT = 1000066012, + VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK_EXT = 1000066013, VK_FORMAT_G8B8G8R8_422_UNORM_KHR = VK_FORMAT_G8B8G8R8_422_UNORM, VK_FORMAT_B8G8R8G8_422_UNORM_KHR = VK_FORMAT_B8G8R8G8_422_UNORM, VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM_KHR = VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, 
@@ -737,40 +1091,29 @@ typedef enum VkFormat { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM_KHR = VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, VK_FORMAT_G16_B16R16_2PLANE_422_UNORM_KHR = VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM_KHR = VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, - VK_FORMAT_BEGIN_RANGE = VK_FORMAT_UNDEFINED, - VK_FORMAT_END_RANGE = VK_FORMAT_ASTC_12x12_SRGB_BLOCK, - VK_FORMAT_RANGE_SIZE = (VK_FORMAT_ASTC_12x12_SRGB_BLOCK - VK_FORMAT_UNDEFINED + 1), VK_FORMAT_MAX_ENUM = 0x7FFFFFFF } VkFormat; +typedef enum VkImageTiling { + VK_IMAGE_TILING_OPTIMAL = 0, + VK_IMAGE_TILING_LINEAR = 1, + VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT = 1000158000, + VK_IMAGE_TILING_MAX_ENUM = 0x7FFFFFFF +} VkImageTiling; + typedef enum VkImageType { VK_IMAGE_TYPE_1D = 0, VK_IMAGE_TYPE_2D = 1, VK_IMAGE_TYPE_3D = 2, - VK_IMAGE_TYPE_BEGIN_RANGE = VK_IMAGE_TYPE_1D, - VK_IMAGE_TYPE_END_RANGE = VK_IMAGE_TYPE_3D, - VK_IMAGE_TYPE_RANGE_SIZE = (VK_IMAGE_TYPE_3D - VK_IMAGE_TYPE_1D + 1), VK_IMAGE_TYPE_MAX_ENUM = 0x7FFFFFFF } VkImageType; -typedef enum VkImageTiling { - VK_IMAGE_TILING_OPTIMAL = 0, - VK_IMAGE_TILING_LINEAR = 1, - VK_IMAGE_TILING_BEGIN_RANGE = VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_TILING_END_RANGE = VK_IMAGE_TILING_LINEAR, - VK_IMAGE_TILING_RANGE_SIZE = (VK_IMAGE_TILING_LINEAR - VK_IMAGE_TILING_OPTIMAL + 1), - VK_IMAGE_TILING_MAX_ENUM = 0x7FFFFFFF -} VkImageTiling; - typedef enum VkPhysicalDeviceType { VK_PHYSICAL_DEVICE_TYPE_OTHER = 0, VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU = 1, VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU = 2, VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU = 3, VK_PHYSICAL_DEVICE_TYPE_CPU = 4, - VK_PHYSICAL_DEVICE_TYPE_BEGIN_RANGE = VK_PHYSICAL_DEVICE_TYPE_OTHER, - VK_PHYSICAL_DEVICE_TYPE_END_RANGE = VK_PHYSICAL_DEVICE_TYPE_CPU, - VK_PHYSICAL_DEVICE_TYPE_RANGE_SIZE = (VK_PHYSICAL_DEVICE_TYPE_CPU - VK_PHYSICAL_DEVICE_TYPE_OTHER + 1), VK_PHYSICAL_DEVICE_TYPE_MAX_ENUM = 0x7FFFFFFF } VkPhysicalDeviceType; @@ -778,57 +1121,21 @@ typedef enum VkQueryType { 
VK_QUERY_TYPE_OCCLUSION = 0, VK_QUERY_TYPE_PIPELINE_STATISTICS = 1, VK_QUERY_TYPE_TIMESTAMP = 2, - VK_QUERY_TYPE_BEGIN_RANGE = VK_QUERY_TYPE_OCCLUSION, - VK_QUERY_TYPE_END_RANGE = VK_QUERY_TYPE_TIMESTAMP, - VK_QUERY_TYPE_RANGE_SIZE = (VK_QUERY_TYPE_TIMESTAMP - VK_QUERY_TYPE_OCCLUSION + 1), + VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT = 1000028004, + VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR = 1000116000, + VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR = 1000165000, + VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR = 1000150000, + VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL = 1000210000, + VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_NV = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, VK_QUERY_TYPE_MAX_ENUM = 0x7FFFFFFF } VkQueryType; typedef enum VkSharingMode { VK_SHARING_MODE_EXCLUSIVE = 0, VK_SHARING_MODE_CONCURRENT = 1, - VK_SHARING_MODE_BEGIN_RANGE = VK_SHARING_MODE_EXCLUSIVE, - VK_SHARING_MODE_END_RANGE = VK_SHARING_MODE_CONCURRENT, - VK_SHARING_MODE_RANGE_SIZE = (VK_SHARING_MODE_CONCURRENT - VK_SHARING_MODE_EXCLUSIVE + 1), VK_SHARING_MODE_MAX_ENUM = 0x7FFFFFFF } VkSharingMode; -typedef enum VkImageLayout { - VK_IMAGE_LAYOUT_UNDEFINED = 0, - VK_IMAGE_LAYOUT_GENERAL = 1, - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL = 2, - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL = 3, - VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL = 4, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL = 5, - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL = 6, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL = 7, - VK_IMAGE_LAYOUT_PREINITIALIZED = 8, - VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL = 1000117000, - VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL = 1000117001, - VK_IMAGE_LAYOUT_PRESENT_SRC_KHR = 1000001002, - VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR = 1000111000, - VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL_KHR = VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL, - VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL_KHR = 
VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL, - VK_IMAGE_LAYOUT_BEGIN_RANGE = VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_LAYOUT_END_RANGE = VK_IMAGE_LAYOUT_PREINITIALIZED, - VK_IMAGE_LAYOUT_RANGE_SIZE = (VK_IMAGE_LAYOUT_PREINITIALIZED - VK_IMAGE_LAYOUT_UNDEFINED + 1), - VK_IMAGE_LAYOUT_MAX_ENUM = 0x7FFFFFFF -} VkImageLayout; - -typedef enum VkImageViewType { - VK_IMAGE_VIEW_TYPE_1D = 0, - VK_IMAGE_VIEW_TYPE_2D = 1, - VK_IMAGE_VIEW_TYPE_3D = 2, - VK_IMAGE_VIEW_TYPE_CUBE = 3, - VK_IMAGE_VIEW_TYPE_1D_ARRAY = 4, - VK_IMAGE_VIEW_TYPE_2D_ARRAY = 5, - VK_IMAGE_VIEW_TYPE_CUBE_ARRAY = 6, - VK_IMAGE_VIEW_TYPE_BEGIN_RANGE = VK_IMAGE_VIEW_TYPE_1D, - VK_IMAGE_VIEW_TYPE_END_RANGE = VK_IMAGE_VIEW_TYPE_CUBE_ARRAY, - VK_IMAGE_VIEW_TYPE_RANGE_SIZE = (VK_IMAGE_VIEW_TYPE_CUBE_ARRAY - VK_IMAGE_VIEW_TYPE_1D + 1), - VK_IMAGE_VIEW_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkImageViewType; - typedef enum VkComponentSwizzle { VK_COMPONENT_SWIZZLE_IDENTITY = 0, VK_COMPONENT_SWIZZLE_ZERO = 1, @@ -837,111 +1144,19 @@ typedef enum VkComponentSwizzle { VK_COMPONENT_SWIZZLE_G = 4, VK_COMPONENT_SWIZZLE_B = 5, VK_COMPONENT_SWIZZLE_A = 6, - VK_COMPONENT_SWIZZLE_BEGIN_RANGE = VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_END_RANGE = VK_COMPONENT_SWIZZLE_A, - VK_COMPONENT_SWIZZLE_RANGE_SIZE = (VK_COMPONENT_SWIZZLE_A - VK_COMPONENT_SWIZZLE_IDENTITY + 1), VK_COMPONENT_SWIZZLE_MAX_ENUM = 0x7FFFFFFF } VkComponentSwizzle; -typedef enum VkVertexInputRate { - VK_VERTEX_INPUT_RATE_VERTEX = 0, - VK_VERTEX_INPUT_RATE_INSTANCE = 1, - VK_VERTEX_INPUT_RATE_BEGIN_RANGE = VK_VERTEX_INPUT_RATE_VERTEX, - VK_VERTEX_INPUT_RATE_END_RANGE = VK_VERTEX_INPUT_RATE_INSTANCE, - VK_VERTEX_INPUT_RATE_RANGE_SIZE = (VK_VERTEX_INPUT_RATE_INSTANCE - VK_VERTEX_INPUT_RATE_VERTEX + 1), - VK_VERTEX_INPUT_RATE_MAX_ENUM = 0x7FFFFFFF -} VkVertexInputRate; - -typedef enum VkPrimitiveTopology { - VK_PRIMITIVE_TOPOLOGY_POINT_LIST = 0, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST = 1, - VK_PRIMITIVE_TOPOLOGY_LINE_STRIP = 2, - 
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST = 3, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP = 4, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN = 5, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY = 6, - VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY = 7, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY = 8, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY = 9, - VK_PRIMITIVE_TOPOLOGY_PATCH_LIST = 10, - VK_PRIMITIVE_TOPOLOGY_BEGIN_RANGE = VK_PRIMITIVE_TOPOLOGY_POINT_LIST, - VK_PRIMITIVE_TOPOLOGY_END_RANGE = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, - VK_PRIMITIVE_TOPOLOGY_RANGE_SIZE = (VK_PRIMITIVE_TOPOLOGY_PATCH_LIST - VK_PRIMITIVE_TOPOLOGY_POINT_LIST + 1), - VK_PRIMITIVE_TOPOLOGY_MAX_ENUM = 0x7FFFFFFF -} VkPrimitiveTopology; - -typedef enum VkPolygonMode { - VK_POLYGON_MODE_FILL = 0, - VK_POLYGON_MODE_LINE = 1, - VK_POLYGON_MODE_POINT = 2, - VK_POLYGON_MODE_FILL_RECTANGLE_NV = 1000153000, - VK_POLYGON_MODE_BEGIN_RANGE = VK_POLYGON_MODE_FILL, - VK_POLYGON_MODE_END_RANGE = VK_POLYGON_MODE_POINT, - VK_POLYGON_MODE_RANGE_SIZE = (VK_POLYGON_MODE_POINT - VK_POLYGON_MODE_FILL + 1), - VK_POLYGON_MODE_MAX_ENUM = 0x7FFFFFFF -} VkPolygonMode; - -typedef enum VkFrontFace { - VK_FRONT_FACE_COUNTER_CLOCKWISE = 0, - VK_FRONT_FACE_CLOCKWISE = 1, - VK_FRONT_FACE_BEGIN_RANGE = VK_FRONT_FACE_COUNTER_CLOCKWISE, - VK_FRONT_FACE_END_RANGE = VK_FRONT_FACE_CLOCKWISE, - VK_FRONT_FACE_RANGE_SIZE = (VK_FRONT_FACE_CLOCKWISE - VK_FRONT_FACE_COUNTER_CLOCKWISE + 1), - VK_FRONT_FACE_MAX_ENUM = 0x7FFFFFFF -} VkFrontFace; - -typedef enum VkCompareOp { - VK_COMPARE_OP_NEVER = 0, - VK_COMPARE_OP_LESS = 1, - VK_COMPARE_OP_EQUAL = 2, - VK_COMPARE_OP_LESS_OR_EQUAL = 3, - VK_COMPARE_OP_GREATER = 4, - VK_COMPARE_OP_NOT_EQUAL = 5, - VK_COMPARE_OP_GREATER_OR_EQUAL = 6, - VK_COMPARE_OP_ALWAYS = 7, - VK_COMPARE_OP_BEGIN_RANGE = VK_COMPARE_OP_NEVER, - VK_COMPARE_OP_END_RANGE = VK_COMPARE_OP_ALWAYS, - VK_COMPARE_OP_RANGE_SIZE = (VK_COMPARE_OP_ALWAYS - VK_COMPARE_OP_NEVER + 1), - VK_COMPARE_OP_MAX_ENUM = 0x7FFFFFFF -} 
VkCompareOp; - -typedef enum VkStencilOp { - VK_STENCIL_OP_KEEP = 0, - VK_STENCIL_OP_ZERO = 1, - VK_STENCIL_OP_REPLACE = 2, - VK_STENCIL_OP_INCREMENT_AND_CLAMP = 3, - VK_STENCIL_OP_DECREMENT_AND_CLAMP = 4, - VK_STENCIL_OP_INVERT = 5, - VK_STENCIL_OP_INCREMENT_AND_WRAP = 6, - VK_STENCIL_OP_DECREMENT_AND_WRAP = 7, - VK_STENCIL_OP_BEGIN_RANGE = VK_STENCIL_OP_KEEP, - VK_STENCIL_OP_END_RANGE = VK_STENCIL_OP_DECREMENT_AND_WRAP, - VK_STENCIL_OP_RANGE_SIZE = (VK_STENCIL_OP_DECREMENT_AND_WRAP - VK_STENCIL_OP_KEEP + 1), - VK_STENCIL_OP_MAX_ENUM = 0x7FFFFFFF -} VkStencilOp; - -typedef enum VkLogicOp { - VK_LOGIC_OP_CLEAR = 0, - VK_LOGIC_OP_AND = 1, - VK_LOGIC_OP_AND_REVERSE = 2, - VK_LOGIC_OP_COPY = 3, - VK_LOGIC_OP_AND_INVERTED = 4, - VK_LOGIC_OP_NO_OP = 5, - VK_LOGIC_OP_XOR = 6, - VK_LOGIC_OP_OR = 7, - VK_LOGIC_OP_NOR = 8, - VK_LOGIC_OP_EQUIVALENT = 9, - VK_LOGIC_OP_INVERT = 10, - VK_LOGIC_OP_OR_REVERSE = 11, - VK_LOGIC_OP_COPY_INVERTED = 12, - VK_LOGIC_OP_OR_INVERTED = 13, - VK_LOGIC_OP_NAND = 14, - VK_LOGIC_OP_SET = 15, - VK_LOGIC_OP_BEGIN_RANGE = VK_LOGIC_OP_CLEAR, - VK_LOGIC_OP_END_RANGE = VK_LOGIC_OP_SET, - VK_LOGIC_OP_RANGE_SIZE = (VK_LOGIC_OP_SET - VK_LOGIC_OP_CLEAR + 1), - VK_LOGIC_OP_MAX_ENUM = 0x7FFFFFFF -} VkLogicOp; +typedef enum VkImageViewType { + VK_IMAGE_VIEW_TYPE_1D = 0, + VK_IMAGE_VIEW_TYPE_2D = 1, + VK_IMAGE_VIEW_TYPE_3D = 2, + VK_IMAGE_VIEW_TYPE_CUBE = 3, + VK_IMAGE_VIEW_TYPE_1D_ARRAY = 4, + VK_IMAGE_VIEW_TYPE_2D_ARRAY = 5, + VK_IMAGE_VIEW_TYPE_CUBE_ARRAY = 6, + VK_IMAGE_VIEW_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkImageViewType; typedef enum VkBlendFactor { VK_BLEND_FACTOR_ZERO = 0, @@ -963,9 +1178,6 @@ typedef enum VkBlendFactor { VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR = 16, VK_BLEND_FACTOR_SRC1_ALPHA = 17, VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA = 18, - VK_BLEND_FACTOR_BEGIN_RANGE = VK_BLEND_FACTOR_ZERO, - VK_BLEND_FACTOR_END_RANGE = VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, - VK_BLEND_FACTOR_RANGE_SIZE = (VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA - VK_BLEND_FACTOR_ZERO + 
1), VK_BLEND_FACTOR_MAX_ENUM = 0x7FFFFFFF } VkBlendFactor; @@ -1021,12 +1233,21 @@ typedef enum VkBlendOp { VK_BLEND_OP_RED_EXT = 1000148043, VK_BLEND_OP_GREEN_EXT = 1000148044, VK_BLEND_OP_BLUE_EXT = 1000148045, - VK_BLEND_OP_BEGIN_RANGE = VK_BLEND_OP_ADD, - VK_BLEND_OP_END_RANGE = VK_BLEND_OP_MAX, - VK_BLEND_OP_RANGE_SIZE = (VK_BLEND_OP_MAX - VK_BLEND_OP_ADD + 1), VK_BLEND_OP_MAX_ENUM = 0x7FFFFFFF } VkBlendOp; +typedef enum VkCompareOp { + VK_COMPARE_OP_NEVER = 0, + VK_COMPARE_OP_LESS = 1, + VK_COMPARE_OP_EQUAL = 2, + VK_COMPARE_OP_LESS_OR_EQUAL = 3, + VK_COMPARE_OP_GREATER = 4, + VK_COMPARE_OP_NOT_EQUAL = 5, + VK_COMPARE_OP_GREATER_OR_EQUAL = 6, + VK_COMPARE_OP_ALWAYS = 7, + VK_COMPARE_OP_MAX_ENUM = 0x7FFFFFFF +} VkCompareOp; + typedef enum VkDynamicState { VK_DYNAMIC_STATE_VIEWPORT = 0, VK_DYNAMIC_STATE_SCISSOR = 1, @@ -1040,42 +1261,91 @@ typedef enum VkDynamicState { VK_DYNAMIC_STATE_VIEWPORT_W_SCALING_NV = 1000087000, VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT = 1000099000, VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT = 1000143000, - VK_DYNAMIC_STATE_BEGIN_RANGE = VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_END_RANGE = VK_DYNAMIC_STATE_STENCIL_REFERENCE, - VK_DYNAMIC_STATE_RANGE_SIZE = (VK_DYNAMIC_STATE_STENCIL_REFERENCE - VK_DYNAMIC_STATE_VIEWPORT + 1), + VK_DYNAMIC_STATE_VIEWPORT_SHADING_RATE_PALETTE_NV = 1000164004, + VK_DYNAMIC_STATE_VIEWPORT_COARSE_SAMPLE_ORDER_NV = 1000164006, + VK_DYNAMIC_STATE_EXCLUSIVE_SCISSOR_NV = 1000205001, + VK_DYNAMIC_STATE_LINE_STIPPLE_EXT = 1000259000, + VK_DYNAMIC_STATE_CULL_MODE_EXT = 1000267000, + VK_DYNAMIC_STATE_FRONT_FACE_EXT = 1000267001, + VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT = 1000267002, + VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT = 1000267003, + VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT = 1000267004, + VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT = 1000267005, + VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT = 1000267006, + VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT = 1000267007, + VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT = 
1000267008, + VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT = 1000267009, + VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT = 1000267010, + VK_DYNAMIC_STATE_STENCIL_OP_EXT = 1000267011, VK_DYNAMIC_STATE_MAX_ENUM = 0x7FFFFFFF } VkDynamicState; -typedef enum VkFilter { - VK_FILTER_NEAREST = 0, - VK_FILTER_LINEAR = 1, - VK_FILTER_CUBIC_IMG = 1000015000, - VK_FILTER_BEGIN_RANGE = VK_FILTER_NEAREST, - VK_FILTER_END_RANGE = VK_FILTER_LINEAR, - VK_FILTER_RANGE_SIZE = (VK_FILTER_LINEAR - VK_FILTER_NEAREST + 1), - VK_FILTER_MAX_ENUM = 0x7FFFFFFF -} VkFilter; +typedef enum VkFrontFace { + VK_FRONT_FACE_COUNTER_CLOCKWISE = 0, + VK_FRONT_FACE_CLOCKWISE = 1, + VK_FRONT_FACE_MAX_ENUM = 0x7FFFFFFF +} VkFrontFace; -typedef enum VkSamplerMipmapMode { - VK_SAMPLER_MIPMAP_MODE_NEAREST = 0, - VK_SAMPLER_MIPMAP_MODE_LINEAR = 1, - VK_SAMPLER_MIPMAP_MODE_BEGIN_RANGE = VK_SAMPLER_MIPMAP_MODE_NEAREST, - VK_SAMPLER_MIPMAP_MODE_END_RANGE = VK_SAMPLER_MIPMAP_MODE_LINEAR, - VK_SAMPLER_MIPMAP_MODE_RANGE_SIZE = (VK_SAMPLER_MIPMAP_MODE_LINEAR - VK_SAMPLER_MIPMAP_MODE_NEAREST + 1), - VK_SAMPLER_MIPMAP_MODE_MAX_ENUM = 0x7FFFFFFF -} VkSamplerMipmapMode; +typedef enum VkVertexInputRate { + VK_VERTEX_INPUT_RATE_VERTEX = 0, + VK_VERTEX_INPUT_RATE_INSTANCE = 1, + VK_VERTEX_INPUT_RATE_MAX_ENUM = 0x7FFFFFFF +} VkVertexInputRate; -typedef enum VkSamplerAddressMode { - VK_SAMPLER_ADDRESS_MODE_REPEAT = 0, - VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT = 1, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE = 2, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER = 3, - VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE = 4, - VK_SAMPLER_ADDRESS_MODE_BEGIN_RANGE = VK_SAMPLER_ADDRESS_MODE_REPEAT, - VK_SAMPLER_ADDRESS_MODE_END_RANGE = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - VK_SAMPLER_ADDRESS_MODE_RANGE_SIZE = (VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER - VK_SAMPLER_ADDRESS_MODE_REPEAT + 1), - VK_SAMPLER_ADDRESS_MODE_MAX_ENUM = 0x7FFFFFFF -} VkSamplerAddressMode; +typedef enum VkPrimitiveTopology { + VK_PRIMITIVE_TOPOLOGY_POINT_LIST = 0, + 
VK_PRIMITIVE_TOPOLOGY_LINE_LIST = 1, + VK_PRIMITIVE_TOPOLOGY_LINE_STRIP = 2, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST = 3, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP = 4, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN = 5, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY = 6, + VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY = 7, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY = 8, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY = 9, + VK_PRIMITIVE_TOPOLOGY_PATCH_LIST = 10, + VK_PRIMITIVE_TOPOLOGY_MAX_ENUM = 0x7FFFFFFF +} VkPrimitiveTopology; + +typedef enum VkPolygonMode { + VK_POLYGON_MODE_FILL = 0, + VK_POLYGON_MODE_LINE = 1, + VK_POLYGON_MODE_POINT = 2, + VK_POLYGON_MODE_FILL_RECTANGLE_NV = 1000153000, + VK_POLYGON_MODE_MAX_ENUM = 0x7FFFFFFF +} VkPolygonMode; + +typedef enum VkStencilOp { + VK_STENCIL_OP_KEEP = 0, + VK_STENCIL_OP_ZERO = 1, + VK_STENCIL_OP_REPLACE = 2, + VK_STENCIL_OP_INCREMENT_AND_CLAMP = 3, + VK_STENCIL_OP_DECREMENT_AND_CLAMP = 4, + VK_STENCIL_OP_INVERT = 5, + VK_STENCIL_OP_INCREMENT_AND_WRAP = 6, + VK_STENCIL_OP_DECREMENT_AND_WRAP = 7, + VK_STENCIL_OP_MAX_ENUM = 0x7FFFFFFF +} VkStencilOp; + +typedef enum VkLogicOp { + VK_LOGIC_OP_CLEAR = 0, + VK_LOGIC_OP_AND = 1, + VK_LOGIC_OP_AND_REVERSE = 2, + VK_LOGIC_OP_COPY = 3, + VK_LOGIC_OP_AND_INVERTED = 4, + VK_LOGIC_OP_NO_OP = 5, + VK_LOGIC_OP_XOR = 6, + VK_LOGIC_OP_OR = 7, + VK_LOGIC_OP_NOR = 8, + VK_LOGIC_OP_EQUIVALENT = 9, + VK_LOGIC_OP_INVERT = 10, + VK_LOGIC_OP_OR_REVERSE = 11, + VK_LOGIC_OP_COPY_INVERTED = 12, + VK_LOGIC_OP_OR_INVERTED = 13, + VK_LOGIC_OP_NAND = 14, + VK_LOGIC_OP_SET = 15, + VK_LOGIC_OP_MAX_ENUM = 0x7FFFFFFF +} VkLogicOp; typedef enum VkBorderColor { VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK = 0, @@ -1084,12 +1354,35 @@ typedef enum VkBorderColor { VK_BORDER_COLOR_INT_OPAQUE_BLACK = 3, VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE = 4, VK_BORDER_COLOR_INT_OPAQUE_WHITE = 5, - VK_BORDER_COLOR_BEGIN_RANGE = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, - VK_BORDER_COLOR_END_RANGE = 
VK_BORDER_COLOR_INT_OPAQUE_WHITE, - VK_BORDER_COLOR_RANGE_SIZE = (VK_BORDER_COLOR_INT_OPAQUE_WHITE - VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK + 1), + VK_BORDER_COLOR_FLOAT_CUSTOM_EXT = 1000287003, + VK_BORDER_COLOR_INT_CUSTOM_EXT = 1000287004, VK_BORDER_COLOR_MAX_ENUM = 0x7FFFFFFF } VkBorderColor; +typedef enum VkFilter { + VK_FILTER_NEAREST = 0, + VK_FILTER_LINEAR = 1, + VK_FILTER_CUBIC_IMG = 1000015000, + VK_FILTER_CUBIC_EXT = VK_FILTER_CUBIC_IMG, + VK_FILTER_MAX_ENUM = 0x7FFFFFFF +} VkFilter; + +typedef enum VkSamplerAddressMode { + VK_SAMPLER_ADDRESS_MODE_REPEAT = 0, + VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT = 1, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE = 2, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER = 3, + VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE = 4, + VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR = VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, + VK_SAMPLER_ADDRESS_MODE_MAX_ENUM = 0x7FFFFFFF +} VkSamplerAddressMode; + +typedef enum VkSamplerMipmapMode { + VK_SAMPLER_MIPMAP_MODE_NEAREST = 0, + VK_SAMPLER_MIPMAP_MODE_LINEAR = 1, + VK_SAMPLER_MIPMAP_MODE_MAX_ENUM = 0x7FFFFFFF +} VkSamplerMipmapMode; + typedef enum VkDescriptorType { VK_DESCRIPTOR_TYPE_SAMPLER = 0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER = 1, @@ -1102,9 +1395,9 @@ typedef enum VkDescriptorType { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC = 8, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC = 9, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT = 10, - VK_DESCRIPTOR_TYPE_BEGIN_RANGE = VK_DESCRIPTOR_TYPE_SAMPLER, - VK_DESCRIPTOR_TYPE_END_RANGE = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, - VK_DESCRIPTOR_TYPE_RANGE_SIZE = (VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT - VK_DESCRIPTOR_TYPE_SAMPLER + 1), + VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT = 1000138000, + VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR = 1000165000, + VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_DESCRIPTOR_TYPE_MAX_ENUM = 0x7FFFFFFF } VkDescriptorType; @@ -1112,104 +1405,98 @@ typedef enum 
VkAttachmentLoadOp { VK_ATTACHMENT_LOAD_OP_LOAD = 0, VK_ATTACHMENT_LOAD_OP_CLEAR = 1, VK_ATTACHMENT_LOAD_OP_DONT_CARE = 2, - VK_ATTACHMENT_LOAD_OP_BEGIN_RANGE = VK_ATTACHMENT_LOAD_OP_LOAD, - VK_ATTACHMENT_LOAD_OP_END_RANGE = VK_ATTACHMENT_LOAD_OP_DONT_CARE, - VK_ATTACHMENT_LOAD_OP_RANGE_SIZE = (VK_ATTACHMENT_LOAD_OP_DONT_CARE - VK_ATTACHMENT_LOAD_OP_LOAD + 1), VK_ATTACHMENT_LOAD_OP_MAX_ENUM = 0x7FFFFFFF } VkAttachmentLoadOp; typedef enum VkAttachmentStoreOp { VK_ATTACHMENT_STORE_OP_STORE = 0, VK_ATTACHMENT_STORE_OP_DONT_CARE = 1, - VK_ATTACHMENT_STORE_OP_BEGIN_RANGE = VK_ATTACHMENT_STORE_OP_STORE, - VK_ATTACHMENT_STORE_OP_END_RANGE = VK_ATTACHMENT_STORE_OP_DONT_CARE, - VK_ATTACHMENT_STORE_OP_RANGE_SIZE = (VK_ATTACHMENT_STORE_OP_DONT_CARE - VK_ATTACHMENT_STORE_OP_STORE + 1), + VK_ATTACHMENT_STORE_OP_NONE_QCOM = 1000301000, VK_ATTACHMENT_STORE_OP_MAX_ENUM = 0x7FFFFFFF } VkAttachmentStoreOp; typedef enum VkPipelineBindPoint { VK_PIPELINE_BIND_POINT_GRAPHICS = 0, VK_PIPELINE_BIND_POINT_COMPUTE = 1, - VK_PIPELINE_BIND_POINT_BEGIN_RANGE = VK_PIPELINE_BIND_POINT_GRAPHICS, - VK_PIPELINE_BIND_POINT_END_RANGE = VK_PIPELINE_BIND_POINT_COMPUTE, - VK_PIPELINE_BIND_POINT_RANGE_SIZE = (VK_PIPELINE_BIND_POINT_COMPUTE - VK_PIPELINE_BIND_POINT_GRAPHICS + 1), + VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR = 1000165000, + VK_PIPELINE_BIND_POINT_RAY_TRACING_NV = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, VK_PIPELINE_BIND_POINT_MAX_ENUM = 0x7FFFFFFF } VkPipelineBindPoint; typedef enum VkCommandBufferLevel { VK_COMMAND_BUFFER_LEVEL_PRIMARY = 0, VK_COMMAND_BUFFER_LEVEL_SECONDARY = 1, - VK_COMMAND_BUFFER_LEVEL_BEGIN_RANGE = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - VK_COMMAND_BUFFER_LEVEL_END_RANGE = VK_COMMAND_BUFFER_LEVEL_SECONDARY, - VK_COMMAND_BUFFER_LEVEL_RANGE_SIZE = (VK_COMMAND_BUFFER_LEVEL_SECONDARY - VK_COMMAND_BUFFER_LEVEL_PRIMARY + 1), VK_COMMAND_BUFFER_LEVEL_MAX_ENUM = 0x7FFFFFFF } VkCommandBufferLevel; typedef enum VkIndexType { VK_INDEX_TYPE_UINT16 = 0, VK_INDEX_TYPE_UINT32 = 1, - 
VK_INDEX_TYPE_BEGIN_RANGE = VK_INDEX_TYPE_UINT16, - VK_INDEX_TYPE_END_RANGE = VK_INDEX_TYPE_UINT32, - VK_INDEX_TYPE_RANGE_SIZE = (VK_INDEX_TYPE_UINT32 - VK_INDEX_TYPE_UINT16 + 1), + VK_INDEX_TYPE_NONE_KHR = 1000165000, + VK_INDEX_TYPE_UINT8_EXT = 1000265000, + VK_INDEX_TYPE_NONE_NV = VK_INDEX_TYPE_NONE_KHR, VK_INDEX_TYPE_MAX_ENUM = 0x7FFFFFFF } VkIndexType; typedef enum VkSubpassContents { VK_SUBPASS_CONTENTS_INLINE = 0, VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS = 1, - VK_SUBPASS_CONTENTS_BEGIN_RANGE = VK_SUBPASS_CONTENTS_INLINE, - VK_SUBPASS_CONTENTS_END_RANGE = VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS, - VK_SUBPASS_CONTENTS_RANGE_SIZE = (VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS - VK_SUBPASS_CONTENTS_INLINE + 1), VK_SUBPASS_CONTENTS_MAX_ENUM = 0x7FFFFFFF } VkSubpassContents; -typedef enum VkObjectType { - VK_OBJECT_TYPE_UNKNOWN = 0, - VK_OBJECT_TYPE_INSTANCE = 1, - VK_OBJECT_TYPE_PHYSICAL_DEVICE = 2, - VK_OBJECT_TYPE_DEVICE = 3, - VK_OBJECT_TYPE_QUEUE = 4, - VK_OBJECT_TYPE_SEMAPHORE = 5, - VK_OBJECT_TYPE_COMMAND_BUFFER = 6, - VK_OBJECT_TYPE_FENCE = 7, - VK_OBJECT_TYPE_DEVICE_MEMORY = 8, - VK_OBJECT_TYPE_BUFFER = 9, - VK_OBJECT_TYPE_IMAGE = 10, - VK_OBJECT_TYPE_EVENT = 11, - VK_OBJECT_TYPE_QUERY_POOL = 12, - VK_OBJECT_TYPE_BUFFER_VIEW = 13, - VK_OBJECT_TYPE_IMAGE_VIEW = 14, - VK_OBJECT_TYPE_SHADER_MODULE = 15, - VK_OBJECT_TYPE_PIPELINE_CACHE = 16, - VK_OBJECT_TYPE_PIPELINE_LAYOUT = 17, - VK_OBJECT_TYPE_RENDER_PASS = 18, - VK_OBJECT_TYPE_PIPELINE = 19, - VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT = 20, - VK_OBJECT_TYPE_SAMPLER = 21, - VK_OBJECT_TYPE_DESCRIPTOR_POOL = 22, - VK_OBJECT_TYPE_DESCRIPTOR_SET = 23, - VK_OBJECT_TYPE_FRAMEBUFFER = 24, - VK_OBJECT_TYPE_COMMAND_POOL = 25, - VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION = 1000156000, - VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE = 1000085000, - VK_OBJECT_TYPE_SURFACE_KHR = 1000000000, - VK_OBJECT_TYPE_SWAPCHAIN_KHR = 1000001000, - VK_OBJECT_TYPE_DISPLAY_KHR = 1000002000, - VK_OBJECT_TYPE_DISPLAY_MODE_KHR = 
1000002001, - VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT = 1000011000, - VK_OBJECT_TYPE_OBJECT_TABLE_NVX = 1000086000, - VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX = 1000086001, - VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT = 1000128000, - VK_OBJECT_TYPE_VALIDATION_CACHE_EXT = 1000160000, - VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR = VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE, - VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_KHR = VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION, - VK_OBJECT_TYPE_BEGIN_RANGE = VK_OBJECT_TYPE_UNKNOWN, - VK_OBJECT_TYPE_END_RANGE = VK_OBJECT_TYPE_COMMAND_POOL, - VK_OBJECT_TYPE_RANGE_SIZE = (VK_OBJECT_TYPE_COMMAND_POOL - VK_OBJECT_TYPE_UNKNOWN + 1), - VK_OBJECT_TYPE_MAX_ENUM = 0x7FFFFFFF -} VkObjectType; +typedef enum VkAccessFlagBits { + VK_ACCESS_INDIRECT_COMMAND_READ_BIT = 0x00000001, + VK_ACCESS_INDEX_READ_BIT = 0x00000002, + VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT = 0x00000004, + VK_ACCESS_UNIFORM_READ_BIT = 0x00000008, + VK_ACCESS_INPUT_ATTACHMENT_READ_BIT = 0x00000010, + VK_ACCESS_SHADER_READ_BIT = 0x00000020, + VK_ACCESS_SHADER_WRITE_BIT = 0x00000040, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT = 0x00000080, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT = 0x00000100, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT = 0x00000200, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT = 0x00000400, + VK_ACCESS_TRANSFER_READ_BIT = 0x00000800, + VK_ACCESS_TRANSFER_WRITE_BIT = 0x00001000, + VK_ACCESS_HOST_READ_BIT = 0x00002000, + VK_ACCESS_HOST_WRITE_BIT = 0x00004000, + VK_ACCESS_MEMORY_READ_BIT = 0x00008000, + VK_ACCESS_MEMORY_WRITE_BIT = 0x00010000, + VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT = 0x02000000, + VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT = 0x04000000, + VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT = 0x08000000, + VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT = 0x00100000, + VK_ACCESS_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT = 0x00080000, + VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR = 0x00200000, + VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR = 
0x00400000, + VK_ACCESS_SHADING_RATE_IMAGE_READ_BIT_NV = 0x00800000, + VK_ACCESS_FRAGMENT_DENSITY_MAP_READ_BIT_EXT = 0x01000000, + VK_ACCESS_COMMAND_PREPROCESS_READ_BIT_NV = 0x00020000, + VK_ACCESS_COMMAND_PREPROCESS_WRITE_BIT_NV = 0x00040000, + VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NV = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, + VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_NV = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, + VK_ACCESS_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkAccessFlagBits; +typedef VkFlags VkAccessFlags; -typedef VkFlags VkInstanceCreateFlags; +typedef enum VkImageAspectFlagBits { + VK_IMAGE_ASPECT_COLOR_BIT = 0x00000001, + VK_IMAGE_ASPECT_DEPTH_BIT = 0x00000002, + VK_IMAGE_ASPECT_STENCIL_BIT = 0x00000004, + VK_IMAGE_ASPECT_METADATA_BIT = 0x00000008, + VK_IMAGE_ASPECT_PLANE_0_BIT = 0x00000010, + VK_IMAGE_ASPECT_PLANE_1_BIT = 0x00000020, + VK_IMAGE_ASPECT_PLANE_2_BIT = 0x00000040, + VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT = 0x00000080, + VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT = 0x00000100, + VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT = 0x00000200, + VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT = 0x00000400, + VK_IMAGE_ASPECT_PLANE_0_BIT_KHR = VK_IMAGE_ASPECT_PLANE_0_BIT, + VK_IMAGE_ASPECT_PLANE_1_BIT_KHR = VK_IMAGE_ASPECT_PLANE_1_BIT, + VK_IMAGE_ASPECT_PLANE_2_BIT_KHR = VK_IMAGE_ASPECT_PLANE_2_BIT, + VK_IMAGE_ASPECT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkImageAspectFlagBits; +typedef VkFlags VkImageAspectFlags; typedef enum VkFormatFeatureFlagBits { VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT = 0x00000001, @@ -1234,10 +1521,13 @@ typedef enum VkFormatFeatureFlagBits { VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_CHROMA_RECONSTRUCTION_EXPLICIT_FORCEABLE_BIT = 0x00200000, VK_FORMAT_FEATURE_DISJOINT_BIT = 0x00400000, VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT = 0x00800000, + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT = 0x00010000, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_IMG = 0x00002000, - VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT_EXT 
= 0x00010000, + VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR = 0x20000000, + VK_FORMAT_FEATURE_FRAGMENT_DENSITY_MAP_BIT_EXT = 0x01000000, VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT, VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR = VK_FORMAT_FEATURE_TRANSFER_DST_BIT, + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT_EXT = VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT, VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT_KHR = VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT, VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_LINEAR_FILTER_BIT_KHR = VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_LINEAR_FILTER_BIT, VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_SEPARATE_RECONSTRUCTION_FILTER_BIT_KHR = VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_SEPARATE_RECONSTRUCTION_FILTER_BIT, @@ -1245,23 +1535,11 @@ typedef enum VkFormatFeatureFlagBits { VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_CHROMA_RECONSTRUCTION_EXPLICIT_FORCEABLE_BIT_KHR = VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_CHROMA_RECONSTRUCTION_EXPLICIT_FORCEABLE_BIT, VK_FORMAT_FEATURE_DISJOINT_BIT_KHR = VK_FORMAT_FEATURE_DISJOINT_BIT, VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT_KHR = VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT, + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_EXT = VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_IMG, VK_FORMAT_FEATURE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkFormatFeatureFlagBits; typedef VkFlags VkFormatFeatureFlags; -typedef enum VkImageUsageFlagBits { - VK_IMAGE_USAGE_TRANSFER_SRC_BIT = 0x00000001, - VK_IMAGE_USAGE_TRANSFER_DST_BIT = 0x00000002, - VK_IMAGE_USAGE_SAMPLED_BIT = 0x00000004, - VK_IMAGE_USAGE_STORAGE_BIT = 0x00000008, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT = 0x00000010, - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000020, - VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT = 0x00000040, - VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT = 0x00000080, - VK_IMAGE_USAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VkImageUsageFlagBits; 
-typedef VkFlags VkImageUsageFlags; - typedef enum VkImageCreateFlagBits { VK_IMAGE_CREATE_SPARSE_BINDING_BIT = 0x00000001, VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, @@ -1275,7 +1553,9 @@ typedef enum VkImageCreateFlagBits { VK_IMAGE_CREATE_EXTENDED_USAGE_BIT = 0x00000100, VK_IMAGE_CREATE_PROTECTED_BIT = 0x00000800, VK_IMAGE_CREATE_DISJOINT_BIT = 0x00000200, + VK_IMAGE_CREATE_CORNER_SAMPLED_BIT_NV = 0x00002000, VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT = 0x00001000, + VK_IMAGE_CREATE_SUBSAMPLED_BIT_EXT = 0x00004000, VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT_KHR = VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT, VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT_KHR = VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT, VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT_KHR = VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT, @@ -1298,6 +1578,43 @@ typedef enum VkSampleCountFlagBits { } VkSampleCountFlagBits; typedef VkFlags VkSampleCountFlags; +typedef enum VkImageUsageFlagBits { + VK_IMAGE_USAGE_TRANSFER_SRC_BIT = 0x00000001, + VK_IMAGE_USAGE_TRANSFER_DST_BIT = 0x00000002, + VK_IMAGE_USAGE_SAMPLED_BIT = 0x00000004, + VK_IMAGE_USAGE_STORAGE_BIT = 0x00000008, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT = 0x00000010, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000020, + VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT = 0x00000040, + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT = 0x00000080, + VK_IMAGE_USAGE_SHADING_RATE_IMAGE_BIT_NV = 0x00000100, + VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT = 0x00000200, + VK_IMAGE_USAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkImageUsageFlagBits; +typedef VkFlags VkImageUsageFlags; +typedef VkFlags VkInstanceCreateFlags; + +typedef enum VkMemoryHeapFlagBits { + VK_MEMORY_HEAP_DEVICE_LOCAL_BIT = 0x00000001, + VK_MEMORY_HEAP_MULTI_INSTANCE_BIT = 0x00000002, + VK_MEMORY_HEAP_MULTI_INSTANCE_BIT_KHR = VK_MEMORY_HEAP_MULTI_INSTANCE_BIT, + VK_MEMORY_HEAP_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkMemoryHeapFlagBits; +typedef VkFlags VkMemoryHeapFlags; + 
+typedef enum VkMemoryPropertyFlagBits { + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT = 0x00000001, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT = 0x00000002, + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT = 0x00000004, + VK_MEMORY_PROPERTY_HOST_CACHED_BIT = 0x00000008, + VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT = 0x00000010, + VK_MEMORY_PROPERTY_PROTECTED_BIT = 0x00000020, + VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD = 0x00000040, + VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD = 0x00000080, + VK_MEMORY_PROPERTY_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkMemoryPropertyFlagBits; +typedef VkFlags VkMemoryPropertyFlags; + typedef enum VkQueueFlagBits { VK_QUEUE_GRAPHICS_BIT = 0x00000001, VK_QUEUE_COMPUTE_BIT = 0x00000002, @@ -1307,25 +1624,6 @@ typedef enum VkQueueFlagBits { VK_QUEUE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkQueueFlagBits; typedef VkFlags VkQueueFlags; - -typedef enum VkMemoryPropertyFlagBits { - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT = 0x00000001, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT = 0x00000002, - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT = 0x00000004, - VK_MEMORY_PROPERTY_HOST_CACHED_BIT = 0x00000008, - VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT = 0x00000010, - VK_MEMORY_PROPERTY_PROTECTED_BIT = 0x00000020, - VK_MEMORY_PROPERTY_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VkMemoryPropertyFlagBits; -typedef VkFlags VkMemoryPropertyFlags; - -typedef enum VkMemoryHeapFlagBits { - VK_MEMORY_HEAP_DEVICE_LOCAL_BIT = 0x00000001, - VK_MEMORY_HEAP_MULTI_INSTANCE_BIT = 0x00000002, - VK_MEMORY_HEAP_MULTI_INSTANCE_BIT_KHR = VK_MEMORY_HEAP_MULTI_INSTANCE_BIT, - VK_MEMORY_HEAP_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VkMemoryHeapFlagBits; -typedef VkFlags VkMemoryHeapFlags; typedef VkFlags VkDeviceCreateFlags; typedef enum VkDeviceQueueCreateFlagBits { @@ -1352,26 +1650,27 @@ typedef enum VkPipelineStageFlagBits { VK_PIPELINE_STAGE_HOST_BIT = 0x00004000, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT = 0x00008000, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT = 0x00010000, - VK_PIPELINE_STAGE_COMMAND_PROCESS_BIT_NVX = 0x00020000, + 
VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT = 0x01000000, + VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT = 0x00040000, + VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR = 0x00200000, + VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR = 0x02000000, + VK_PIPELINE_STAGE_SHADING_RATE_IMAGE_BIT_NV = 0x00400000, + VK_PIPELINE_STAGE_TASK_SHADER_BIT_NV = 0x00080000, + VK_PIPELINE_STAGE_MESH_SHADER_BIT_NV = 0x00100000, + VK_PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT_EXT = 0x00800000, + VK_PIPELINE_STAGE_COMMAND_PREPROCESS_BIT_NV = 0x00020000, + VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_NV = VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, + VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV = VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkPipelineStageFlagBits; typedef VkFlags VkPipelineStageFlags; typedef VkFlags VkMemoryMapFlags; -typedef enum VkImageAspectFlagBits { - VK_IMAGE_ASPECT_COLOR_BIT = 0x00000001, - VK_IMAGE_ASPECT_DEPTH_BIT = 0x00000002, - VK_IMAGE_ASPECT_STENCIL_BIT = 0x00000004, - VK_IMAGE_ASPECT_METADATA_BIT = 0x00000008, - VK_IMAGE_ASPECT_PLANE_0_BIT = 0x00000010, - VK_IMAGE_ASPECT_PLANE_1_BIT = 0x00000020, - VK_IMAGE_ASPECT_PLANE_2_BIT = 0x00000040, - VK_IMAGE_ASPECT_PLANE_0_BIT_KHR = VK_IMAGE_ASPECT_PLANE_0_BIT, - VK_IMAGE_ASPECT_PLANE_1_BIT_KHR = VK_IMAGE_ASPECT_PLANE_1_BIT, - VK_IMAGE_ASPECT_PLANE_2_BIT_KHR = VK_IMAGE_ASPECT_PLANE_2_BIT, - VK_IMAGE_ASPECT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VkImageAspectFlagBits; -typedef VkFlags VkImageAspectFlags; +typedef enum VkSparseMemoryBindFlagBits { + VK_SPARSE_MEMORY_BIND_METADATA_BIT = 0x00000001, + VK_SPARSE_MEMORY_BIND_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkSparseMemoryBindFlagBits; +typedef VkFlags VkSparseMemoryBindFlags; typedef enum VkSparseImageFormatFlagBits { VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT = 0x00000001, @@ -1381,12 +1680,6 @@ typedef enum VkSparseImageFormatFlagBits { } VkSparseImageFormatFlagBits; typedef VkFlags 
VkSparseImageFormatFlags; -typedef enum VkSparseMemoryBindFlagBits { - VK_SPARSE_MEMORY_BIND_METADATA_BIT = 0x00000001, - VK_SPARSE_MEMORY_BIND_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VkSparseMemoryBindFlagBits; -typedef VkFlags VkSparseMemoryBindFlags; - typedef enum VkFenceCreateFlagBits { VK_FENCE_CREATE_SIGNALED_BIT = 0x00000001, VK_FENCE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF @@ -1394,7 +1687,6 @@ typedef enum VkFenceCreateFlagBits { typedef VkFlags VkFenceCreateFlags; typedef VkFlags VkSemaphoreCreateFlags; typedef VkFlags VkEventCreateFlags; -typedef VkFlags VkQueryPoolCreateFlags; typedef enum VkQueryPipelineStatisticFlagBits { VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT = 0x00000001, @@ -1411,6 +1703,7 @@ typedef enum VkQueryPipelineStatisticFlagBits { VK_QUERY_PIPELINE_STATISTIC_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkQueryPipelineStatisticFlagBits; typedef VkFlags VkQueryPipelineStatisticFlags; +typedef VkFlags VkQueryPoolCreateFlags; typedef enum VkQueryResultFlagBits { VK_QUERY_RESULT_64_BIT = 0x00000001, @@ -1426,6 +1719,9 @@ typedef enum VkBufferCreateFlagBits { VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, VK_BUFFER_CREATE_SPARSE_ALIASED_BIT = 0x00000004, VK_BUFFER_CREATE_PROTECTED_BIT = 0x00000008, + VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT = 0x00000010, + VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_EXT = VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT, + VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR = VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT, VK_BUFFER_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkBufferCreateFlagBits; typedef VkFlags VkBufferCreateFlags; @@ -1440,25 +1736,77 @@ typedef enum VkBufferUsageFlagBits { VK_BUFFER_USAGE_INDEX_BUFFER_BIT = 0x00000040, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT = 0x00000080, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT = 0x00000100, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT = 0x00020000, + VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT = 0x00000800, + 
VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT = 0x00001000, + VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT = 0x00000200, + VK_BUFFER_USAGE_RAY_TRACING_BIT_KHR = 0x00000400, + VK_BUFFER_USAGE_RAY_TRACING_BIT_NV = VK_BUFFER_USAGE_RAY_TRACING_BIT_KHR, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_BUFFER_USAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkBufferUsageFlagBits; typedef VkFlags VkBufferUsageFlags; typedef VkFlags VkBufferViewCreateFlags; + +typedef enum VkImageViewCreateFlagBits { + VK_IMAGE_VIEW_CREATE_FRAGMENT_DENSITY_MAP_DYNAMIC_BIT_EXT = 0x00000001, + VK_IMAGE_VIEW_CREATE_FRAGMENT_DENSITY_MAP_DEFERRED_BIT_EXT = 0x00000002, + VK_IMAGE_VIEW_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkImageViewCreateFlagBits; typedef VkFlags VkImageViewCreateFlags; + +typedef enum VkShaderModuleCreateFlagBits { + VK_SHADER_MODULE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkShaderModuleCreateFlagBits; typedef VkFlags VkShaderModuleCreateFlags; + +typedef enum VkPipelineCacheCreateFlagBits { + VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT = 0x00000001, + VK_PIPELINE_CACHE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkPipelineCacheCreateFlagBits; typedef VkFlags VkPipelineCacheCreateFlags; +typedef enum VkColorComponentFlagBits { + VK_COLOR_COMPONENT_R_BIT = 0x00000001, + VK_COLOR_COMPONENT_G_BIT = 0x00000002, + VK_COLOR_COMPONENT_B_BIT = 0x00000004, + VK_COLOR_COMPONENT_A_BIT = 0x00000008, + VK_COLOR_COMPONENT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkColorComponentFlagBits; +typedef VkFlags VkColorComponentFlags; + typedef enum VkPipelineCreateFlagBits { VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT = 0x00000001, VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT = 0x00000002, VK_PIPELINE_CREATE_DERIVATIVE_BIT = 0x00000004, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT = 0x00000008, - VK_PIPELINE_CREATE_DISPATCH_BASE = 0x00000010, 
+ VK_PIPELINE_CREATE_DISPATCH_BASE_BIT = 0x00000010, + VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR = 0x00004000, + VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR = 0x00008000, + VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR = 0x00010000, + VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_BIT_KHR = 0x00020000, + VK_PIPELINE_CREATE_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR = 0x00001000, + VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR = 0x00002000, + VK_PIPELINE_CREATE_DEFER_COMPILE_BIT_NV = 0x00000020, + VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR = 0x00000040, + VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR = 0x00000080, + VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV = 0x00040000, + VK_PIPELINE_CREATE_LIBRARY_BIT_KHR = 0x00000800, + VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT = 0x00000100, + VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT = 0x00000200, + VK_PIPELINE_CREATE_DISPATCH_BASE = VK_PIPELINE_CREATE_DISPATCH_BASE_BIT, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT_KHR = VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT, VK_PIPELINE_CREATE_DISPATCH_BASE_KHR = VK_PIPELINE_CREATE_DISPATCH_BASE, VK_PIPELINE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkPipelineCreateFlagBits; typedef VkFlags VkPipelineCreateFlags; + +typedef enum VkPipelineShaderStageCreateFlagBits { + VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT = 0x00000001, + VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT = 0x00000002, + VK_PIPELINE_SHADER_STAGE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkPipelineShaderStageCreateFlagBits; typedef VkFlags VkPipelineShaderStageCreateFlags; typedef enum VkShaderStageFlagBits { @@ -1470,13 +1818,22 @@ typedef enum VkShaderStageFlagBits { VK_SHADER_STAGE_COMPUTE_BIT = 0x00000020, VK_SHADER_STAGE_ALL_GRAPHICS = 0x0000001F, VK_SHADER_STAGE_ALL = 0x7FFFFFFF, + VK_SHADER_STAGE_RAYGEN_BIT_KHR = 0x00000100, + 
VK_SHADER_STAGE_ANY_HIT_BIT_KHR = 0x00000200, + VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR = 0x00000400, + VK_SHADER_STAGE_MISS_BIT_KHR = 0x00000800, + VK_SHADER_STAGE_INTERSECTION_BIT_KHR = 0x00001000, + VK_SHADER_STAGE_CALLABLE_BIT_KHR = 0x00002000, + VK_SHADER_STAGE_TASK_BIT_NV = 0x00000040, + VK_SHADER_STAGE_MESH_BIT_NV = 0x00000080, + VK_SHADER_STAGE_RAYGEN_BIT_NV = VK_SHADER_STAGE_RAYGEN_BIT_KHR, + VK_SHADER_STAGE_ANY_HIT_BIT_NV = VK_SHADER_STAGE_ANY_HIT_BIT_KHR, + VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV = VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, + VK_SHADER_STAGE_MISS_BIT_NV = VK_SHADER_STAGE_MISS_BIT_KHR, + VK_SHADER_STAGE_INTERSECTION_BIT_NV = VK_SHADER_STAGE_INTERSECTION_BIT_KHR, + VK_SHADER_STAGE_CALLABLE_BIT_NV = VK_SHADER_STAGE_CALLABLE_BIT_KHR, VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkShaderStageFlagBits; -typedef VkFlags VkPipelineVertexInputStateCreateFlags; -typedef VkFlags VkPipelineInputAssemblyStateCreateFlags; -typedef VkFlags VkPipelineTessellationStateCreateFlags; -typedef VkFlags VkPipelineViewportStateCreateFlags; -typedef VkFlags VkPipelineRasterizationStateCreateFlags; typedef enum VkCullModeFlagBits { VK_CULL_MODE_NONE = 0, @@ -1486,39 +1843,41 @@ typedef enum VkCullModeFlagBits { VK_CULL_MODE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkCullModeFlagBits; typedef VkFlags VkCullModeFlags; +typedef VkFlags VkPipelineVertexInputStateCreateFlags; +typedef VkFlags VkPipelineInputAssemblyStateCreateFlags; +typedef VkFlags VkPipelineTessellationStateCreateFlags; +typedef VkFlags VkPipelineViewportStateCreateFlags; +typedef VkFlags VkPipelineRasterizationStateCreateFlags; typedef VkFlags VkPipelineMultisampleStateCreateFlags; typedef VkFlags VkPipelineDepthStencilStateCreateFlags; typedef VkFlags VkPipelineColorBlendStateCreateFlags; - -typedef enum VkColorComponentFlagBits { - VK_COLOR_COMPONENT_R_BIT = 0x00000001, - VK_COLOR_COMPONENT_G_BIT = 0x00000002, - VK_COLOR_COMPONENT_B_BIT = 0x00000004, - VK_COLOR_COMPONENT_A_BIT = 0x00000008, - 
VK_COLOR_COMPONENT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VkColorComponentFlagBits; -typedef VkFlags VkColorComponentFlags; typedef VkFlags VkPipelineDynamicStateCreateFlags; typedef VkFlags VkPipelineLayoutCreateFlags; typedef VkFlags VkShaderStageFlags; -typedef VkFlags VkSamplerCreateFlags; -typedef enum VkDescriptorSetLayoutCreateFlagBits { - VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR = 0x00000001, - VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT_EXT = 0x00000002, - VK_DESCRIPTOR_SET_LAYOUT_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VkDescriptorSetLayoutCreateFlagBits; -typedef VkFlags VkDescriptorSetLayoutCreateFlags; +typedef enum VkSamplerCreateFlagBits { + VK_SAMPLER_CREATE_SUBSAMPLED_BIT_EXT = 0x00000001, + VK_SAMPLER_CREATE_SUBSAMPLED_COARSE_RECONSTRUCTION_BIT_EXT = 0x00000002, + VK_SAMPLER_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkSamplerCreateFlagBits; +typedef VkFlags VkSamplerCreateFlags; typedef enum VkDescriptorPoolCreateFlagBits { VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT = 0x00000001, - VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT_EXT = 0x00000002, + VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT = 0x00000002, + VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT_EXT = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT, VK_DESCRIPTOR_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkDescriptorPoolCreateFlagBits; typedef VkFlags VkDescriptorPoolCreateFlags; typedef VkFlags VkDescriptorPoolResetFlags; -typedef VkFlags VkFramebufferCreateFlags; -typedef VkFlags VkRenderPassCreateFlags; + +typedef enum VkDescriptorSetLayoutCreateFlagBits { + VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT = 0x00000002, + VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR = 0x00000001, + VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT_EXT = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT, + VK_DESCRIPTOR_SET_LAYOUT_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkDescriptorSetLayoutCreateFlagBits; +typedef 
VkFlags VkDescriptorSetLayoutCreateFlags; typedef enum VkAttachmentDescriptionFlagBits { VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT = 0x00000001, @@ -1526,38 +1885,6 @@ typedef enum VkAttachmentDescriptionFlagBits { } VkAttachmentDescriptionFlagBits; typedef VkFlags VkAttachmentDescriptionFlags; -typedef enum VkSubpassDescriptionFlagBits { - VK_SUBPASS_DESCRIPTION_PER_VIEW_ATTRIBUTES_BIT_NVX = 0x00000001, - VK_SUBPASS_DESCRIPTION_PER_VIEW_POSITION_X_ONLY_BIT_NVX = 0x00000002, - VK_SUBPASS_DESCRIPTION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VkSubpassDescriptionFlagBits; -typedef VkFlags VkSubpassDescriptionFlags; - -typedef enum VkAccessFlagBits { - VK_ACCESS_INDIRECT_COMMAND_READ_BIT = 0x00000001, - VK_ACCESS_INDEX_READ_BIT = 0x00000002, - VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT = 0x00000004, - VK_ACCESS_UNIFORM_READ_BIT = 0x00000008, - VK_ACCESS_INPUT_ATTACHMENT_READ_BIT = 0x00000010, - VK_ACCESS_SHADER_READ_BIT = 0x00000020, - VK_ACCESS_SHADER_WRITE_BIT = 0x00000040, - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT = 0x00000080, - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT = 0x00000100, - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT = 0x00000200, - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT = 0x00000400, - VK_ACCESS_TRANSFER_READ_BIT = 0x00000800, - VK_ACCESS_TRANSFER_WRITE_BIT = 0x00001000, - VK_ACCESS_HOST_READ_BIT = 0x00002000, - VK_ACCESS_HOST_WRITE_BIT = 0x00004000, - VK_ACCESS_MEMORY_READ_BIT = 0x00008000, - VK_ACCESS_MEMORY_WRITE_BIT = 0x00010000, - VK_ACCESS_COMMAND_PROCESS_READ_BIT_NVX = 0x00020000, - VK_ACCESS_COMMAND_PROCESS_WRITE_BIT_NVX = 0x00040000, - VK_ACCESS_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT = 0x00080000, - VK_ACCESS_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VkAccessFlagBits; -typedef VkFlags VkAccessFlags; - typedef enum VkDependencyFlagBits { VK_DEPENDENCY_BY_REGION_BIT = 0x00000001, VK_DEPENDENCY_DEVICE_GROUP_BIT = 0x00000004, @@ -1568,6 +1895,28 @@ typedef enum VkDependencyFlagBits { } VkDependencyFlagBits; typedef VkFlags VkDependencyFlags; +typedef enum 
VkFramebufferCreateFlagBits { + VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT = 0x00000001, + VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT_KHR = VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT, + VK_FRAMEBUFFER_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkFramebufferCreateFlagBits; +typedef VkFlags VkFramebufferCreateFlags; + +typedef enum VkRenderPassCreateFlagBits { + VK_RENDER_PASS_CREATE_TRANSFORM_BIT_QCOM = 0x00000002, + VK_RENDER_PASS_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkRenderPassCreateFlagBits; +typedef VkFlags VkRenderPassCreateFlags; + +typedef enum VkSubpassDescriptionFlagBits { + VK_SUBPASS_DESCRIPTION_PER_VIEW_ATTRIBUTES_BIT_NVX = 0x00000001, + VK_SUBPASS_DESCRIPTION_PER_VIEW_POSITION_X_ONLY_BIT_NVX = 0x00000002, + VK_SUBPASS_DESCRIPTION_FRAGMENT_REGION_BIT_QCOM = 0x00000004, + VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM = 0x00000008, + VK_SUBPASS_DESCRIPTION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkSubpassDescriptionFlagBits; +typedef VkFlags VkSubpassDescriptionFlags; + typedef enum VkCommandPoolCreateFlagBits { VK_COMMAND_POOL_CREATE_TRANSIENT_BIT = 0x00000001, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT = 0x00000002, @@ -1605,31 +1954,108 @@ typedef VkFlags VkCommandBufferResetFlags; typedef enum VkStencilFaceFlagBits { VK_STENCIL_FACE_FRONT_BIT = 0x00000001, VK_STENCIL_FACE_BACK_BIT = 0x00000002, - VK_STENCIL_FRONT_AND_BACK = 0x00000003, + VK_STENCIL_FACE_FRONT_AND_BACK = 0x00000003, + VK_STENCIL_FRONT_AND_BACK = VK_STENCIL_FACE_FRONT_AND_BACK, VK_STENCIL_FACE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkStencilFaceFlagBits; typedef VkFlags VkStencilFaceFlags; +typedef struct VkExtent2D { + uint32_t width; + uint32_t height; +} VkExtent2D; -typedef struct VkApplicationInfo { +typedef struct VkExtent3D { + uint32_t width; + uint32_t height; + uint32_t depth; +} VkExtent3D; + +typedef struct VkOffset2D { + int32_t x; + int32_t y; +} VkOffset2D; + +typedef struct VkOffset3D { + int32_t x; + int32_t y; + int32_t z; +} VkOffset3D; + +typedef struct VkRect2D { + VkOffset2D offset; + 
VkExtent2D extent; +} VkRect2D; + +typedef struct VkBaseInStructure { + VkStructureType sType; + const struct VkBaseInStructure* pNext; +} VkBaseInStructure; + +typedef struct VkBaseOutStructure { + VkStructureType sType; + struct VkBaseOutStructure* pNext; +} VkBaseOutStructure; + +typedef struct VkBufferMemoryBarrier { VkStructureType sType; const void* pNext; - const char* pApplicationName; - uint32_t applicationVersion; - const char* pEngineName; - uint32_t engineVersion; - uint32_t apiVersion; -} VkApplicationInfo; + VkAccessFlags srcAccessMask; + VkAccessFlags dstAccessMask; + uint32_t srcQueueFamilyIndex; + uint32_t dstQueueFamilyIndex; + VkBuffer buffer; + VkDeviceSize offset; + VkDeviceSize size; +} VkBufferMemoryBarrier; -typedef struct VkInstanceCreateInfo { - VkStructureType sType; - const void* pNext; - VkInstanceCreateFlags flags; - const VkApplicationInfo* pApplicationInfo; - uint32_t enabledLayerCount; - const char* const* ppEnabledLayerNames; - uint32_t enabledExtensionCount; - const char* const* ppEnabledExtensionNames; -} VkInstanceCreateInfo; +typedef struct VkDispatchIndirectCommand { + uint32_t x; + uint32_t y; + uint32_t z; +} VkDispatchIndirectCommand; + +typedef struct VkDrawIndexedIndirectCommand { + uint32_t indexCount; + uint32_t instanceCount; + uint32_t firstIndex; + int32_t vertexOffset; + uint32_t firstInstance; +} VkDrawIndexedIndirectCommand; + +typedef struct VkDrawIndirectCommand { + uint32_t vertexCount; + uint32_t instanceCount; + uint32_t firstVertex; + uint32_t firstInstance; +} VkDrawIndirectCommand; + +typedef struct VkImageSubresourceRange { + VkImageAspectFlags aspectMask; + uint32_t baseMipLevel; + uint32_t levelCount; + uint32_t baseArrayLayer; + uint32_t layerCount; +} VkImageSubresourceRange; + +typedef struct VkImageMemoryBarrier { + VkStructureType sType; + const void* pNext; + VkAccessFlags srcAccessMask; + VkAccessFlags dstAccessMask; + VkImageLayout oldLayout; + VkImageLayout newLayout; + uint32_t 
srcQueueFamilyIndex; + uint32_t dstQueueFamilyIndex; + VkImage image; + VkImageSubresourceRange subresourceRange; +} VkImageMemoryBarrier; + +typedef struct VkMemoryBarrier { + VkStructureType sType; + const void* pNext; + VkAccessFlags srcAccessMask; + VkAccessFlags dstAccessMask; +} VkMemoryBarrier; typedef void* (VKAPI_PTR *PFN_vkAllocationFunction)( void* pUserData, @@ -1637,13 +2063,6 @@ typedef void* (VKAPI_PTR *PFN_vkAllocationFunction)( size_t alignment, VkSystemAllocationScope allocationScope); -typedef void* (VKAPI_PTR *PFN_vkReallocationFunction)( - void* pUserData, - void* pOriginal, - size_t size, - size_t alignment, - VkSystemAllocationScope allocationScope); - typedef void (VKAPI_PTR *PFN_vkFreeFunction)( void* pUserData, void* pMemory); @@ -1660,6 +2079,14 @@ typedef void (VKAPI_PTR *PFN_vkInternalFreeNotification)( VkInternalAllocationType allocationType, VkSystemAllocationScope allocationScope); +typedef void* (VKAPI_PTR *PFN_vkReallocationFunction)( + void* pUserData, + void* pOriginal, + size_t size, + size_t alignment, + VkSystemAllocationScope allocationScope); + +typedef void (VKAPI_PTR *PFN_vkVoidFunction)(void); typedef struct VkAllocationCallbacks { void* pUserData; PFN_vkAllocationFunction pfnAllocation; @@ -1669,6 +2096,51 @@ typedef struct VkAllocationCallbacks { PFN_vkInternalFreeNotification pfnInternalFree; } VkAllocationCallbacks; +typedef struct VkApplicationInfo { + VkStructureType sType; + const void* pNext; + const char* pApplicationName; + uint32_t applicationVersion; + const char* pEngineName; + uint32_t engineVersion; + uint32_t apiVersion; +} VkApplicationInfo; + +typedef struct VkFormatProperties { + VkFormatFeatureFlags linearTilingFeatures; + VkFormatFeatureFlags optimalTilingFeatures; + VkFormatFeatureFlags bufferFeatures; +} VkFormatProperties; + +typedef struct VkImageFormatProperties { + VkExtent3D maxExtent; + uint32_t maxMipLevels; + uint32_t maxArrayLayers; + VkSampleCountFlags sampleCounts; + VkDeviceSize 
maxResourceSize; +} VkImageFormatProperties; + +typedef struct VkInstanceCreateInfo { + VkStructureType sType; + const void* pNext; + VkInstanceCreateFlags flags; + const VkApplicationInfo* pApplicationInfo; + uint32_t enabledLayerCount; + const char* const* ppEnabledLayerNames; + uint32_t enabledExtensionCount; + const char* const* ppEnabledExtensionNames; +} VkInstanceCreateInfo; + +typedef struct VkMemoryHeap { + VkDeviceSize size; + VkMemoryHeapFlags flags; +} VkMemoryHeap; + +typedef struct VkMemoryType { + VkMemoryPropertyFlags propertyFlags; + uint32_t heapIndex; +} VkMemoryType; + typedef struct VkPhysicalDeviceFeatures { VkBool32 robustBufferAccess; VkBool32 fullDrawIndexUint32; @@ -1727,26 +2199,6 @@ typedef struct VkPhysicalDeviceFeatures { VkBool32 inheritedQueries; } VkPhysicalDeviceFeatures; -typedef struct VkFormatProperties { - VkFormatFeatureFlags linearTilingFeatures; - VkFormatFeatureFlags optimalTilingFeatures; - VkFormatFeatureFlags bufferFeatures; -} VkFormatProperties; - -typedef struct VkExtent3D { - uint32_t width; - uint32_t height; - uint32_t depth; -} VkExtent3D; - -typedef struct VkImageFormatProperties { - VkExtent3D maxExtent; - uint32_t maxMipLevels; - uint32_t maxArrayLayers; - VkSampleCountFlags sampleCounts; - VkDeviceSize maxResourceSize; -} VkImageFormatProperties; - typedef struct VkPhysicalDeviceLimits { uint32_t maxImageDimension1D; uint32_t maxImageDimension2D; @@ -1856,6 +2308,13 @@ typedef struct VkPhysicalDeviceLimits { VkDeviceSize nonCoherentAtomSize; } VkPhysicalDeviceLimits; +typedef struct VkPhysicalDeviceMemoryProperties { + uint32_t memoryTypeCount; + VkMemoryType memoryTypes[VK_MAX_MEMORY_TYPES]; + uint32_t memoryHeapCount; + VkMemoryHeap memoryHeaps[VK_MAX_MEMORY_HEAPS]; +} VkPhysicalDeviceMemoryProperties; + typedef struct VkPhysicalDeviceSparseProperties { VkBool32 residencyStandard2DBlockShape; VkBool32 residencyStandard2DMultisampleBlockShape; @@ -1883,24 +2342,6 @@ typedef struct VkQueueFamilyProperties { 
VkExtent3D minImageTransferGranularity; } VkQueueFamilyProperties; -typedef struct VkMemoryType { - VkMemoryPropertyFlags propertyFlags; - uint32_t heapIndex; -} VkMemoryType; - -typedef struct VkMemoryHeap { - VkDeviceSize size; - VkMemoryHeapFlags flags; -} VkMemoryHeap; - -typedef struct VkPhysicalDeviceMemoryProperties { - uint32_t memoryTypeCount; - VkMemoryType memoryTypes[VK_MAX_MEMORY_TYPES]; - uint32_t memoryHeapCount; - VkMemoryHeap memoryHeaps[VK_MAX_MEMORY_HEAPS]; -} VkPhysicalDeviceMemoryProperties; - -typedef void (VKAPI_PTR *PFN_vkVoidFunction)(void); typedef struct VkDeviceQueueCreateInfo { VkStructureType sType; const void* pNext; @@ -1947,13 +2388,6 @@ typedef struct VkSubmitInfo { const VkSemaphore* pSignalSemaphores; } VkSubmitInfo; -typedef struct VkMemoryAllocateInfo { - VkStructureType sType; - const void* pNext; - VkDeviceSize allocationSize; - uint32_t memoryTypeIndex; -} VkMemoryAllocateInfo; - typedef struct VkMappedMemoryRange { VkStructureType sType; const void* pNext; @@ -1962,26 +2396,19 @@ typedef struct VkMappedMemoryRange { VkDeviceSize size; } VkMappedMemoryRange; +typedef struct VkMemoryAllocateInfo { + VkStructureType sType; + const void* pNext; + VkDeviceSize allocationSize; + uint32_t memoryTypeIndex; +} VkMemoryAllocateInfo; + typedef struct VkMemoryRequirements { VkDeviceSize size; VkDeviceSize alignment; uint32_t memoryTypeBits; } VkMemoryRequirements; -typedef struct VkSparseImageFormatProperties { - VkImageAspectFlags aspectMask; - VkExtent3D imageGranularity; - VkSparseImageFormatFlags flags; -} VkSparseImageFormatProperties; - -typedef struct VkSparseImageMemoryRequirements { - VkSparseImageFormatProperties formatProperties; - uint32_t imageMipTailFirstLod; - VkDeviceSize imageMipTailSize; - VkDeviceSize imageMipTailOffset; - VkDeviceSize imageMipTailStride; -} VkSparseImageMemoryRequirements; - typedef struct VkSparseMemoryBind { VkDeviceSize resourceOffset; VkDeviceSize size; @@ -2008,12 +2435,6 @@ typedef struct 
VkImageSubresource { uint32_t arrayLayer; } VkImageSubresource; -typedef struct VkOffset3D { - int32_t x; - int32_t y; - int32_t z; -} VkOffset3D; - typedef struct VkSparseImageMemoryBind { VkImageSubresource subresource; VkOffset3D offset; @@ -2044,6 +2465,20 @@ typedef struct VkBindSparseInfo { const VkSemaphore* pSignalSemaphores; } VkBindSparseInfo; +typedef struct VkSparseImageFormatProperties { + VkImageAspectFlags aspectMask; + VkExtent3D imageGranularity; + VkSparseImageFormatFlags flags; +} VkSparseImageFormatProperties; + +typedef struct VkSparseImageMemoryRequirements { + VkSparseImageFormatProperties formatProperties; + uint32_t imageMipTailFirstLod; + VkDeviceSize imageMipTailSize; + VkDeviceSize imageMipTailOffset; + VkDeviceSize imageMipTailStride; +} VkSparseImageMemoryRequirements; + typedef struct VkFenceCreateInfo { VkStructureType sType; const void* pNext; @@ -2125,14 +2560,6 @@ typedef struct VkComponentMapping { VkComponentSwizzle a; } VkComponentMapping; -typedef struct VkImageSubresourceRange { - VkImageAspectFlags aspectMask; - uint32_t baseMipLevel; - uint32_t levelCount; - uint32_t baseArrayLayer; - uint32_t layerCount; -} VkImageSubresourceRange; - typedef struct VkImageViewCreateInfo { VkStructureType sType; const void* pNext; @@ -2183,6 +2610,16 @@ typedef struct VkPipelineShaderStageCreateInfo { const VkSpecializationInfo* pSpecializationInfo; } VkPipelineShaderStageCreateInfo; +typedef struct VkComputePipelineCreateInfo { + VkStructureType sType; + const void* pNext; + VkPipelineCreateFlags flags; + VkPipelineShaderStageCreateInfo stage; + VkPipelineLayout layout; + VkPipeline basePipelineHandle; + int32_t basePipelineIndex; +} VkComputePipelineCreateInfo; + typedef struct VkVertexInputBindingDescription { uint32_t binding; uint32_t stride; @@ -2230,21 +2667,6 @@ typedef struct VkViewport { float maxDepth; } VkViewport; -typedef struct VkOffset2D { - int32_t x; - int32_t y; -} VkOffset2D; - -typedef struct VkExtent2D { - uint32_t 
width; - uint32_t height; -} VkExtent2D; - -typedef struct VkRect2D { - VkOffset2D offset; - VkExtent2D extent; -} VkRect2D; - typedef struct VkPipelineViewportStateCreateInfo { VkStructureType sType; const void* pNext; @@ -2360,16 +2782,6 @@ typedef struct VkGraphicsPipelineCreateInfo { int32_t basePipelineIndex; } VkGraphicsPipelineCreateInfo; -typedef struct VkComputePipelineCreateInfo { - VkStructureType sType; - const void* pNext; - VkPipelineCreateFlags flags; - VkPipelineShaderStageCreateInfo stage; - VkPipelineLayout layout; - VkPipeline basePipelineHandle; - int32_t basePipelineIndex; -} VkComputePipelineCreateInfo; - typedef struct VkPushConstantRange { VkShaderStageFlags stageFlags; uint32_t offset; @@ -2407,21 +2819,29 @@ typedef struct VkSamplerCreateInfo { VkBool32 unnormalizedCoordinates; } VkSamplerCreateInfo; -typedef struct VkDescriptorSetLayoutBinding { - uint32_t binding; - VkDescriptorType descriptorType; - uint32_t descriptorCount; - VkShaderStageFlags stageFlags; - const VkSampler* pImmutableSamplers; -} VkDescriptorSetLayoutBinding; +typedef struct VkCopyDescriptorSet { + VkStructureType sType; + const void* pNext; + VkDescriptorSet srcSet; + uint32_t srcBinding; + uint32_t srcArrayElement; + VkDescriptorSet dstSet; + uint32_t dstBinding; + uint32_t dstArrayElement; + uint32_t descriptorCount; +} VkCopyDescriptorSet; -typedef struct VkDescriptorSetLayoutCreateInfo { - VkStructureType sType; - const void* pNext; - VkDescriptorSetLayoutCreateFlags flags; - uint32_t bindingCount; - const VkDescriptorSetLayoutBinding* pBindings; -} VkDescriptorSetLayoutCreateInfo; +typedef struct VkDescriptorBufferInfo { + VkBuffer buffer; + VkDeviceSize offset; + VkDeviceSize range; +} VkDescriptorBufferInfo; + +typedef struct VkDescriptorImageInfo { + VkSampler sampler; + VkImageView imageView; + VkImageLayout imageLayout; +} VkDescriptorImageInfo; typedef struct VkDescriptorPoolSize { VkDescriptorType type; @@ -2445,17 +2865,21 @@ typedef struct 
VkDescriptorSetAllocateInfo { const VkDescriptorSetLayout* pSetLayouts; } VkDescriptorSetAllocateInfo; -typedef struct VkDescriptorImageInfo { - VkSampler sampler; - VkImageView imageView; - VkImageLayout imageLayout; -} VkDescriptorImageInfo; +typedef struct VkDescriptorSetLayoutBinding { + uint32_t binding; + VkDescriptorType descriptorType; + uint32_t descriptorCount; + VkShaderStageFlags stageFlags; + const VkSampler* pImmutableSamplers; +} VkDescriptorSetLayoutBinding; -typedef struct VkDescriptorBufferInfo { - VkBuffer buffer; - VkDeviceSize offset; - VkDeviceSize range; -} VkDescriptorBufferInfo; +typedef struct VkDescriptorSetLayoutCreateInfo { + VkStructureType sType; + const void* pNext; + VkDescriptorSetLayoutCreateFlags flags; + uint32_t bindingCount; + const VkDescriptorSetLayoutBinding* pBindings; +} VkDescriptorSetLayoutCreateInfo; typedef struct VkWriteDescriptorSet { VkStructureType sType; @@ -2470,30 +2894,6 @@ typedef struct VkWriteDescriptorSet { const VkBufferView* pTexelBufferView; } VkWriteDescriptorSet; -typedef struct VkCopyDescriptorSet { - VkStructureType sType; - const void* pNext; - VkDescriptorSet srcSet; - uint32_t srcBinding; - uint32_t srcArrayElement; - VkDescriptorSet dstSet; - uint32_t dstBinding; - uint32_t dstArrayElement; - uint32_t descriptorCount; -} VkCopyDescriptorSet; - -typedef struct VkFramebufferCreateInfo { - VkStructureType sType; - const void* pNext; - VkFramebufferCreateFlags flags; - VkRenderPass renderPass; - uint32_t attachmentCount; - const VkImageView* pAttachments; - uint32_t width; - uint32_t height; - uint32_t layers; -} VkFramebufferCreateInfo; - typedef struct VkAttachmentDescription { VkAttachmentDescriptionFlags flags; VkFormat format; @@ -2511,6 +2911,18 @@ typedef struct VkAttachmentReference { VkImageLayout layout; } VkAttachmentReference; +typedef struct VkFramebufferCreateInfo { + VkStructureType sType; + const void* pNext; + VkFramebufferCreateFlags flags; + VkRenderPass renderPass; + uint32_t 
attachmentCount; + const VkImageView* pAttachments; + uint32_t width; + uint32_t height; + uint32_t layers; +} VkFramebufferCreateInfo; + typedef struct VkSubpassDescription { VkSubpassDescriptionFlags flags; VkPipelineBindPoint pipelineBindPoint; @@ -2592,21 +3004,6 @@ typedef struct VkImageSubresourceLayers { uint32_t layerCount; } VkImageSubresourceLayers; -typedef struct VkImageCopy { - VkImageSubresourceLayers srcSubresource; - VkOffset3D srcOffset; - VkImageSubresourceLayers dstSubresource; - VkOffset3D dstOffset; - VkExtent3D extent; -} VkImageCopy; - -typedef struct VkImageBlit { - VkImageSubresourceLayers srcSubresource; - VkOffset3D srcOffsets[2]; - VkImageSubresourceLayers dstSubresource; - VkOffset3D dstOffsets[2]; -} VkImageBlit; - typedef struct VkBufferImageCopy { VkDeviceSize bufferOffset; uint32_t bufferRowLength; @@ -2644,6 +3041,21 @@ typedef struct VkClearRect { uint32_t layerCount; } VkClearRect; +typedef struct VkImageBlit { + VkImageSubresourceLayers srcSubresource; + VkOffset3D srcOffsets[2]; + VkImageSubresourceLayers dstSubresource; + VkOffset3D dstOffsets[2]; +} VkImageBlit; + +typedef struct VkImageCopy { + VkImageSubresourceLayers srcSubresource; + VkOffset3D srcOffset; + VkImageSubresourceLayers dstSubresource; + VkOffset3D dstOffset; + VkExtent3D extent; +} VkImageCopy; + typedef struct VkImageResolve { VkImageSubresourceLayers srcSubresource; VkOffset3D srcOffset; @@ -2652,38 +3064,6 @@ typedef struct VkImageResolve { VkExtent3D extent; } VkImageResolve; -typedef struct VkMemoryBarrier { - VkStructureType sType; - const void* pNext; - VkAccessFlags srcAccessMask; - VkAccessFlags dstAccessMask; -} VkMemoryBarrier; - -typedef struct VkBufferMemoryBarrier { - VkStructureType sType; - const void* pNext; - VkAccessFlags srcAccessMask; - VkAccessFlags dstAccessMask; - uint32_t srcQueueFamilyIndex; - uint32_t dstQueueFamilyIndex; - VkBuffer buffer; - VkDeviceSize offset; - VkDeviceSize size; -} VkBufferMemoryBarrier; - -typedef struct 
VkImageMemoryBarrier { - VkStructureType sType; - const void* pNext; - VkAccessFlags srcAccessMask; - VkAccessFlags dstAccessMask; - VkImageLayout oldLayout; - VkImageLayout newLayout; - uint32_t srcQueueFamilyIndex; - uint32_t dstQueueFamilyIndex; - VkImage image; - VkImageSubresourceRange subresourceRange; -} VkImageMemoryBarrier; - typedef struct VkRenderPassBeginInfo { VkStructureType sType; const void* pNext; @@ -2694,28 +3074,6 @@ typedef struct VkRenderPassBeginInfo { const VkClearValue* pClearValues; } VkRenderPassBeginInfo; -typedef struct VkDispatchIndirectCommand { - uint32_t x; - uint32_t y; - uint32_t z; -} VkDispatchIndirectCommand; - -typedef struct VkDrawIndexedIndirectCommand { - uint32_t indexCount; - uint32_t instanceCount; - uint32_t firstIndex; - int32_t vertexOffset; - uint32_t firstInstance; -} VkDrawIndexedIndirectCommand; - -typedef struct VkDrawIndirectCommand { - uint32_t vertexCount; - uint32_t instanceCount; - uint32_t firstVertex; - uint32_t firstInstance; -} VkDrawIndirectCommand; - - typedef VkResult (VKAPI_PTR *PFN_vkCreateInstance)(const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkInstance* pInstance); typedef void (VKAPI_PTR *PFN_vkDestroyInstance)(VkInstance instance, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkEnumeratePhysicalDevices)(VkInstance instance, uint32_t* pPhysicalDeviceCount, VkPhysicalDevice* pPhysicalDevices); @@ -3653,27 +4011,22 @@ VKAPI_ATTR void VKAPI_CALL vkCmdExecuteCommands( const VkCommandBuffer* pCommandBuffers); #endif + #define VK_VERSION_1_1 1 // Vulkan 1.1 version number #define VK_API_VERSION_1_1 VK_MAKE_VERSION(1, 1, 0)// Patch version should always be set to 0 - VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSamplerYcbcrConversion) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorUpdateTemplate) - #define VK_MAX_DEVICE_GROUP_SIZE 32 #define VK_LUID_SIZE 8 #define VK_QUEUE_FAMILY_EXTERNAL (~0U-1) - typedef enum VkPointClippingBehavior { 
VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES = 0, VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY = 1, VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES, VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY_KHR = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY, - VK_POINT_CLIPPING_BEHAVIOR_BEGIN_RANGE = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES, - VK_POINT_CLIPPING_BEHAVIOR_END_RANGE = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY, - VK_POINT_CLIPPING_BEHAVIOR_RANGE_SIZE = (VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY - VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES + 1), VK_POINT_CLIPPING_BEHAVIOR_MAX_ENUM = 0x7FFFFFFF } VkPointClippingBehavior; @@ -3682,9 +4035,6 @@ typedef enum VkTessellationDomainOrigin { VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT = 1, VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT_KHR = VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT, VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT_KHR = VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT, - VK_TESSELLATION_DOMAIN_ORIGIN_BEGIN_RANGE = VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT, - VK_TESSELLATION_DOMAIN_ORIGIN_END_RANGE = VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT, - VK_TESSELLATION_DOMAIN_ORIGIN_RANGE_SIZE = (VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT - VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT + 1), VK_TESSELLATION_DOMAIN_ORIGIN_MAX_ENUM = 0x7FFFFFFF } VkTessellationDomainOrigin; @@ -3699,9 +4049,6 @@ typedef enum VkSamplerYcbcrModelConversion { VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709_KHR = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709, VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601_KHR = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601, VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020_KHR = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020, - VK_SAMPLER_YCBCR_MODEL_CONVERSION_BEGIN_RANGE = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY, - VK_SAMPLER_YCBCR_MODEL_CONVERSION_END_RANGE = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020, - VK_SAMPLER_YCBCR_MODEL_CONVERSION_RANGE_SIZE = 
(VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020 - VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY + 1), VK_SAMPLER_YCBCR_MODEL_CONVERSION_MAX_ENUM = 0x7FFFFFFF } VkSamplerYcbcrModelConversion; @@ -3710,9 +4057,6 @@ typedef enum VkSamplerYcbcrRange { VK_SAMPLER_YCBCR_RANGE_ITU_NARROW = 1, VK_SAMPLER_YCBCR_RANGE_ITU_FULL_KHR = VK_SAMPLER_YCBCR_RANGE_ITU_FULL, VK_SAMPLER_YCBCR_RANGE_ITU_NARROW_KHR = VK_SAMPLER_YCBCR_RANGE_ITU_NARROW, - VK_SAMPLER_YCBCR_RANGE_BEGIN_RANGE = VK_SAMPLER_YCBCR_RANGE_ITU_FULL, - VK_SAMPLER_YCBCR_RANGE_END_RANGE = VK_SAMPLER_YCBCR_RANGE_ITU_NARROW, - VK_SAMPLER_YCBCR_RANGE_RANGE_SIZE = (VK_SAMPLER_YCBCR_RANGE_ITU_NARROW - VK_SAMPLER_YCBCR_RANGE_ITU_FULL + 1), VK_SAMPLER_YCBCR_RANGE_MAX_ENUM = 0x7FFFFFFF } VkSamplerYcbcrRange; @@ -3721,9 +4065,6 @@ typedef enum VkChromaLocation { VK_CHROMA_LOCATION_MIDPOINT = 1, VK_CHROMA_LOCATION_COSITED_EVEN_KHR = VK_CHROMA_LOCATION_COSITED_EVEN, VK_CHROMA_LOCATION_MIDPOINT_KHR = VK_CHROMA_LOCATION_MIDPOINT, - VK_CHROMA_LOCATION_BEGIN_RANGE = VK_CHROMA_LOCATION_COSITED_EVEN, - VK_CHROMA_LOCATION_END_RANGE = VK_CHROMA_LOCATION_MIDPOINT, - VK_CHROMA_LOCATION_RANGE_SIZE = (VK_CHROMA_LOCATION_MIDPOINT - VK_CHROMA_LOCATION_COSITED_EVEN + 1), VK_CHROMA_LOCATION_MAX_ENUM = 0x7FFFFFFF } VkChromaLocation; @@ -3731,13 +4072,9 @@ typedef enum VkDescriptorUpdateTemplateType { VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET = 0, VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR = 1, VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET, - VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_BEGIN_RANGE = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET, - VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_END_RANGE = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET, - VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_RANGE_SIZE = (VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET - VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET + 1), VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_MAX_ENUM = 0x7FFFFFFF } 
VkDescriptorUpdateTemplateType; - typedef enum VkSubgroupFeatureFlagBits { VK_SUBGROUP_FEATURE_BASIC_BIT = 0x00000001, VK_SUBGROUP_FEATURE_VOTE_BIT = 0x00000002, @@ -3767,7 +4104,11 @@ typedef VkFlags VkPeerMemoryFeatureFlags; typedef enum VkMemoryAllocateFlagBits { VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT = 0x00000001, + VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT = 0x00000002, + VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT = 0x00000004, VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT_KHR = VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT, + VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, + VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT, VK_MEMORY_ALLOCATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkMemoryAllocateFlagBits; typedef VkFlags VkMemoryAllocateFlags; @@ -3850,6 +4191,7 @@ typedef enum VkExternalSemaphoreHandleTypeFlagBits { VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT = 0x00000004, VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE_BIT = 0x00000008, VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT = 0x00000010, + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE_BIT = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE_BIT, VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT, VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, @@ -3867,7 +4209,6 @@ typedef enum VkExternalSemaphoreFeatureFlagBits { VK_EXTERNAL_SEMAPHORE_FEATURE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkExternalSemaphoreFeatureFlagBits; typedef VkFlags VkExternalSemaphoreFeatureFlags; - typedef struct VkPhysicalDeviceSubgroupProperties { VkStructureType sType; void* pNext; @@ -4135,12 +4476,14 @@ typedef struct VkPhysicalDeviceMultiviewProperties { uint32_t maxMultiviewInstanceIndex; } VkPhysicalDeviceMultiviewProperties; 
-typedef struct VkPhysicalDeviceVariablePointerFeatures { +typedef struct VkPhysicalDeviceVariablePointersFeatures { VkStructureType sType; void* pNext; VkBool32 variablePointersStorageBuffer; VkBool32 variablePointers; -} VkPhysicalDeviceVariablePointerFeatures; +} VkPhysicalDeviceVariablePointersFeatures; + +typedef VkPhysicalDeviceVariablePointersFeatures VkPhysicalDeviceVariablePointerFeatures; typedef struct VkPhysicalDeviceProtectedMemoryFeatures { VkStructureType sType; @@ -4222,7 +4565,7 @@ typedef struct VkDescriptorUpdateTemplateEntry { typedef struct VkDescriptorUpdateTemplateCreateInfo { VkStructureType sType; - void* pNext; + const void* pNext; VkDescriptorUpdateTemplateCreateFlags flags; uint32_t descriptorUpdateEntryCount; const VkDescriptorUpdateTemplateEntry* pDescriptorUpdateEntries; @@ -4346,12 +4689,13 @@ typedef struct VkDescriptorSetLayoutSupport { VkBool32 supported; } VkDescriptorSetLayoutSupport; -typedef struct VkPhysicalDeviceShaderDrawParameterFeatures { +typedef struct VkPhysicalDeviceShaderDrawParametersFeatures { VkStructureType sType; void* pNext; VkBool32 shaderDrawParameters; -} VkPhysicalDeviceShaderDrawParameterFeatures; +} VkPhysicalDeviceShaderDrawParametersFeatures; +typedef VkPhysicalDeviceShaderDrawParametersFeatures VkPhysicalDeviceShaderDrawParameterFeatures; typedef VkResult (VKAPI_PTR *PFN_vkEnumerateInstanceVersion)(uint32_t* pApiVersion); typedef VkResult (VKAPI_PTR *PFN_vkBindBufferMemory2)(VkDevice device, uint32_t bindInfoCount, const VkBindBufferMemoryInfo* pBindInfos); @@ -4529,19 +4873,770 @@ VKAPI_ATTR void VKAPI_CALL vkGetDescriptorSetLayoutSupport( VkDescriptorSetLayoutSupport* pSupport); #endif + +#define VK_VERSION_1_2 1 +// Vulkan 1.2 version number +#define VK_API_VERSION_1_2 VK_MAKE_VERSION(1, 2, 0)// Patch version should always be set to 0 + +#define VK_MAX_DRIVER_NAME_SIZE 256 +#define VK_MAX_DRIVER_INFO_SIZE 256 + +typedef enum VkDriverId { + VK_DRIVER_ID_AMD_PROPRIETARY = 1, + 
VK_DRIVER_ID_AMD_OPEN_SOURCE = 2, + VK_DRIVER_ID_MESA_RADV = 3, + VK_DRIVER_ID_NVIDIA_PROPRIETARY = 4, + VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS = 5, + VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA = 6, + VK_DRIVER_ID_IMAGINATION_PROPRIETARY = 7, + VK_DRIVER_ID_QUALCOMM_PROPRIETARY = 8, + VK_DRIVER_ID_ARM_PROPRIETARY = 9, + VK_DRIVER_ID_GOOGLE_SWIFTSHADER = 10, + VK_DRIVER_ID_GGP_PROPRIETARY = 11, + VK_DRIVER_ID_BROADCOM_PROPRIETARY = 12, + VK_DRIVER_ID_MESA_LLVMPIPE = 13, + VK_DRIVER_ID_MOLTENVK = 14, + VK_DRIVER_ID_AMD_PROPRIETARY_KHR = VK_DRIVER_ID_AMD_PROPRIETARY, + VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR = VK_DRIVER_ID_AMD_OPEN_SOURCE, + VK_DRIVER_ID_MESA_RADV_KHR = VK_DRIVER_ID_MESA_RADV, + VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR = VK_DRIVER_ID_NVIDIA_PROPRIETARY, + VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR = VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS, + VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA_KHR = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA, + VK_DRIVER_ID_IMAGINATION_PROPRIETARY_KHR = VK_DRIVER_ID_IMAGINATION_PROPRIETARY, + VK_DRIVER_ID_QUALCOMM_PROPRIETARY_KHR = VK_DRIVER_ID_QUALCOMM_PROPRIETARY, + VK_DRIVER_ID_ARM_PROPRIETARY_KHR = VK_DRIVER_ID_ARM_PROPRIETARY, + VK_DRIVER_ID_GOOGLE_SWIFTSHADER_KHR = VK_DRIVER_ID_GOOGLE_SWIFTSHADER, + VK_DRIVER_ID_GGP_PROPRIETARY_KHR = VK_DRIVER_ID_GGP_PROPRIETARY, + VK_DRIVER_ID_BROADCOM_PROPRIETARY_KHR = VK_DRIVER_ID_BROADCOM_PROPRIETARY, + VK_DRIVER_ID_MAX_ENUM = 0x7FFFFFFF +} VkDriverId; + +typedef enum VkShaderFloatControlsIndependence { + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY = 0, + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL = 1, + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE = 2, + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY, + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE, + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_MAX_ENUM = 0x7FFFFFFF +} 
VkShaderFloatControlsIndependence; + +typedef enum VkSamplerReductionMode { + VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE = 0, + VK_SAMPLER_REDUCTION_MODE_MIN = 1, + VK_SAMPLER_REDUCTION_MODE_MAX = 2, + VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT = VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE, + VK_SAMPLER_REDUCTION_MODE_MIN_EXT = VK_SAMPLER_REDUCTION_MODE_MIN, + VK_SAMPLER_REDUCTION_MODE_MAX_EXT = VK_SAMPLER_REDUCTION_MODE_MAX, + VK_SAMPLER_REDUCTION_MODE_MAX_ENUM = 0x7FFFFFFF +} VkSamplerReductionMode; + +typedef enum VkSemaphoreType { + VK_SEMAPHORE_TYPE_BINARY = 0, + VK_SEMAPHORE_TYPE_TIMELINE = 1, + VK_SEMAPHORE_TYPE_BINARY_KHR = VK_SEMAPHORE_TYPE_BINARY, + VK_SEMAPHORE_TYPE_TIMELINE_KHR = VK_SEMAPHORE_TYPE_TIMELINE, + VK_SEMAPHORE_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkSemaphoreType; + +typedef enum VkResolveModeFlagBits { + VK_RESOLVE_MODE_NONE = 0, + VK_RESOLVE_MODE_SAMPLE_ZERO_BIT = 0x00000001, + VK_RESOLVE_MODE_AVERAGE_BIT = 0x00000002, + VK_RESOLVE_MODE_MIN_BIT = 0x00000004, + VK_RESOLVE_MODE_MAX_BIT = 0x00000008, + VK_RESOLVE_MODE_NONE_KHR = VK_RESOLVE_MODE_NONE, + VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT, + VK_RESOLVE_MODE_AVERAGE_BIT_KHR = VK_RESOLVE_MODE_AVERAGE_BIT, + VK_RESOLVE_MODE_MIN_BIT_KHR = VK_RESOLVE_MODE_MIN_BIT, + VK_RESOLVE_MODE_MAX_BIT_KHR = VK_RESOLVE_MODE_MAX_BIT, + VK_RESOLVE_MODE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkResolveModeFlagBits; +typedef VkFlags VkResolveModeFlags; + +typedef enum VkDescriptorBindingFlagBits { + VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT = 0x00000001, + VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT = 0x00000002, + VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT = 0x00000004, + VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT = 0x00000008, + VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT_EXT = VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT, + VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT_EXT = VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT, + 
VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT_EXT = VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT, + VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT = VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT, + VK_DESCRIPTOR_BINDING_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkDescriptorBindingFlagBits; +typedef VkFlags VkDescriptorBindingFlags; + +typedef enum VkSemaphoreWaitFlagBits { + VK_SEMAPHORE_WAIT_ANY_BIT = 0x00000001, + VK_SEMAPHORE_WAIT_ANY_BIT_KHR = VK_SEMAPHORE_WAIT_ANY_BIT, + VK_SEMAPHORE_WAIT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkSemaphoreWaitFlagBits; +typedef VkFlags VkSemaphoreWaitFlags; +typedef struct VkPhysicalDeviceVulkan11Features { + VkStructureType sType; + void* pNext; + VkBool32 storageBuffer16BitAccess; + VkBool32 uniformAndStorageBuffer16BitAccess; + VkBool32 storagePushConstant16; + VkBool32 storageInputOutput16; + VkBool32 multiview; + VkBool32 multiviewGeometryShader; + VkBool32 multiviewTessellationShader; + VkBool32 variablePointersStorageBuffer; + VkBool32 variablePointers; + VkBool32 protectedMemory; + VkBool32 samplerYcbcrConversion; + VkBool32 shaderDrawParameters; +} VkPhysicalDeviceVulkan11Features; + +typedef struct VkPhysicalDeviceVulkan11Properties { + VkStructureType sType; + void* pNext; + uint8_t deviceUUID[VK_UUID_SIZE]; + uint8_t driverUUID[VK_UUID_SIZE]; + uint8_t deviceLUID[VK_LUID_SIZE]; + uint32_t deviceNodeMask; + VkBool32 deviceLUIDValid; + uint32_t subgroupSize; + VkShaderStageFlags subgroupSupportedStages; + VkSubgroupFeatureFlags subgroupSupportedOperations; + VkBool32 subgroupQuadOperationsInAllStages; + VkPointClippingBehavior pointClippingBehavior; + uint32_t maxMultiviewViewCount; + uint32_t maxMultiviewInstanceIndex; + VkBool32 protectedNoFault; + uint32_t maxPerSetDescriptors; + VkDeviceSize maxMemoryAllocationSize; +} VkPhysicalDeviceVulkan11Properties; + +typedef struct VkPhysicalDeviceVulkan12Features { + VkStructureType sType; + void* pNext; + VkBool32 samplerMirrorClampToEdge; + VkBool32 drawIndirectCount; + 
VkBool32 storageBuffer8BitAccess; + VkBool32 uniformAndStorageBuffer8BitAccess; + VkBool32 storagePushConstant8; + VkBool32 shaderBufferInt64Atomics; + VkBool32 shaderSharedInt64Atomics; + VkBool32 shaderFloat16; + VkBool32 shaderInt8; + VkBool32 descriptorIndexing; + VkBool32 shaderInputAttachmentArrayDynamicIndexing; + VkBool32 shaderUniformTexelBufferArrayDynamicIndexing; + VkBool32 shaderStorageTexelBufferArrayDynamicIndexing; + VkBool32 shaderUniformBufferArrayNonUniformIndexing; + VkBool32 shaderSampledImageArrayNonUniformIndexing; + VkBool32 shaderStorageBufferArrayNonUniformIndexing; + VkBool32 shaderStorageImageArrayNonUniformIndexing; + VkBool32 shaderInputAttachmentArrayNonUniformIndexing; + VkBool32 shaderUniformTexelBufferArrayNonUniformIndexing; + VkBool32 shaderStorageTexelBufferArrayNonUniformIndexing; + VkBool32 descriptorBindingUniformBufferUpdateAfterBind; + VkBool32 descriptorBindingSampledImageUpdateAfterBind; + VkBool32 descriptorBindingStorageImageUpdateAfterBind; + VkBool32 descriptorBindingStorageBufferUpdateAfterBind; + VkBool32 descriptorBindingUniformTexelBufferUpdateAfterBind; + VkBool32 descriptorBindingStorageTexelBufferUpdateAfterBind; + VkBool32 descriptorBindingUpdateUnusedWhilePending; + VkBool32 descriptorBindingPartiallyBound; + VkBool32 descriptorBindingVariableDescriptorCount; + VkBool32 runtimeDescriptorArray; + VkBool32 samplerFilterMinmax; + VkBool32 scalarBlockLayout; + VkBool32 imagelessFramebuffer; + VkBool32 uniformBufferStandardLayout; + VkBool32 shaderSubgroupExtendedTypes; + VkBool32 separateDepthStencilLayouts; + VkBool32 hostQueryReset; + VkBool32 timelineSemaphore; + VkBool32 bufferDeviceAddress; + VkBool32 bufferDeviceAddressCaptureReplay; + VkBool32 bufferDeviceAddressMultiDevice; + VkBool32 vulkanMemoryModel; + VkBool32 vulkanMemoryModelDeviceScope; + VkBool32 vulkanMemoryModelAvailabilityVisibilityChains; + VkBool32 shaderOutputViewportIndex; + VkBool32 shaderOutputLayer; + VkBool32 subgroupBroadcastDynamicId; 
+} VkPhysicalDeviceVulkan12Features; + +typedef struct VkConformanceVersion { + uint8_t major; + uint8_t minor; + uint8_t subminor; + uint8_t patch; +} VkConformanceVersion; + +typedef struct VkPhysicalDeviceVulkan12Properties { + VkStructureType sType; + void* pNext; + VkDriverId driverID; + char driverName[VK_MAX_DRIVER_NAME_SIZE]; + char driverInfo[VK_MAX_DRIVER_INFO_SIZE]; + VkConformanceVersion conformanceVersion; + VkShaderFloatControlsIndependence denormBehaviorIndependence; + VkShaderFloatControlsIndependence roundingModeIndependence; + VkBool32 shaderSignedZeroInfNanPreserveFloat16; + VkBool32 shaderSignedZeroInfNanPreserveFloat32; + VkBool32 shaderSignedZeroInfNanPreserveFloat64; + VkBool32 shaderDenormPreserveFloat16; + VkBool32 shaderDenormPreserveFloat32; + VkBool32 shaderDenormPreserveFloat64; + VkBool32 shaderDenormFlushToZeroFloat16; + VkBool32 shaderDenormFlushToZeroFloat32; + VkBool32 shaderDenormFlushToZeroFloat64; + VkBool32 shaderRoundingModeRTEFloat16; + VkBool32 shaderRoundingModeRTEFloat32; + VkBool32 shaderRoundingModeRTEFloat64; + VkBool32 shaderRoundingModeRTZFloat16; + VkBool32 shaderRoundingModeRTZFloat32; + VkBool32 shaderRoundingModeRTZFloat64; + uint32_t maxUpdateAfterBindDescriptorsInAllPools; + VkBool32 shaderUniformBufferArrayNonUniformIndexingNative; + VkBool32 shaderSampledImageArrayNonUniformIndexingNative; + VkBool32 shaderStorageBufferArrayNonUniformIndexingNative; + VkBool32 shaderStorageImageArrayNonUniformIndexingNative; + VkBool32 shaderInputAttachmentArrayNonUniformIndexingNative; + VkBool32 robustBufferAccessUpdateAfterBind; + VkBool32 quadDivergentImplicitLod; + uint32_t maxPerStageDescriptorUpdateAfterBindSamplers; + uint32_t maxPerStageDescriptorUpdateAfterBindUniformBuffers; + uint32_t maxPerStageDescriptorUpdateAfterBindStorageBuffers; + uint32_t maxPerStageDescriptorUpdateAfterBindSampledImages; + uint32_t maxPerStageDescriptorUpdateAfterBindStorageImages; + uint32_t 
maxPerStageDescriptorUpdateAfterBindInputAttachments; + uint32_t maxPerStageUpdateAfterBindResources; + uint32_t maxDescriptorSetUpdateAfterBindSamplers; + uint32_t maxDescriptorSetUpdateAfterBindUniformBuffers; + uint32_t maxDescriptorSetUpdateAfterBindUniformBuffersDynamic; + uint32_t maxDescriptorSetUpdateAfterBindStorageBuffers; + uint32_t maxDescriptorSetUpdateAfterBindStorageBuffersDynamic; + uint32_t maxDescriptorSetUpdateAfterBindSampledImages; + uint32_t maxDescriptorSetUpdateAfterBindStorageImages; + uint32_t maxDescriptorSetUpdateAfterBindInputAttachments; + VkResolveModeFlags supportedDepthResolveModes; + VkResolveModeFlags supportedStencilResolveModes; + VkBool32 independentResolveNone; + VkBool32 independentResolve; + VkBool32 filterMinmaxSingleComponentFormats; + VkBool32 filterMinmaxImageComponentMapping; + uint64_t maxTimelineSemaphoreValueDifference; + VkSampleCountFlags framebufferIntegerColorSampleCounts; +} VkPhysicalDeviceVulkan12Properties; + +typedef struct VkImageFormatListCreateInfo { + VkStructureType sType; + const void* pNext; + uint32_t viewFormatCount; + const VkFormat* pViewFormats; +} VkImageFormatListCreateInfo; + +typedef struct VkAttachmentDescription2 { + VkStructureType sType; + const void* pNext; + VkAttachmentDescriptionFlags flags; + VkFormat format; + VkSampleCountFlagBits samples; + VkAttachmentLoadOp loadOp; + VkAttachmentStoreOp storeOp; + VkAttachmentLoadOp stencilLoadOp; + VkAttachmentStoreOp stencilStoreOp; + VkImageLayout initialLayout; + VkImageLayout finalLayout; +} VkAttachmentDescription2; + +typedef struct VkAttachmentReference2 { + VkStructureType sType; + const void* pNext; + uint32_t attachment; + VkImageLayout layout; + VkImageAspectFlags aspectMask; +} VkAttachmentReference2; + +typedef struct VkSubpassDescription2 { + VkStructureType sType; + const void* pNext; + VkSubpassDescriptionFlags flags; + VkPipelineBindPoint pipelineBindPoint; + uint32_t viewMask; + uint32_t inputAttachmentCount; + const 
VkAttachmentReference2* pInputAttachments; + uint32_t colorAttachmentCount; + const VkAttachmentReference2* pColorAttachments; + const VkAttachmentReference2* pResolveAttachments; + const VkAttachmentReference2* pDepthStencilAttachment; + uint32_t preserveAttachmentCount; + const uint32_t* pPreserveAttachments; +} VkSubpassDescription2; + +typedef struct VkSubpassDependency2 { + VkStructureType sType; + const void* pNext; + uint32_t srcSubpass; + uint32_t dstSubpass; + VkPipelineStageFlags srcStageMask; + VkPipelineStageFlags dstStageMask; + VkAccessFlags srcAccessMask; + VkAccessFlags dstAccessMask; + VkDependencyFlags dependencyFlags; + int32_t viewOffset; +} VkSubpassDependency2; + +typedef struct VkRenderPassCreateInfo2 { + VkStructureType sType; + const void* pNext; + VkRenderPassCreateFlags flags; + uint32_t attachmentCount; + const VkAttachmentDescription2* pAttachments; + uint32_t subpassCount; + const VkSubpassDescription2* pSubpasses; + uint32_t dependencyCount; + const VkSubpassDependency2* pDependencies; + uint32_t correlatedViewMaskCount; + const uint32_t* pCorrelatedViewMasks; +} VkRenderPassCreateInfo2; + +typedef struct VkSubpassBeginInfo { + VkStructureType sType; + const void* pNext; + VkSubpassContents contents; +} VkSubpassBeginInfo; + +typedef struct VkSubpassEndInfo { + VkStructureType sType; + const void* pNext; +} VkSubpassEndInfo; + +typedef struct VkPhysicalDevice8BitStorageFeatures { + VkStructureType sType; + void* pNext; + VkBool32 storageBuffer8BitAccess; + VkBool32 uniformAndStorageBuffer8BitAccess; + VkBool32 storagePushConstant8; +} VkPhysicalDevice8BitStorageFeatures; + +typedef struct VkPhysicalDeviceDriverProperties { + VkStructureType sType; + void* pNext; + VkDriverId driverID; + char driverName[VK_MAX_DRIVER_NAME_SIZE]; + char driverInfo[VK_MAX_DRIVER_INFO_SIZE]; + VkConformanceVersion conformanceVersion; +} VkPhysicalDeviceDriverProperties; + +typedef struct VkPhysicalDeviceShaderAtomicInt64Features { + VkStructureType sType; 
+ void* pNext; + VkBool32 shaderBufferInt64Atomics; + VkBool32 shaderSharedInt64Atomics; +} VkPhysicalDeviceShaderAtomicInt64Features; + +typedef struct VkPhysicalDeviceShaderFloat16Int8Features { + VkStructureType sType; + void* pNext; + VkBool32 shaderFloat16; + VkBool32 shaderInt8; +} VkPhysicalDeviceShaderFloat16Int8Features; + +typedef struct VkPhysicalDeviceFloatControlsProperties { + VkStructureType sType; + void* pNext; + VkShaderFloatControlsIndependence denormBehaviorIndependence; + VkShaderFloatControlsIndependence roundingModeIndependence; + VkBool32 shaderSignedZeroInfNanPreserveFloat16; + VkBool32 shaderSignedZeroInfNanPreserveFloat32; + VkBool32 shaderSignedZeroInfNanPreserveFloat64; + VkBool32 shaderDenormPreserveFloat16; + VkBool32 shaderDenormPreserveFloat32; + VkBool32 shaderDenormPreserveFloat64; + VkBool32 shaderDenormFlushToZeroFloat16; + VkBool32 shaderDenormFlushToZeroFloat32; + VkBool32 shaderDenormFlushToZeroFloat64; + VkBool32 shaderRoundingModeRTEFloat16; + VkBool32 shaderRoundingModeRTEFloat32; + VkBool32 shaderRoundingModeRTEFloat64; + VkBool32 shaderRoundingModeRTZFloat16; + VkBool32 shaderRoundingModeRTZFloat32; + VkBool32 shaderRoundingModeRTZFloat64; +} VkPhysicalDeviceFloatControlsProperties; + +typedef struct VkDescriptorSetLayoutBindingFlagsCreateInfo { + VkStructureType sType; + const void* pNext; + uint32_t bindingCount; + const VkDescriptorBindingFlags* pBindingFlags; +} VkDescriptorSetLayoutBindingFlagsCreateInfo; + +typedef struct VkPhysicalDeviceDescriptorIndexingFeatures { + VkStructureType sType; + void* pNext; + VkBool32 shaderInputAttachmentArrayDynamicIndexing; + VkBool32 shaderUniformTexelBufferArrayDynamicIndexing; + VkBool32 shaderStorageTexelBufferArrayDynamicIndexing; + VkBool32 shaderUniformBufferArrayNonUniformIndexing; + VkBool32 shaderSampledImageArrayNonUniformIndexing; + VkBool32 shaderStorageBufferArrayNonUniformIndexing; + VkBool32 shaderStorageImageArrayNonUniformIndexing; + VkBool32 
shaderInputAttachmentArrayNonUniformIndexing; + VkBool32 shaderUniformTexelBufferArrayNonUniformIndexing; + VkBool32 shaderStorageTexelBufferArrayNonUniformIndexing; + VkBool32 descriptorBindingUniformBufferUpdateAfterBind; + VkBool32 descriptorBindingSampledImageUpdateAfterBind; + VkBool32 descriptorBindingStorageImageUpdateAfterBind; + VkBool32 descriptorBindingStorageBufferUpdateAfterBind; + VkBool32 descriptorBindingUniformTexelBufferUpdateAfterBind; + VkBool32 descriptorBindingStorageTexelBufferUpdateAfterBind; + VkBool32 descriptorBindingUpdateUnusedWhilePending; + VkBool32 descriptorBindingPartiallyBound; + VkBool32 descriptorBindingVariableDescriptorCount; + VkBool32 runtimeDescriptorArray; +} VkPhysicalDeviceDescriptorIndexingFeatures; + +typedef struct VkPhysicalDeviceDescriptorIndexingProperties { + VkStructureType sType; + void* pNext; + uint32_t maxUpdateAfterBindDescriptorsInAllPools; + VkBool32 shaderUniformBufferArrayNonUniformIndexingNative; + VkBool32 shaderSampledImageArrayNonUniformIndexingNative; + VkBool32 shaderStorageBufferArrayNonUniformIndexingNative; + VkBool32 shaderStorageImageArrayNonUniformIndexingNative; + VkBool32 shaderInputAttachmentArrayNonUniformIndexingNative; + VkBool32 robustBufferAccessUpdateAfterBind; + VkBool32 quadDivergentImplicitLod; + uint32_t maxPerStageDescriptorUpdateAfterBindSamplers; + uint32_t maxPerStageDescriptorUpdateAfterBindUniformBuffers; + uint32_t maxPerStageDescriptorUpdateAfterBindStorageBuffers; + uint32_t maxPerStageDescriptorUpdateAfterBindSampledImages; + uint32_t maxPerStageDescriptorUpdateAfterBindStorageImages; + uint32_t maxPerStageDescriptorUpdateAfterBindInputAttachments; + uint32_t maxPerStageUpdateAfterBindResources; + uint32_t maxDescriptorSetUpdateAfterBindSamplers; + uint32_t maxDescriptorSetUpdateAfterBindUniformBuffers; + uint32_t maxDescriptorSetUpdateAfterBindUniformBuffersDynamic; + uint32_t maxDescriptorSetUpdateAfterBindStorageBuffers; + uint32_t 
maxDescriptorSetUpdateAfterBindStorageBuffersDynamic; + uint32_t maxDescriptorSetUpdateAfterBindSampledImages; + uint32_t maxDescriptorSetUpdateAfterBindStorageImages; + uint32_t maxDescriptorSetUpdateAfterBindInputAttachments; +} VkPhysicalDeviceDescriptorIndexingProperties; + +typedef struct VkDescriptorSetVariableDescriptorCountAllocateInfo { + VkStructureType sType; + const void* pNext; + uint32_t descriptorSetCount; + const uint32_t* pDescriptorCounts; +} VkDescriptorSetVariableDescriptorCountAllocateInfo; + +typedef struct VkDescriptorSetVariableDescriptorCountLayoutSupport { + VkStructureType sType; + void* pNext; + uint32_t maxVariableDescriptorCount; +} VkDescriptorSetVariableDescriptorCountLayoutSupport; + +typedef struct VkSubpassDescriptionDepthStencilResolve { + VkStructureType sType; + const void* pNext; + VkResolveModeFlagBits depthResolveMode; + VkResolveModeFlagBits stencilResolveMode; + const VkAttachmentReference2* pDepthStencilResolveAttachment; +} VkSubpassDescriptionDepthStencilResolve; + +typedef struct VkPhysicalDeviceDepthStencilResolveProperties { + VkStructureType sType; + void* pNext; + VkResolveModeFlags supportedDepthResolveModes; + VkResolveModeFlags supportedStencilResolveModes; + VkBool32 independentResolveNone; + VkBool32 independentResolve; +} VkPhysicalDeviceDepthStencilResolveProperties; + +typedef struct VkPhysicalDeviceScalarBlockLayoutFeatures { + VkStructureType sType; + void* pNext; + VkBool32 scalarBlockLayout; +} VkPhysicalDeviceScalarBlockLayoutFeatures; + +typedef struct VkImageStencilUsageCreateInfo { + VkStructureType sType; + const void* pNext; + VkImageUsageFlags stencilUsage; +} VkImageStencilUsageCreateInfo; + +typedef struct VkSamplerReductionModeCreateInfo { + VkStructureType sType; + const void* pNext; + VkSamplerReductionMode reductionMode; +} VkSamplerReductionModeCreateInfo; + +typedef struct VkPhysicalDeviceSamplerFilterMinmaxProperties { + VkStructureType sType; + void* pNext; + VkBool32 
filterMinmaxSingleComponentFormats; + VkBool32 filterMinmaxImageComponentMapping; +} VkPhysicalDeviceSamplerFilterMinmaxProperties; + +typedef struct VkPhysicalDeviceVulkanMemoryModelFeatures { + VkStructureType sType; + void* pNext; + VkBool32 vulkanMemoryModel; + VkBool32 vulkanMemoryModelDeviceScope; + VkBool32 vulkanMemoryModelAvailabilityVisibilityChains; +} VkPhysicalDeviceVulkanMemoryModelFeatures; + +typedef struct VkPhysicalDeviceImagelessFramebufferFeatures { + VkStructureType sType; + void* pNext; + VkBool32 imagelessFramebuffer; +} VkPhysicalDeviceImagelessFramebufferFeatures; + +typedef struct VkFramebufferAttachmentImageInfo { + VkStructureType sType; + const void* pNext; + VkImageCreateFlags flags; + VkImageUsageFlags usage; + uint32_t width; + uint32_t height; + uint32_t layerCount; + uint32_t viewFormatCount; + const VkFormat* pViewFormats; +} VkFramebufferAttachmentImageInfo; + +typedef struct VkFramebufferAttachmentsCreateInfo { + VkStructureType sType; + const void* pNext; + uint32_t attachmentImageInfoCount; + const VkFramebufferAttachmentImageInfo* pAttachmentImageInfos; +} VkFramebufferAttachmentsCreateInfo; + +typedef struct VkRenderPassAttachmentBeginInfo { + VkStructureType sType; + const void* pNext; + uint32_t attachmentCount; + const VkImageView* pAttachments; +} VkRenderPassAttachmentBeginInfo; + +typedef struct VkPhysicalDeviceUniformBufferStandardLayoutFeatures { + VkStructureType sType; + void* pNext; + VkBool32 uniformBufferStandardLayout; +} VkPhysicalDeviceUniformBufferStandardLayoutFeatures; + +typedef struct VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures { + VkStructureType sType; + void* pNext; + VkBool32 shaderSubgroupExtendedTypes; +} VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures; + +typedef struct VkPhysicalDeviceSeparateDepthStencilLayoutsFeatures { + VkStructureType sType; + void* pNext; + VkBool32 separateDepthStencilLayouts; +} VkPhysicalDeviceSeparateDepthStencilLayoutsFeatures; + +typedef struct 
VkAttachmentReferenceStencilLayout { + VkStructureType sType; + void* pNext; + VkImageLayout stencilLayout; +} VkAttachmentReferenceStencilLayout; + +typedef struct VkAttachmentDescriptionStencilLayout { + VkStructureType sType; + void* pNext; + VkImageLayout stencilInitialLayout; + VkImageLayout stencilFinalLayout; +} VkAttachmentDescriptionStencilLayout; + +typedef struct VkPhysicalDeviceHostQueryResetFeatures { + VkStructureType sType; + void* pNext; + VkBool32 hostQueryReset; +} VkPhysicalDeviceHostQueryResetFeatures; + +typedef struct VkPhysicalDeviceTimelineSemaphoreFeatures { + VkStructureType sType; + void* pNext; + VkBool32 timelineSemaphore; +} VkPhysicalDeviceTimelineSemaphoreFeatures; + +typedef struct VkPhysicalDeviceTimelineSemaphoreProperties { + VkStructureType sType; + void* pNext; + uint64_t maxTimelineSemaphoreValueDifference; +} VkPhysicalDeviceTimelineSemaphoreProperties; + +typedef struct VkSemaphoreTypeCreateInfo { + VkStructureType sType; + const void* pNext; + VkSemaphoreType semaphoreType; + uint64_t initialValue; +} VkSemaphoreTypeCreateInfo; + +typedef struct VkTimelineSemaphoreSubmitInfo { + VkStructureType sType; + const void* pNext; + uint32_t waitSemaphoreValueCount; + const uint64_t* pWaitSemaphoreValues; + uint32_t signalSemaphoreValueCount; + const uint64_t* pSignalSemaphoreValues; +} VkTimelineSemaphoreSubmitInfo; + +typedef struct VkSemaphoreWaitInfo { + VkStructureType sType; + const void* pNext; + VkSemaphoreWaitFlags flags; + uint32_t semaphoreCount; + const VkSemaphore* pSemaphores; + const uint64_t* pValues; +} VkSemaphoreWaitInfo; + +typedef struct VkSemaphoreSignalInfo { + VkStructureType sType; + const void* pNext; + VkSemaphore semaphore; + uint64_t value; +} VkSemaphoreSignalInfo; + +typedef struct VkPhysicalDeviceBufferDeviceAddressFeatures { + VkStructureType sType; + void* pNext; + VkBool32 bufferDeviceAddress; + VkBool32 bufferDeviceAddressCaptureReplay; + VkBool32 bufferDeviceAddressMultiDevice; +} 
VkPhysicalDeviceBufferDeviceAddressFeatures; + +typedef struct VkBufferDeviceAddressInfo { + VkStructureType sType; + const void* pNext; + VkBuffer buffer; +} VkBufferDeviceAddressInfo; + +typedef struct VkBufferOpaqueCaptureAddressCreateInfo { + VkStructureType sType; + const void* pNext; + uint64_t opaqueCaptureAddress; +} VkBufferOpaqueCaptureAddressCreateInfo; + +typedef struct VkMemoryOpaqueCaptureAddressAllocateInfo { + VkStructureType sType; + const void* pNext; + uint64_t opaqueCaptureAddress; +} VkMemoryOpaqueCaptureAddressAllocateInfo; + +typedef struct VkDeviceMemoryOpaqueCaptureAddressInfo { + VkStructureType sType; + const void* pNext; + VkDeviceMemory memory; +} VkDeviceMemoryOpaqueCaptureAddressInfo; + +typedef void (VKAPI_PTR *PFN_vkCmdDrawIndirectCount)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride); +typedef void (VKAPI_PTR *PFN_vkCmdDrawIndexedIndirectCount)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride); +typedef VkResult (VKAPI_PTR *PFN_vkCreateRenderPass2)(VkDevice device, const VkRenderPassCreateInfo2* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkRenderPass* pRenderPass); +typedef void (VKAPI_PTR *PFN_vkCmdBeginRenderPass2)(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, const VkSubpassBeginInfo* pSubpassBeginInfo); +typedef void (VKAPI_PTR *PFN_vkCmdNextSubpass2)(VkCommandBuffer commandBuffer, const VkSubpassBeginInfo* pSubpassBeginInfo, const VkSubpassEndInfo* pSubpassEndInfo); +typedef void (VKAPI_PTR *PFN_vkCmdEndRenderPass2)(VkCommandBuffer commandBuffer, const VkSubpassEndInfo* pSubpassEndInfo); +typedef void (VKAPI_PTR *PFN_vkResetQueryPool)(VkDevice device, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount); +typedef VkResult (VKAPI_PTR 
*PFN_vkGetSemaphoreCounterValue)(VkDevice device, VkSemaphore semaphore, uint64_t* pValue); +typedef VkResult (VKAPI_PTR *PFN_vkWaitSemaphores)(VkDevice device, const VkSemaphoreWaitInfo* pWaitInfo, uint64_t timeout); +typedef VkResult (VKAPI_PTR *PFN_vkSignalSemaphore)(VkDevice device, const VkSemaphoreSignalInfo* pSignalInfo); +typedef VkDeviceAddress (VKAPI_PTR *PFN_vkGetBufferDeviceAddress)(VkDevice device, const VkBufferDeviceAddressInfo* pInfo); +typedef uint64_t (VKAPI_PTR *PFN_vkGetBufferOpaqueCaptureAddress)(VkDevice device, const VkBufferDeviceAddressInfo* pInfo); +typedef uint64_t (VKAPI_PTR *PFN_vkGetDeviceMemoryOpaqueCaptureAddress)(VkDevice device, const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexedIndirectCount( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateRenderPass2( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +VKAPI_ATTR void VKAPI_CALL vkCmdBeginRenderPass2( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo); + +VKAPI_ATTR void VKAPI_CALL vkCmdNextSubpass2( + VkCommandBuffer commandBuffer, + const VkSubpassBeginInfo* pSubpassBeginInfo, + const VkSubpassEndInfo* pSubpassEndInfo); + +VKAPI_ATTR void VKAPI_CALL vkCmdEndRenderPass2( + VkCommandBuffer commandBuffer, + const VkSubpassEndInfo* pSubpassEndInfo); + +VKAPI_ATTR void VKAPI_CALL vkResetQueryPool( + VkDevice device, + VkQueryPool queryPool, + 
uint32_t firstQuery, + uint32_t queryCount); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetSemaphoreCounterValue( + VkDevice device, + VkSemaphore semaphore, + uint64_t* pValue); + +VKAPI_ATTR VkResult VKAPI_CALL vkWaitSemaphores( + VkDevice device, + const VkSemaphoreWaitInfo* pWaitInfo, + uint64_t timeout); + +VKAPI_ATTR VkResult VKAPI_CALL vkSignalSemaphore( + VkDevice device, + const VkSemaphoreSignalInfo* pSignalInfo); + +VKAPI_ATTR VkDeviceAddress VKAPI_CALL vkGetBufferDeviceAddress( + VkDevice device, + const VkBufferDeviceAddressInfo* pInfo); + +VKAPI_ATTR uint64_t VKAPI_CALL vkGetBufferOpaqueCaptureAddress( + VkDevice device, + const VkBufferDeviceAddressInfo* pInfo); + +VKAPI_ATTR uint64_t VKAPI_CALL vkGetDeviceMemoryOpaqueCaptureAddress( + VkDevice device, + const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo); +#endif + + #define VK_KHR_surface 1 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSurfaceKHR) - #define VK_KHR_SURFACE_SPEC_VERSION 25 #define VK_KHR_SURFACE_EXTENSION_NAME "VK_KHR_surface" -#define VK_COLORSPACE_SRGB_NONLINEAR_KHR VK_COLOR_SPACE_SRGB_NONLINEAR_KHR +typedef enum VkPresentModeKHR { + VK_PRESENT_MODE_IMMEDIATE_KHR = 0, + VK_PRESENT_MODE_MAILBOX_KHR = 1, + VK_PRESENT_MODE_FIFO_KHR = 2, + VK_PRESENT_MODE_FIFO_RELAXED_KHR = 3, + VK_PRESENT_MODE_SHARED_DEMAND_REFRESH_KHR = 1000111000, + VK_PRESENT_MODE_SHARED_CONTINUOUS_REFRESH_KHR = 1000111001, + VK_PRESENT_MODE_MAX_ENUM_KHR = 0x7FFFFFFF +} VkPresentModeKHR; typedef enum VkColorSpaceKHR { VK_COLOR_SPACE_SRGB_NONLINEAR_KHR = 0, VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT = 1000104001, VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT = 1000104002, - VK_COLOR_SPACE_DCI_P3_LINEAR_EXT = 1000104003, + VK_COLOR_SPACE_DISPLAY_P3_LINEAR_EXT = 1000104003, VK_COLOR_SPACE_DCI_P3_NONLINEAR_EXT = 1000104004, VK_COLOR_SPACE_BT709_LINEAR_EXT = 1000104005, VK_COLOR_SPACE_BT709_NONLINEAR_EXT = 1000104006, @@ -4553,26 +5648,12 @@ typedef enum VkColorSpaceKHR { VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT = 1000104012, 
VK_COLOR_SPACE_PASS_THROUGH_EXT = 1000104013, VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT = 1000104014, - VK_COLOR_SPACE_BEGIN_RANGE_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR, - VK_COLOR_SPACE_END_RANGE_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR, - VK_COLOR_SPACE_RANGE_SIZE_KHR = (VK_COLOR_SPACE_SRGB_NONLINEAR_KHR - VK_COLOR_SPACE_SRGB_NONLINEAR_KHR + 1), + VK_COLOR_SPACE_DISPLAY_NATIVE_AMD = 1000213000, + VK_COLORSPACE_SRGB_NONLINEAR_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR, + VK_COLOR_SPACE_DCI_P3_LINEAR_EXT = VK_COLOR_SPACE_DISPLAY_P3_LINEAR_EXT, VK_COLOR_SPACE_MAX_ENUM_KHR = 0x7FFFFFFF } VkColorSpaceKHR; -typedef enum VkPresentModeKHR { - VK_PRESENT_MODE_IMMEDIATE_KHR = 0, - VK_PRESENT_MODE_MAILBOX_KHR = 1, - VK_PRESENT_MODE_FIFO_KHR = 2, - VK_PRESENT_MODE_FIFO_RELAXED_KHR = 3, - VK_PRESENT_MODE_SHARED_DEMAND_REFRESH_KHR = 1000111000, - VK_PRESENT_MODE_SHARED_CONTINUOUS_REFRESH_KHR = 1000111001, - VK_PRESENT_MODE_BEGIN_RANGE_KHR = VK_PRESENT_MODE_IMMEDIATE_KHR, - VK_PRESENT_MODE_END_RANGE_KHR = VK_PRESENT_MODE_FIFO_RELAXED_KHR, - VK_PRESENT_MODE_RANGE_SIZE_KHR = (VK_PRESENT_MODE_FIFO_RELAXED_KHR - VK_PRESENT_MODE_IMMEDIATE_KHR + 1), - VK_PRESENT_MODE_MAX_ENUM_KHR = 0x7FFFFFFF -} VkPresentModeKHR; - - typedef enum VkSurfaceTransformFlagBitsKHR { VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR = 0x00000001, VK_SURFACE_TRANSFORM_ROTATE_90_BIT_KHR = 0x00000002, @@ -4585,7 +5666,6 @@ typedef enum VkSurfaceTransformFlagBitsKHR { VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR = 0x00000100, VK_SURFACE_TRANSFORM_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF } VkSurfaceTransformFlagBitsKHR; -typedef VkFlags VkSurfaceTransformFlagsKHR; typedef enum VkCompositeAlphaFlagBitsKHR { VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR = 0x00000001, @@ -4595,7 +5675,7 @@ typedef enum VkCompositeAlphaFlagBitsKHR { VK_COMPOSITE_ALPHA_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF } VkCompositeAlphaFlagBitsKHR; typedef VkFlags VkCompositeAlphaFlagsKHR; - +typedef VkFlags VkSurfaceTransformFlagsKHR; typedef struct VkSurfaceCapabilitiesKHR { 
uint32_t minImageCount; uint32_t maxImageCount; @@ -4614,7 +5694,6 @@ typedef struct VkSurfaceFormatKHR { VkColorSpaceKHR colorSpace; } VkSurfaceFormatKHR; - typedef void (VKAPI_PTR *PFN_vkDestroySurfaceKHR)(VkInstance instance, VkSurfaceKHR surface, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, VkSurfaceKHR surface, VkBool32* pSupported); typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR)(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, VkSurfaceCapabilitiesKHR* pSurfaceCapabilities); @@ -4651,16 +5730,16 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfacePresentModesKHR( VkPresentModeKHR* pPresentModes); #endif + #define VK_KHR_swapchain 1 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSwapchainKHR) - #define VK_KHR_SWAPCHAIN_SPEC_VERSION 70 #define VK_KHR_SWAPCHAIN_EXTENSION_NAME "VK_KHR_swapchain" - typedef enum VkSwapchainCreateFlagBitsKHR { VK_SWAPCHAIN_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT_KHR = 0x00000001, VK_SWAPCHAIN_CREATE_PROTECTED_BIT_KHR = 0x00000002, + VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR = 0x00000004, VK_SWAPCHAIN_CREATE_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF } VkSwapchainCreateFlagBitsKHR; typedef VkFlags VkSwapchainCreateFlagsKHR; @@ -4673,7 +5752,6 @@ typedef enum VkDeviceGroupPresentModeFlagBitsKHR { VK_DEVICE_GROUP_PRESENT_MODE_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF } VkDeviceGroupPresentModeFlagBitsKHR; typedef VkFlags VkDeviceGroupPresentModeFlagsKHR; - typedef struct VkSwapchainCreateInfoKHR { VkStructureType sType; const void* pNext; @@ -4750,7 +5828,6 @@ typedef struct VkDeviceGroupSwapchainCreateInfoKHR { VkDeviceGroupPresentModeFlagsKHR modes; } VkDeviceGroupSwapchainCreateInfoKHR; - typedef VkResult (VKAPI_PTR *PFN_vkCreateSwapchainKHR)(VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchain); typedef void 
(VKAPI_PTR *PFN_vkDestroySwapchainKHR)(VkDevice device, VkSwapchainKHR swapchain, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkGetSwapchainImagesKHR)(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pSwapchainImageCount, VkImage* pSwapchainImages); @@ -4812,13 +5889,13 @@ VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImage2KHR( uint32_t* pImageIndex); #endif + #define VK_KHR_display 1 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDisplayKHR) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDisplayModeKHR) - -#define VK_KHR_DISPLAY_SPEC_VERSION 21 +#define VK_KHR_DISPLAY_SPEC_VERSION 23 #define VK_KHR_DISPLAY_EXTENSION_NAME "VK_KHR_display" - +typedef VkFlags VkDisplayModeCreateFlagsKHR; typedef enum VkDisplayPlaneAlphaFlagBitsKHR { VK_DISPLAY_PLANE_ALPHA_OPAQUE_BIT_KHR = 0x00000001, @@ -4828,29 +5905,12 @@ typedef enum VkDisplayPlaneAlphaFlagBitsKHR { VK_DISPLAY_PLANE_ALPHA_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF } VkDisplayPlaneAlphaFlagBitsKHR; typedef VkFlags VkDisplayPlaneAlphaFlagsKHR; -typedef VkFlags VkDisplayModeCreateFlagsKHR; typedef VkFlags VkDisplaySurfaceCreateFlagsKHR; - -typedef struct VkDisplayPropertiesKHR { - VkDisplayKHR display; - const char* displayName; - VkExtent2D physicalDimensions; - VkExtent2D physicalResolution; - VkSurfaceTransformFlagsKHR supportedTransforms; - VkBool32 planeReorderPossible; - VkBool32 persistentContent; -} VkDisplayPropertiesKHR; - typedef struct VkDisplayModeParametersKHR { VkExtent2D visibleRegion; uint32_t refreshRate; } VkDisplayModeParametersKHR; -typedef struct VkDisplayModePropertiesKHR { - VkDisplayModeKHR displayMode; - VkDisplayModeParametersKHR parameters; -} VkDisplayModePropertiesKHR; - typedef struct VkDisplayModeCreateInfoKHR { VkStructureType sType; const void* pNext; @@ -4858,6 +5918,11 @@ typedef struct VkDisplayModeCreateInfoKHR { VkDisplayModeParametersKHR parameters; } VkDisplayModeCreateInfoKHR; +typedef struct VkDisplayModePropertiesKHR { + VkDisplayModeKHR displayMode; + 
VkDisplayModeParametersKHR parameters; +} VkDisplayModePropertiesKHR; + typedef struct VkDisplayPlaneCapabilitiesKHR { VkDisplayPlaneAlphaFlagsKHR supportedAlpha; VkOffset2D minSrcPosition; @@ -4875,6 +5940,16 @@ typedef struct VkDisplayPlanePropertiesKHR { uint32_t currentStackIndex; } VkDisplayPlanePropertiesKHR; +typedef struct VkDisplayPropertiesKHR { + VkDisplayKHR display; + const char* displayName; + VkExtent2D physicalDimensions; + VkExtent2D physicalResolution; + VkSurfaceTransformFlagsKHR supportedTransforms; + VkBool32 planeReorderPossible; + VkBool32 persistentContent; +} VkDisplayPropertiesKHR; + typedef struct VkDisplaySurfaceCreateInfoKHR { VkStructureType sType; const void* pNext; @@ -4888,7 +5963,6 @@ typedef struct VkDisplaySurfaceCreateInfoKHR { VkExtent2D imageExtent; } VkDisplaySurfaceCreateInfoKHR; - typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceDisplayPropertiesKHR)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkDisplayPropertiesKHR* pProperties); typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceDisplayPlanePropertiesKHR)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkDisplayPlanePropertiesKHR* pProperties); typedef VkResult (VKAPI_PTR *PFN_vkGetDisplayPlaneSupportedDisplaysKHR)(VkPhysicalDevice physicalDevice, uint32_t planeIndex, uint32_t* pDisplayCount, VkDisplayKHR* pDisplays); @@ -4940,10 +6014,10 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateDisplayPlaneSurfaceKHR( VkSurfaceKHR* pSurface); #endif -#define VK_KHR_display_swapchain 1 -#define VK_KHR_DISPLAY_SWAPCHAIN_SPEC_VERSION 9 -#define VK_KHR_DISPLAY_SWAPCHAIN_EXTENSION_NAME "VK_KHR_display_swapchain" +#define VK_KHR_display_swapchain 1 +#define VK_KHR_DISPLAY_SWAPCHAIN_SPEC_VERSION 10 +#define VK_KHR_DISPLAY_SWAPCHAIN_EXTENSION_NAME "VK_KHR_display_swapchain" typedef struct VkDisplayPresentInfoKHR { VkStructureType sType; const void* pNext; @@ -4952,7 +6026,6 @@ typedef struct VkDisplayPresentInfoKHR { VkBool32 persistent; } VkDisplayPresentInfoKHR; 
- typedef VkResult (VKAPI_PTR *PFN_vkCreateSharedSwapchainsKHR)(VkDevice device, uint32_t swapchainCount, const VkSwapchainCreateInfoKHR* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchains); #ifndef VK_NO_PROTOTYPES @@ -4964,15 +6037,15 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateSharedSwapchainsKHR( VkSwapchainKHR* pSwapchains); #endif + #define VK_KHR_sampler_mirror_clamp_to_edge 1 -#define VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_SPEC_VERSION 1 +#define VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_SPEC_VERSION 3 #define VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME "VK_KHR_sampler_mirror_clamp_to_edge" #define VK_KHR_multiview 1 #define VK_KHR_MULTIVIEW_SPEC_VERSION 1 #define VK_KHR_MULTIVIEW_EXTENSION_NAME "VK_KHR_multiview" - typedef VkRenderPassMultiviewCreateInfo VkRenderPassMultiviewCreateInfoKHR; typedef VkPhysicalDeviceMultiviewFeatures VkPhysicalDeviceMultiviewFeaturesKHR; @@ -4982,9 +6055,8 @@ typedef VkPhysicalDeviceMultiviewProperties VkPhysicalDeviceMultiviewPropertiesK #define VK_KHR_get_physical_device_properties2 1 -#define VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_SPEC_VERSION 1 +#define VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_SPEC_VERSION 2 #define VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME "VK_KHR_get_physical_device_properties2" - typedef VkPhysicalDeviceFeatures2 VkPhysicalDeviceFeatures2KHR; typedef VkPhysicalDeviceProperties2 VkPhysicalDeviceProperties2KHR; @@ -5003,7 +6075,6 @@ typedef VkSparseImageFormatProperties2 VkSparseImageFormatProperties2KHR; typedef VkPhysicalDeviceSparseImageFormatInfo2 VkPhysicalDeviceSparseImageFormatInfo2KHR; - typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceFeatures2KHR)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures2* pFeatures); typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceProperties2KHR)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties2* pProperties); typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceFormatProperties2KHR)(VkPhysicalDevice 
physicalDevice, VkFormat format, VkFormatProperties2* pFormatProperties); @@ -5047,10 +6118,10 @@ VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceSparseImageFormatProperties2KHR( VkSparseImageFormatProperties2* pProperties); #endif -#define VK_KHR_device_group 1 -#define VK_KHR_DEVICE_GROUP_SPEC_VERSION 3 -#define VK_KHR_DEVICE_GROUP_EXTENSION_NAME "VK_KHR_device_group" +#define VK_KHR_device_group 1 +#define VK_KHR_DEVICE_GROUP_SPEC_VERSION 4 +#define VK_KHR_DEVICE_GROUP_EXTENSION_NAME "VK_KHR_device_group" typedef VkPeerMemoryFeatureFlags VkPeerMemoryFeatureFlagsKHR; typedef VkPeerMemoryFeatureFlagBits VkPeerMemoryFeatureFlagBitsKHR; @@ -5059,7 +6130,6 @@ typedef VkMemoryAllocateFlags VkMemoryAllocateFlagsKHR; typedef VkMemoryAllocateFlagBits VkMemoryAllocateFlagBitsKHR; - typedef VkMemoryAllocateFlagsInfo VkMemoryAllocateFlagsInfoKHR; typedef VkDeviceGroupRenderPassBeginInfo VkDeviceGroupRenderPassBeginInfoKHR; @@ -5074,7 +6144,6 @@ typedef VkBindBufferMemoryDeviceGroupInfo VkBindBufferMemoryDeviceGroupInfoKHR; typedef VkBindImageMemoryDeviceGroupInfo VkBindImageMemoryDeviceGroupInfoKHR; - typedef void (VKAPI_PTR *PFN_vkGetDeviceGroupPeerMemoryFeaturesKHR)(VkDevice device, uint32_t heapIndex, uint32_t localDeviceIndex, uint32_t remoteDeviceIndex, VkPeerMemoryFeatureFlags* pPeerMemoryFeatures); typedef void (VKAPI_PTR *PFN_vkCmdSetDeviceMaskKHR)(VkCommandBuffer commandBuffer, uint32_t deviceMask); typedef void (VKAPI_PTR *PFN_vkCmdDispatchBaseKHR)(VkCommandBuffer commandBuffer, uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ); @@ -5101,6 +6170,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDispatchBaseKHR( uint32_t groupCountZ); #endif + #define VK_KHR_shader_draw_parameters 1 #define VK_KHR_SHADER_DRAW_PARAMETERS_SPEC_VERSION 1 #define VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME "VK_KHR_shader_draw_parameters" @@ -5109,10 +6179,8 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDispatchBaseKHR( #define 
VK_KHR_maintenance1 1 #define VK_KHR_MAINTENANCE1_SPEC_VERSION 2 #define VK_KHR_MAINTENANCE1_EXTENSION_NAME "VK_KHR_maintenance1" - typedef VkCommandPoolTrimFlags VkCommandPoolTrimFlagsKHR; - typedef void (VKAPI_PTR *PFN_vkTrimCommandPoolKHR)(VkDevice device, VkCommandPool commandPool, VkCommandPoolTrimFlags flags); #ifndef VK_NO_PROTOTYPES @@ -5122,16 +6190,15 @@ VKAPI_ATTR void VKAPI_CALL vkTrimCommandPoolKHR( VkCommandPoolTrimFlags flags); #endif + #define VK_KHR_device_group_creation 1 #define VK_KHR_DEVICE_GROUP_CREATION_SPEC_VERSION 1 #define VK_KHR_DEVICE_GROUP_CREATION_EXTENSION_NAME "VK_KHR_device_group_creation" #define VK_MAX_DEVICE_GROUP_SIZE_KHR VK_MAX_DEVICE_GROUP_SIZE - typedef VkPhysicalDeviceGroupProperties VkPhysicalDeviceGroupPropertiesKHR; typedef VkDeviceGroupDeviceCreateInfo VkDeviceGroupDeviceCreateInfoKHR; - typedef VkResult (VKAPI_PTR *PFN_vkEnumeratePhysicalDeviceGroupsKHR)(VkInstance instance, uint32_t* pPhysicalDeviceGroupCount, VkPhysicalDeviceGroupProperties* pPhysicalDeviceGroupProperties); #ifndef VK_NO_PROTOTYPES @@ -5141,11 +6208,11 @@ VKAPI_ATTR VkResult VKAPI_CALL vkEnumeratePhysicalDeviceGroupsKHR( VkPhysicalDeviceGroupProperties* pPhysicalDeviceGroupProperties); #endif + #define VK_KHR_external_memory_capabilities 1 #define VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_SPEC_VERSION 1 #define VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME "VK_KHR_external_memory_capabilities" #define VK_LUID_SIZE_KHR VK_LUID_SIZE - typedef VkExternalMemoryHandleTypeFlags VkExternalMemoryHandleTypeFlagsKHR; typedef VkExternalMemoryHandleTypeFlagBits VkExternalMemoryHandleTypeFlagBitsKHR; @@ -5154,7 +6221,6 @@ typedef VkExternalMemoryFeatureFlags VkExternalMemoryFeatureFlagsKHR; typedef VkExternalMemoryFeatureFlagBits VkExternalMemoryFeatureFlagBitsKHR; - typedef VkExternalMemoryProperties VkExternalMemoryPropertiesKHR; typedef VkPhysicalDeviceExternalImageFormatInfo VkPhysicalDeviceExternalImageFormatInfoKHR; @@ -5167,7 +6233,6 @@ typedef 
VkExternalBufferProperties VkExternalBufferPropertiesKHR; typedef VkPhysicalDeviceIDProperties VkPhysicalDeviceIDPropertiesKHR; - typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceExternalBufferPropertiesKHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalBufferInfo* pExternalBufferInfo, VkExternalBufferProperties* pExternalBufferProperties); #ifndef VK_NO_PROTOTYPES @@ -5177,11 +6242,11 @@ VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceExternalBufferPropertiesKHR( VkExternalBufferProperties* pExternalBufferProperties); #endif + #define VK_KHR_external_memory 1 #define VK_KHR_EXTERNAL_MEMORY_SPEC_VERSION 1 #define VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME "VK_KHR_external_memory" #define VK_QUEUE_FAMILY_EXTERNAL_KHR VK_QUEUE_FAMILY_EXTERNAL - typedef VkExternalMemoryImageCreateInfo VkExternalMemoryImageCreateInfoKHR; typedef VkExternalMemoryBufferCreateInfo VkExternalMemoryBufferCreateInfoKHR; @@ -5193,7 +6258,6 @@ typedef VkExportMemoryAllocateInfo VkExportMemoryAllocateInfoKHR; #define VK_KHR_external_memory_fd 1 #define VK_KHR_EXTERNAL_MEMORY_FD_SPEC_VERSION 1 #define VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME "VK_KHR_external_memory_fd" - typedef struct VkImportMemoryFdInfoKHR { VkStructureType sType; const void* pNext; @@ -5214,7 +6278,6 @@ typedef struct VkMemoryGetFdInfoKHR { VkExternalMemoryHandleTypeFlagBits handleType; } VkMemoryGetFdInfoKHR; - typedef VkResult (VKAPI_PTR *PFN_vkGetMemoryFdKHR)(VkDevice device, const VkMemoryGetFdInfoKHR* pGetFdInfo, int* pFd); typedef VkResult (VKAPI_PTR *PFN_vkGetMemoryFdPropertiesKHR)(VkDevice device, VkExternalMemoryHandleTypeFlagBits handleType, int fd, VkMemoryFdPropertiesKHR* pMemoryFdProperties); @@ -5231,10 +6294,10 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetMemoryFdPropertiesKHR( VkMemoryFdPropertiesKHR* pMemoryFdProperties); #endif + #define VK_KHR_external_semaphore_capabilities 1 #define VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_SPEC_VERSION 1 #define VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME 
"VK_KHR_external_semaphore_capabilities" - typedef VkExternalSemaphoreHandleTypeFlags VkExternalSemaphoreHandleTypeFlagsKHR; typedef VkExternalSemaphoreHandleTypeFlagBits VkExternalSemaphoreHandleTypeFlagBitsKHR; @@ -5243,12 +6306,10 @@ typedef VkExternalSemaphoreFeatureFlags VkExternalSemaphoreFeatureFlagsKHR; typedef VkExternalSemaphoreFeatureFlagBits VkExternalSemaphoreFeatureFlagBitsKHR; - typedef VkPhysicalDeviceExternalSemaphoreInfo VkPhysicalDeviceExternalSemaphoreInfoKHR; typedef VkExternalSemaphoreProperties VkExternalSemaphorePropertiesKHR; - typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceExternalSemaphorePropertiesKHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalSemaphoreInfo* pExternalSemaphoreInfo, VkExternalSemaphoreProperties* pExternalSemaphoreProperties); #ifndef VK_NO_PROTOTYPES @@ -5258,15 +6319,14 @@ VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceExternalSemaphorePropertiesKHR( VkExternalSemaphoreProperties* pExternalSemaphoreProperties); #endif + #define VK_KHR_external_semaphore 1 #define VK_KHR_EXTERNAL_SEMAPHORE_SPEC_VERSION 1 #define VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME "VK_KHR_external_semaphore" - typedef VkSemaphoreImportFlags VkSemaphoreImportFlagsKHR; typedef VkSemaphoreImportFlagBits VkSemaphoreImportFlagBitsKHR; - typedef VkExportSemaphoreCreateInfo VkExportSemaphoreCreateInfoKHR; @@ -5274,7 +6334,6 @@ typedef VkExportSemaphoreCreateInfo VkExportSemaphoreCreateInfoKHR; #define VK_KHR_external_semaphore_fd 1 #define VK_KHR_EXTERNAL_SEMAPHORE_FD_SPEC_VERSION 1 #define VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME "VK_KHR_external_semaphore_fd" - typedef struct VkImportSemaphoreFdInfoKHR { VkStructureType sType; const void* pNext; @@ -5291,7 +6350,6 @@ typedef struct VkSemaphoreGetFdInfoKHR { VkExternalSemaphoreHandleTypeFlagBits handleType; } VkSemaphoreGetFdInfoKHR; - typedef VkResult (VKAPI_PTR *PFN_vkImportSemaphoreFdKHR)(VkDevice device, const VkImportSemaphoreFdInfoKHR* pImportSemaphoreFdInfo); typedef 
VkResult (VKAPI_PTR *PFN_vkGetSemaphoreFdKHR)(VkDevice device, const VkSemaphoreGetFdInfoKHR* pGetFdInfo, int* pFd); @@ -5306,17 +6364,16 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetSemaphoreFdKHR( int* pFd); #endif + #define VK_KHR_push_descriptor 1 #define VK_KHR_PUSH_DESCRIPTOR_SPEC_VERSION 2 #define VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME "VK_KHR_push_descriptor" - typedef struct VkPhysicalDevicePushDescriptorPropertiesKHR { VkStructureType sType; void* pNext; uint32_t maxPushDescriptors; } VkPhysicalDevicePushDescriptorPropertiesKHR; - typedef void (VKAPI_PTR *PFN_vkCmdPushDescriptorSetKHR)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t set, uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites); typedef void (VKAPI_PTR *PFN_vkCmdPushDescriptorSetWithTemplateKHR)(VkCommandBuffer commandBuffer, VkDescriptorUpdateTemplate descriptorUpdateTemplate, VkPipelineLayout layout, uint32_t set, const void* pData); @@ -5337,10 +6394,19 @@ VKAPI_ATTR void VKAPI_CALL vkCmdPushDescriptorSetWithTemplateKHR( const void* pData); #endif + +#define VK_KHR_shader_float16_int8 1 +#define VK_KHR_SHADER_FLOAT16_INT8_SPEC_VERSION 1 +#define VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME "VK_KHR_shader_float16_int8" +typedef VkPhysicalDeviceShaderFloat16Int8Features VkPhysicalDeviceShaderFloat16Int8FeaturesKHR; + +typedef VkPhysicalDeviceShaderFloat16Int8Features VkPhysicalDeviceFloat16Int8FeaturesKHR; + + + #define VK_KHR_16bit_storage 1 #define VK_KHR_16BIT_STORAGE_SPEC_VERSION 1 #define VK_KHR_16BIT_STORAGE_EXTENSION_NAME "VK_KHR_16bit_storage" - typedef VkPhysicalDevice16BitStorageFeatures VkPhysicalDevice16BitStorageFeaturesKHR; @@ -5348,7 +6414,6 @@ typedef VkPhysicalDevice16BitStorageFeatures VkPhysicalDevice16BitStorageFeature #define VK_KHR_incremental_present 1 #define VK_KHR_INCREMENTAL_PRESENT_SPEC_VERSION 1 #define VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME "VK_KHR_incremental_present" - typedef struct 
VkRectLayerKHR { VkOffset2D offset; VkExtent2D extent; @@ -5372,21 +6437,16 @@ typedef struct VkPresentRegionsKHR { #define VK_KHR_descriptor_update_template 1 typedef VkDescriptorUpdateTemplate VkDescriptorUpdateTemplateKHR; - #define VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_SPEC_VERSION 1 #define VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME "VK_KHR_descriptor_update_template" - typedef VkDescriptorUpdateTemplateType VkDescriptorUpdateTemplateTypeKHR; - typedef VkDescriptorUpdateTemplateCreateFlags VkDescriptorUpdateTemplateCreateFlagsKHR; - typedef VkDescriptorUpdateTemplateEntry VkDescriptorUpdateTemplateEntryKHR; typedef VkDescriptorUpdateTemplateCreateInfo VkDescriptorUpdateTemplateCreateInfoKHR; - typedef VkResult (VKAPI_PTR *PFN_vkCreateDescriptorUpdateTemplateKHR)(VkDevice device, const VkDescriptorUpdateTemplateCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDescriptorUpdateTemplate* pDescriptorUpdateTemplate); typedef void (VKAPI_PTR *PFN_vkDestroyDescriptorUpdateTemplateKHR)(VkDevice device, VkDescriptorUpdateTemplate descriptorUpdateTemplate, const VkAllocationCallbacks* pAllocator); typedef void (VKAPI_PTR *PFN_vkUpdateDescriptorSetWithTemplateKHR)(VkDevice device, VkDescriptorSet descriptorSet, VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void* pData); @@ -5410,17 +6470,74 @@ VKAPI_ATTR void VKAPI_CALL vkUpdateDescriptorSetWithTemplateKHR( const void* pData); #endif + +#define VK_KHR_imageless_framebuffer 1 +#define VK_KHR_IMAGELESS_FRAMEBUFFER_SPEC_VERSION 1 +#define VK_KHR_IMAGELESS_FRAMEBUFFER_EXTENSION_NAME "VK_KHR_imageless_framebuffer" +typedef VkPhysicalDeviceImagelessFramebufferFeatures VkPhysicalDeviceImagelessFramebufferFeaturesKHR; + +typedef VkFramebufferAttachmentsCreateInfo VkFramebufferAttachmentsCreateInfoKHR; + +typedef VkFramebufferAttachmentImageInfo VkFramebufferAttachmentImageInfoKHR; + +typedef VkRenderPassAttachmentBeginInfo VkRenderPassAttachmentBeginInfoKHR; + + + +#define 
VK_KHR_create_renderpass2 1 +#define VK_KHR_CREATE_RENDERPASS_2_SPEC_VERSION 1 +#define VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME "VK_KHR_create_renderpass2" +typedef VkRenderPassCreateInfo2 VkRenderPassCreateInfo2KHR; + +typedef VkAttachmentDescription2 VkAttachmentDescription2KHR; + +typedef VkAttachmentReference2 VkAttachmentReference2KHR; + +typedef VkSubpassDescription2 VkSubpassDescription2KHR; + +typedef VkSubpassDependency2 VkSubpassDependency2KHR; + +typedef VkSubpassBeginInfo VkSubpassBeginInfoKHR; + +typedef VkSubpassEndInfo VkSubpassEndInfoKHR; + +typedef VkResult (VKAPI_PTR *PFN_vkCreateRenderPass2KHR)(VkDevice device, const VkRenderPassCreateInfo2* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkRenderPass* pRenderPass); +typedef void (VKAPI_PTR *PFN_vkCmdBeginRenderPass2KHR)(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, const VkSubpassBeginInfo* pSubpassBeginInfo); +typedef void (VKAPI_PTR *PFN_vkCmdNextSubpass2KHR)(VkCommandBuffer commandBuffer, const VkSubpassBeginInfo* pSubpassBeginInfo, const VkSubpassEndInfo* pSubpassEndInfo); +typedef void (VKAPI_PTR *PFN_vkCmdEndRenderPass2KHR)(VkCommandBuffer commandBuffer, const VkSubpassEndInfo* pSubpassEndInfo); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateRenderPass2KHR( + VkDevice device, + const VkRenderPassCreateInfo2* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass); + +VKAPI_ATTR void VKAPI_CALL vkCmdBeginRenderPass2KHR( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + const VkSubpassBeginInfo* pSubpassBeginInfo); + +VKAPI_ATTR void VKAPI_CALL vkCmdNextSubpass2KHR( + VkCommandBuffer commandBuffer, + const VkSubpassBeginInfo* pSubpassBeginInfo, + const VkSubpassEndInfo* pSubpassEndInfo); + +VKAPI_ATTR void VKAPI_CALL vkCmdEndRenderPass2KHR( + VkCommandBuffer commandBuffer, + const VkSubpassEndInfo* pSubpassEndInfo); +#endif + + #define VK_KHR_shared_presentable_image 1 
#define VK_KHR_SHARED_PRESENTABLE_IMAGE_SPEC_VERSION 1 #define VK_KHR_SHARED_PRESENTABLE_IMAGE_EXTENSION_NAME "VK_KHR_shared_presentable_image" - typedef struct VkSharedPresentSurfaceCapabilitiesKHR { VkStructureType sType; void* pNext; VkImageUsageFlags sharedPresentSupportedUsageFlags; } VkSharedPresentSurfaceCapabilitiesKHR; - typedef VkResult (VKAPI_PTR *PFN_vkGetSwapchainStatusKHR)(VkDevice device, VkSwapchainKHR swapchain); #ifndef VK_NO_PROTOTYPES @@ -5429,10 +6546,10 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainStatusKHR( VkSwapchainKHR swapchain); #endif + #define VK_KHR_external_fence_capabilities 1 #define VK_KHR_EXTERNAL_FENCE_CAPABILITIES_SPEC_VERSION 1 #define VK_KHR_EXTERNAL_FENCE_CAPABILITIES_EXTENSION_NAME "VK_KHR_external_fence_capabilities" - typedef VkExternalFenceHandleTypeFlags VkExternalFenceHandleTypeFlagsKHR; typedef VkExternalFenceHandleTypeFlagBits VkExternalFenceHandleTypeFlagBitsKHR; @@ -5441,12 +6558,10 @@ typedef VkExternalFenceFeatureFlags VkExternalFenceFeatureFlagsKHR; typedef VkExternalFenceFeatureFlagBits VkExternalFenceFeatureFlagBitsKHR; - typedef VkPhysicalDeviceExternalFenceInfo VkPhysicalDeviceExternalFenceInfoKHR; typedef VkExternalFenceProperties VkExternalFencePropertiesKHR; - typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceExternalFencePropertiesKHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalFenceInfo* pExternalFenceInfo, VkExternalFenceProperties* pExternalFenceProperties); #ifndef VK_NO_PROTOTYPES @@ -5456,15 +6571,14 @@ VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceExternalFencePropertiesKHR( VkExternalFenceProperties* pExternalFenceProperties); #endif + #define VK_KHR_external_fence 1 #define VK_KHR_EXTERNAL_FENCE_SPEC_VERSION 1 #define VK_KHR_EXTERNAL_FENCE_EXTENSION_NAME "VK_KHR_external_fence" - typedef VkFenceImportFlags VkFenceImportFlagsKHR; typedef VkFenceImportFlagBits VkFenceImportFlagBitsKHR; - typedef VkExportFenceCreateInfo VkExportFenceCreateInfoKHR; @@ -5472,7 +6586,6 @@ typedef 
VkExportFenceCreateInfo VkExportFenceCreateInfoKHR; #define VK_KHR_external_fence_fd 1 #define VK_KHR_EXTERNAL_FENCE_FD_SPEC_VERSION 1 #define VK_KHR_EXTERNAL_FENCE_FD_EXTENSION_NAME "VK_KHR_external_fence_fd" - typedef struct VkImportFenceFdInfoKHR { VkStructureType sType; const void* pNext; @@ -5489,7 +6602,6 @@ typedef struct VkFenceGetFdInfoKHR { VkExternalFenceHandleTypeFlagBits handleType; } VkFenceGetFdInfoKHR; - typedef VkResult (VKAPI_PTR *PFN_vkImportFenceFdKHR)(VkDevice device, const VkImportFenceFdInfoKHR* pImportFenceFdInfo); typedef VkResult (VKAPI_PTR *PFN_vkGetFenceFdKHR)(VkDevice device, const VkFenceGetFdInfoKHR* pGetFdInfo, int* pFd); @@ -5504,15 +6616,152 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetFenceFdKHR( int* pFd); #endif + +#define VK_KHR_performance_query 1 +#define VK_KHR_PERFORMANCE_QUERY_SPEC_VERSION 1 +#define VK_KHR_PERFORMANCE_QUERY_EXTENSION_NAME "VK_KHR_performance_query" + +typedef enum VkPerformanceCounterUnitKHR { + VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR = 0, + VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR = 1, + VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR = 2, + VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR = 3, + VK_PERFORMANCE_COUNTER_UNIT_BYTES_PER_SECOND_KHR = 4, + VK_PERFORMANCE_COUNTER_UNIT_KELVIN_KHR = 5, + VK_PERFORMANCE_COUNTER_UNIT_WATTS_KHR = 6, + VK_PERFORMANCE_COUNTER_UNIT_VOLTS_KHR = 7, + VK_PERFORMANCE_COUNTER_UNIT_AMPS_KHR = 8, + VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR = 9, + VK_PERFORMANCE_COUNTER_UNIT_CYCLES_KHR = 10, + VK_PERFORMANCE_COUNTER_UNIT_MAX_ENUM_KHR = 0x7FFFFFFF +} VkPerformanceCounterUnitKHR; + +typedef enum VkPerformanceCounterScopeKHR { + VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_BUFFER_KHR = 0, + VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR = 1, + VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR = 2, + VK_QUERY_SCOPE_COMMAND_BUFFER_KHR = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_BUFFER_KHR, + VK_QUERY_SCOPE_RENDER_PASS_KHR = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR, + VK_QUERY_SCOPE_COMMAND_KHR = 
VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR, + VK_PERFORMANCE_COUNTER_SCOPE_MAX_ENUM_KHR = 0x7FFFFFFF +} VkPerformanceCounterScopeKHR; + +typedef enum VkPerformanceCounterStorageKHR { + VK_PERFORMANCE_COUNTER_STORAGE_INT32_KHR = 0, + VK_PERFORMANCE_COUNTER_STORAGE_INT64_KHR = 1, + VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR = 2, + VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR = 3, + VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR = 4, + VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR = 5, + VK_PERFORMANCE_COUNTER_STORAGE_MAX_ENUM_KHR = 0x7FFFFFFF +} VkPerformanceCounterStorageKHR; + +typedef enum VkPerformanceCounterDescriptionFlagBitsKHR { + VK_PERFORMANCE_COUNTER_DESCRIPTION_PERFORMANCE_IMPACTING_KHR = 0x00000001, + VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_KHR = 0x00000002, + VK_PERFORMANCE_COUNTER_DESCRIPTION_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkPerformanceCounterDescriptionFlagBitsKHR; +typedef VkFlags VkPerformanceCounterDescriptionFlagsKHR; + +typedef enum VkAcquireProfilingLockFlagBitsKHR { + VK_ACQUIRE_PROFILING_LOCK_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkAcquireProfilingLockFlagBitsKHR; +typedef VkFlags VkAcquireProfilingLockFlagsKHR; +typedef struct VkPhysicalDevicePerformanceQueryFeaturesKHR { + VkStructureType sType; + void* pNext; + VkBool32 performanceCounterQueryPools; + VkBool32 performanceCounterMultipleQueryPools; +} VkPhysicalDevicePerformanceQueryFeaturesKHR; + +typedef struct VkPhysicalDevicePerformanceQueryPropertiesKHR { + VkStructureType sType; + void* pNext; + VkBool32 allowCommandBufferQueryCopies; +} VkPhysicalDevicePerformanceQueryPropertiesKHR; + +typedef struct VkPerformanceCounterKHR { + VkStructureType sType; + const void* pNext; + VkPerformanceCounterUnitKHR unit; + VkPerformanceCounterScopeKHR scope; + VkPerformanceCounterStorageKHR storage; + uint8_t uuid[VK_UUID_SIZE]; +} VkPerformanceCounterKHR; + +typedef struct VkPerformanceCounterDescriptionKHR { + VkStructureType sType; + const void* pNext; + 
VkPerformanceCounterDescriptionFlagsKHR flags; + char name[VK_MAX_DESCRIPTION_SIZE]; + char category[VK_MAX_DESCRIPTION_SIZE]; + char description[VK_MAX_DESCRIPTION_SIZE]; +} VkPerformanceCounterDescriptionKHR; + +typedef struct VkQueryPoolPerformanceCreateInfoKHR { + VkStructureType sType; + const void* pNext; + uint32_t queueFamilyIndex; + uint32_t counterIndexCount; + const uint32_t* pCounterIndices; +} VkQueryPoolPerformanceCreateInfoKHR; + +typedef union VkPerformanceCounterResultKHR { + int32_t int32; + int64_t int64; + uint32_t uint32; + uint64_t uint64; + float float32; + double float64; +} VkPerformanceCounterResultKHR; + +typedef struct VkAcquireProfilingLockInfoKHR { + VkStructureType sType; + const void* pNext; + VkAcquireProfilingLockFlagsKHR flags; + uint64_t timeout; +} VkAcquireProfilingLockInfoKHR; + +typedef struct VkPerformanceQuerySubmitInfoKHR { + VkStructureType sType; + const void* pNext; + uint32_t counterPassIndex; +} VkPerformanceQuerySubmitInfoKHR; + +typedef VkResult (VKAPI_PTR *PFN_vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, uint32_t* pCounterCount, VkPerformanceCounterKHR* pCounters, VkPerformanceCounterDescriptionKHR* pCounterDescriptions); +typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR)(VkPhysicalDevice physicalDevice, const VkQueryPoolPerformanceCreateInfoKHR* pPerformanceQueryCreateInfo, uint32_t* pNumPasses); +typedef VkResult (VKAPI_PTR *PFN_vkAcquireProfilingLockKHR)(VkDevice device, const VkAcquireProfilingLockInfoKHR* pInfo); +typedef void (VKAPI_PTR *PFN_vkReleaseProfilingLockKHR)(VkDevice device); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + uint32_t* pCounterCount, + VkPerformanceCounterKHR* pCounters, + VkPerformanceCounterDescriptionKHR* pCounterDescriptions); 
+ +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR( + VkPhysicalDevice physicalDevice, + const VkQueryPoolPerformanceCreateInfoKHR* pPerformanceQueryCreateInfo, + uint32_t* pNumPasses); + +VKAPI_ATTR VkResult VKAPI_CALL vkAcquireProfilingLockKHR( + VkDevice device, + const VkAcquireProfilingLockInfoKHR* pInfo); + +VKAPI_ATTR void VKAPI_CALL vkReleaseProfilingLockKHR( + VkDevice device); +#endif + + #define VK_KHR_maintenance2 1 #define VK_KHR_MAINTENANCE2_SPEC_VERSION 1 #define VK_KHR_MAINTENANCE2_EXTENSION_NAME "VK_KHR_maintenance2" - typedef VkPointClippingBehavior VkPointClippingBehaviorKHR; typedef VkTessellationDomainOrigin VkTessellationDomainOriginKHR; - typedef VkPhysicalDevicePointClippingProperties VkPhysicalDevicePointClippingPropertiesKHR; typedef VkRenderPassInputAttachmentAspectCreateInfo VkRenderPassInputAttachmentAspectCreateInfoKHR; @@ -5528,7 +6777,6 @@ typedef VkPipelineTessellationDomainOriginStateCreateInfo VkPipelineTessellation #define VK_KHR_get_surface_capabilities2 1 #define VK_KHR_GET_SURFACE_CAPABILITIES_2_SPEC_VERSION 1 #define VK_KHR_GET_SURFACE_CAPABILITIES_2_EXTENSION_NAME "VK_KHR_get_surface_capabilities2" - typedef struct VkPhysicalDeviceSurfaceInfo2KHR { VkStructureType sType; const void* pNext; @@ -5547,7 +6795,6 @@ typedef struct VkSurfaceFormat2KHR { VkSurfaceFormatKHR surfaceFormat; } VkSurfaceFormat2KHR; - typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo, VkSurfaceCapabilities2KHR* pSurfaceCapabilities); typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceFormats2KHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo, uint32_t* pSurfaceFormatCount, VkSurfaceFormat2KHR* pSurfaceFormats); @@ -5564,18 +6811,82 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceFormats2KHR( VkSurfaceFormat2KHR* pSurfaceFormats); #endif + #define 
VK_KHR_variable_pointers 1 #define VK_KHR_VARIABLE_POINTERS_SPEC_VERSION 1 #define VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME "VK_KHR_variable_pointers" +typedef VkPhysicalDeviceVariablePointersFeatures VkPhysicalDeviceVariablePointerFeaturesKHR; -typedef VkPhysicalDeviceVariablePointerFeatures VkPhysicalDeviceVariablePointerFeaturesKHR; +typedef VkPhysicalDeviceVariablePointersFeatures VkPhysicalDeviceVariablePointersFeaturesKHR; +#define VK_KHR_get_display_properties2 1 +#define VK_KHR_GET_DISPLAY_PROPERTIES_2_SPEC_VERSION 1 +#define VK_KHR_GET_DISPLAY_PROPERTIES_2_EXTENSION_NAME "VK_KHR_get_display_properties2" +typedef struct VkDisplayProperties2KHR { + VkStructureType sType; + void* pNext; + VkDisplayPropertiesKHR displayProperties; +} VkDisplayProperties2KHR; + +typedef struct VkDisplayPlaneProperties2KHR { + VkStructureType sType; + void* pNext; + VkDisplayPlanePropertiesKHR displayPlaneProperties; +} VkDisplayPlaneProperties2KHR; + +typedef struct VkDisplayModeProperties2KHR { + VkStructureType sType; + void* pNext; + VkDisplayModePropertiesKHR displayModeProperties; +} VkDisplayModeProperties2KHR; + +typedef struct VkDisplayPlaneInfo2KHR { + VkStructureType sType; + const void* pNext; + VkDisplayModeKHR mode; + uint32_t planeIndex; +} VkDisplayPlaneInfo2KHR; + +typedef struct VkDisplayPlaneCapabilities2KHR { + VkStructureType sType; + void* pNext; + VkDisplayPlaneCapabilitiesKHR capabilities; +} VkDisplayPlaneCapabilities2KHR; + +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceDisplayProperties2KHR)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkDisplayProperties2KHR* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceDisplayPlaneProperties2KHR)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkDisplayPlaneProperties2KHR* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkGetDisplayModeProperties2KHR)(VkPhysicalDevice physicalDevice, VkDisplayKHR display, uint32_t* pPropertyCount, VkDisplayModeProperties2KHR* 
pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkGetDisplayPlaneCapabilities2KHR)(VkPhysicalDevice physicalDevice, const VkDisplayPlaneInfo2KHR* pDisplayPlaneInfo, VkDisplayPlaneCapabilities2KHR* pCapabilities); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceDisplayProperties2KHR( + VkPhysicalDevice physicalDevice, + uint32_t* pPropertyCount, + VkDisplayProperties2KHR* pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceDisplayPlaneProperties2KHR( + VkPhysicalDevice physicalDevice, + uint32_t* pPropertyCount, + VkDisplayPlaneProperties2KHR* pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetDisplayModeProperties2KHR( + VkPhysicalDevice physicalDevice, + VkDisplayKHR display, + uint32_t* pPropertyCount, + VkDisplayModeProperties2KHR* pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetDisplayPlaneCapabilities2KHR( + VkPhysicalDevice physicalDevice, + const VkDisplayPlaneInfo2KHR* pDisplayPlaneInfo, + VkDisplayPlaneCapabilities2KHR* pCapabilities); +#endif + + #define VK_KHR_dedicated_allocation 1 #define VK_KHR_DEDICATED_ALLOCATION_SPEC_VERSION 3 #define VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME "VK_KHR_dedicated_allocation" - typedef VkMemoryDedicatedRequirements VkMemoryDedicatedRequirementsKHR; typedef VkMemoryDedicatedAllocateInfo VkMemoryDedicatedAllocateInfoKHR; @@ -5595,7 +6906,6 @@ typedef VkMemoryDedicatedAllocateInfo VkMemoryDedicatedAllocateInfoKHR; #define VK_KHR_get_memory_requirements2 1 #define VK_KHR_GET_MEMORY_REQUIREMENTS_2_SPEC_VERSION 1 #define VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME "VK_KHR_get_memory_requirements2" - typedef VkBufferMemoryRequirementsInfo2 VkBufferMemoryRequirementsInfo2KHR; typedef VkImageMemoryRequirementsInfo2 VkImageMemoryRequirementsInfo2KHR; @@ -5606,7 +6916,6 @@ typedef VkMemoryRequirements2 VkMemoryRequirements2KHR; typedef VkSparseImageMemoryRequirements2 VkSparseImageMemoryRequirements2KHR; - typedef void (VKAPI_PTR 
*PFN_vkGetImageMemoryRequirements2KHR)(VkDevice device, const VkImageMemoryRequirementsInfo2* pInfo, VkMemoryRequirements2* pMemoryRequirements); typedef void (VKAPI_PTR *PFN_vkGetBufferMemoryRequirements2KHR)(VkDevice device, const VkBufferMemoryRequirementsInfo2* pInfo, VkMemoryRequirements2* pMemoryRequirements); typedef void (VKAPI_PTR *PFN_vkGetImageSparseMemoryRequirements2KHR)(VkDevice device, const VkImageSparseMemoryRequirementsInfo2* pInfo, uint32_t* pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements2* pSparseMemoryRequirements); @@ -5629,33 +6938,25 @@ VKAPI_ATTR void VKAPI_CALL vkGetImageSparseMemoryRequirements2KHR( VkSparseImageMemoryRequirements2* pSparseMemoryRequirements); #endif + #define VK_KHR_image_format_list 1 #define VK_KHR_IMAGE_FORMAT_LIST_SPEC_VERSION 1 #define VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME "VK_KHR_image_format_list" - -typedef struct VkImageFormatListCreateInfoKHR { - VkStructureType sType; - const void* pNext; - uint32_t viewFormatCount; - const VkFormat* pViewFormats; -} VkImageFormatListCreateInfoKHR; +typedef VkImageFormatListCreateInfo VkImageFormatListCreateInfoKHR; #define VK_KHR_sampler_ycbcr_conversion 1 typedef VkSamplerYcbcrConversion VkSamplerYcbcrConversionKHR; - -#define VK_KHR_SAMPLER_YCBCR_CONVERSION_SPEC_VERSION 1 +#define VK_KHR_SAMPLER_YCBCR_CONVERSION_SPEC_VERSION 14 #define VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME "VK_KHR_sampler_ycbcr_conversion" - typedef VkSamplerYcbcrModelConversion VkSamplerYcbcrModelConversionKHR; typedef VkSamplerYcbcrRange VkSamplerYcbcrRangeKHR; typedef VkChromaLocation VkChromaLocationKHR; - typedef VkSamplerYcbcrConversionCreateInfo VkSamplerYcbcrConversionCreateInfoKHR; typedef VkSamplerYcbcrConversionInfo VkSamplerYcbcrConversionInfoKHR; @@ -5668,7 +6969,6 @@ typedef VkPhysicalDeviceSamplerYcbcrConversionFeatures VkPhysicalDeviceSamplerYc typedef VkSamplerYcbcrConversionImageFormatProperties VkSamplerYcbcrConversionImageFormatPropertiesKHR; - typedef VkResult 
(VKAPI_PTR *PFN_vkCreateSamplerYcbcrConversionKHR)(VkDevice device, const VkSamplerYcbcrConversionCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSamplerYcbcrConversion* pYcbcrConversion); typedef void (VKAPI_PTR *PFN_vkDestroySamplerYcbcrConversionKHR)(VkDevice device, VkSamplerYcbcrConversion ycbcrConversion, const VkAllocationCallbacks* pAllocator); @@ -5685,15 +6985,14 @@ VKAPI_ATTR void VKAPI_CALL vkDestroySamplerYcbcrConversionKHR( const VkAllocationCallbacks* pAllocator); #endif + #define VK_KHR_bind_memory2 1 #define VK_KHR_BIND_MEMORY_2_SPEC_VERSION 1 #define VK_KHR_BIND_MEMORY_2_EXTENSION_NAME "VK_KHR_bind_memory2" - typedef VkBindBufferMemoryInfo VkBindBufferMemoryInfoKHR; typedef VkBindImageMemoryInfo VkBindImageMemoryInfoKHR; - typedef VkResult (VKAPI_PTR *PFN_vkBindBufferMemory2KHR)(VkDevice device, uint32_t bindInfoCount, const VkBindBufferMemoryInfo* pBindInfos); typedef VkResult (VKAPI_PTR *PFN_vkBindImageMemory2KHR)(VkDevice device, uint32_t bindInfoCount, const VkBindImageMemoryInfo* pBindInfos); @@ -5709,15 +7008,14 @@ VKAPI_ATTR VkResult VKAPI_CALL vkBindImageMemory2KHR( const VkBindImageMemoryInfo* pBindInfos); #endif + #define VK_KHR_maintenance3 1 #define VK_KHR_MAINTENANCE3_SPEC_VERSION 1 #define VK_KHR_MAINTENANCE3_EXTENSION_NAME "VK_KHR_maintenance3" - typedef VkPhysicalDeviceMaintenance3Properties VkPhysicalDeviceMaintenance3PropertiesKHR; typedef VkDescriptorSetLayoutSupport VkDescriptorSetLayoutSupportKHR; - typedef void (VKAPI_PTR *PFN_vkGetDescriptorSetLayoutSupportKHR)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayoutSupport* pSupport); #ifndef VK_NO_PROTOTYPES @@ -5727,14 +7025,321 @@ VKAPI_ATTR void VKAPI_CALL vkGetDescriptorSetLayoutSupportKHR( VkDescriptorSetLayoutSupport* pSupport); #endif + +#define VK_KHR_draw_indirect_count 1 +#define VK_KHR_DRAW_INDIRECT_COUNT_SPEC_VERSION 1 +#define VK_KHR_DRAW_INDIRECT_COUNT_EXTENSION_NAME "VK_KHR_draw_indirect_count" +typedef 
void (VKAPI_PTR *PFN_vkCmdDrawIndirectCountKHR)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride); +typedef void (VKAPI_PTR *PFN_vkCmdDrawIndexedIndirectCountKHR)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); + +VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexedIndirectCountKHR( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); +#endif + + +#define VK_KHR_shader_subgroup_extended_types 1 +#define VK_KHR_SHADER_SUBGROUP_EXTENDED_TYPES_SPEC_VERSION 1 +#define VK_KHR_SHADER_SUBGROUP_EXTENDED_TYPES_EXTENSION_NAME "VK_KHR_shader_subgroup_extended_types" +typedef VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures VkPhysicalDeviceShaderSubgroupExtendedTypesFeaturesKHR; + + + +#define VK_KHR_8bit_storage 1 +#define VK_KHR_8BIT_STORAGE_SPEC_VERSION 1 +#define VK_KHR_8BIT_STORAGE_EXTENSION_NAME "VK_KHR_8bit_storage" +typedef VkPhysicalDevice8BitStorageFeatures VkPhysicalDevice8BitStorageFeaturesKHR; + + + +#define VK_KHR_shader_atomic_int64 1 +#define VK_KHR_SHADER_ATOMIC_INT64_SPEC_VERSION 1 +#define VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME "VK_KHR_shader_atomic_int64" +typedef VkPhysicalDeviceShaderAtomicInt64Features VkPhysicalDeviceShaderAtomicInt64FeaturesKHR; + + + +#define VK_KHR_shader_clock 1 +#define VK_KHR_SHADER_CLOCK_SPEC_VERSION 1 +#define VK_KHR_SHADER_CLOCK_EXTENSION_NAME "VK_KHR_shader_clock" +typedef struct VkPhysicalDeviceShaderClockFeaturesKHR { + 
VkStructureType sType; + void* pNext; + VkBool32 shaderSubgroupClock; + VkBool32 shaderDeviceClock; +} VkPhysicalDeviceShaderClockFeaturesKHR; + + + +#define VK_KHR_driver_properties 1 +#define VK_KHR_DRIVER_PROPERTIES_SPEC_VERSION 1 +#define VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME "VK_KHR_driver_properties" +#define VK_MAX_DRIVER_NAME_SIZE_KHR VK_MAX_DRIVER_NAME_SIZE +#define VK_MAX_DRIVER_INFO_SIZE_KHR VK_MAX_DRIVER_INFO_SIZE +typedef VkDriverId VkDriverIdKHR; + +typedef VkConformanceVersion VkConformanceVersionKHR; + +typedef VkPhysicalDeviceDriverProperties VkPhysicalDeviceDriverPropertiesKHR; + + + +#define VK_KHR_shader_float_controls 1 +#define VK_KHR_SHADER_FLOAT_CONTROLS_SPEC_VERSION 4 +#define VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME "VK_KHR_shader_float_controls" +typedef VkShaderFloatControlsIndependence VkShaderFloatControlsIndependenceKHR; + +typedef VkPhysicalDeviceFloatControlsProperties VkPhysicalDeviceFloatControlsPropertiesKHR; + + + +#define VK_KHR_depth_stencil_resolve 1 +#define VK_KHR_DEPTH_STENCIL_RESOLVE_SPEC_VERSION 1 +#define VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME "VK_KHR_depth_stencil_resolve" +typedef VkResolveModeFlagBits VkResolveModeFlagBitsKHR; + +typedef VkResolveModeFlags VkResolveModeFlagsKHR; + +typedef VkSubpassDescriptionDepthStencilResolve VkSubpassDescriptionDepthStencilResolveKHR; + +typedef VkPhysicalDeviceDepthStencilResolveProperties VkPhysicalDeviceDepthStencilResolvePropertiesKHR; + + + +#define VK_KHR_swapchain_mutable_format 1 +#define VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_SPEC_VERSION 1 +#define VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME "VK_KHR_swapchain_mutable_format" + + +#define VK_KHR_timeline_semaphore 1 +#define VK_KHR_TIMELINE_SEMAPHORE_SPEC_VERSION 2 +#define VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME "VK_KHR_timeline_semaphore" +typedef VkSemaphoreType VkSemaphoreTypeKHR; + +typedef VkSemaphoreWaitFlagBits VkSemaphoreWaitFlagBitsKHR; + +typedef VkSemaphoreWaitFlags VkSemaphoreWaitFlagsKHR; + +typedef 
VkPhysicalDeviceTimelineSemaphoreFeatures VkPhysicalDeviceTimelineSemaphoreFeaturesKHR; + +typedef VkPhysicalDeviceTimelineSemaphoreProperties VkPhysicalDeviceTimelineSemaphorePropertiesKHR; + +typedef VkSemaphoreTypeCreateInfo VkSemaphoreTypeCreateInfoKHR; + +typedef VkTimelineSemaphoreSubmitInfo VkTimelineSemaphoreSubmitInfoKHR; + +typedef VkSemaphoreWaitInfo VkSemaphoreWaitInfoKHR; + +typedef VkSemaphoreSignalInfo VkSemaphoreSignalInfoKHR; + +typedef VkResult (VKAPI_PTR *PFN_vkGetSemaphoreCounterValueKHR)(VkDevice device, VkSemaphore semaphore, uint64_t* pValue); +typedef VkResult (VKAPI_PTR *PFN_vkWaitSemaphoresKHR)(VkDevice device, const VkSemaphoreWaitInfo* pWaitInfo, uint64_t timeout); +typedef VkResult (VKAPI_PTR *PFN_vkSignalSemaphoreKHR)(VkDevice device, const VkSemaphoreSignalInfo* pSignalInfo); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkGetSemaphoreCounterValueKHR( + VkDevice device, + VkSemaphore semaphore, + uint64_t* pValue); + +VKAPI_ATTR VkResult VKAPI_CALL vkWaitSemaphoresKHR( + VkDevice device, + const VkSemaphoreWaitInfo* pWaitInfo, + uint64_t timeout); + +VKAPI_ATTR VkResult VKAPI_CALL vkSignalSemaphoreKHR( + VkDevice device, + const VkSemaphoreSignalInfo* pSignalInfo); +#endif + + +#define VK_KHR_vulkan_memory_model 1 +#define VK_KHR_VULKAN_MEMORY_MODEL_SPEC_VERSION 3 +#define VK_KHR_VULKAN_MEMORY_MODEL_EXTENSION_NAME "VK_KHR_vulkan_memory_model" +typedef VkPhysicalDeviceVulkanMemoryModelFeatures VkPhysicalDeviceVulkanMemoryModelFeaturesKHR; + + + +#define VK_KHR_spirv_1_4 1 +#define VK_KHR_SPIRV_1_4_SPEC_VERSION 1 +#define VK_KHR_SPIRV_1_4_EXTENSION_NAME "VK_KHR_spirv_1_4" + + +#define VK_KHR_surface_protected_capabilities 1 +#define VK_KHR_SURFACE_PROTECTED_CAPABILITIES_SPEC_VERSION 1 +#define VK_KHR_SURFACE_PROTECTED_CAPABILITIES_EXTENSION_NAME "VK_KHR_surface_protected_capabilities" +typedef struct VkSurfaceProtectedCapabilitiesKHR { + VkStructureType sType; + const void* pNext; + VkBool32 supportsProtected; +} 
VkSurfaceProtectedCapabilitiesKHR; + + + +#define VK_KHR_separate_depth_stencil_layouts 1 +#define VK_KHR_SEPARATE_DEPTH_STENCIL_LAYOUTS_SPEC_VERSION 1 +#define VK_KHR_SEPARATE_DEPTH_STENCIL_LAYOUTS_EXTENSION_NAME "VK_KHR_separate_depth_stencil_layouts" +typedef VkPhysicalDeviceSeparateDepthStencilLayoutsFeatures VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR; + +typedef VkAttachmentReferenceStencilLayout VkAttachmentReferenceStencilLayoutKHR; + +typedef VkAttachmentDescriptionStencilLayout VkAttachmentDescriptionStencilLayoutKHR; + + + +#define VK_KHR_uniform_buffer_standard_layout 1 +#define VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_SPEC_VERSION 1 +#define VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME "VK_KHR_uniform_buffer_standard_layout" +typedef VkPhysicalDeviceUniformBufferStandardLayoutFeatures VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR; + + + +#define VK_KHR_buffer_device_address 1 +#define VK_KHR_BUFFER_DEVICE_ADDRESS_SPEC_VERSION 1 +#define VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME "VK_KHR_buffer_device_address" +typedef VkPhysicalDeviceBufferDeviceAddressFeatures VkPhysicalDeviceBufferDeviceAddressFeaturesKHR; + +typedef VkBufferDeviceAddressInfo VkBufferDeviceAddressInfoKHR; + +typedef VkBufferOpaqueCaptureAddressCreateInfo VkBufferOpaqueCaptureAddressCreateInfoKHR; + +typedef VkMemoryOpaqueCaptureAddressAllocateInfo VkMemoryOpaqueCaptureAddressAllocateInfoKHR; + +typedef VkDeviceMemoryOpaqueCaptureAddressInfo VkDeviceMemoryOpaqueCaptureAddressInfoKHR; + +typedef VkDeviceAddress (VKAPI_PTR *PFN_vkGetBufferDeviceAddressKHR)(VkDevice device, const VkBufferDeviceAddressInfo* pInfo); +typedef uint64_t (VKAPI_PTR *PFN_vkGetBufferOpaqueCaptureAddressKHR)(VkDevice device, const VkBufferDeviceAddressInfo* pInfo); +typedef uint64_t (VKAPI_PTR *PFN_vkGetDeviceMemoryOpaqueCaptureAddressKHR)(VkDevice device, const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkDeviceAddress VKAPI_CALL 
vkGetBufferDeviceAddressKHR( + VkDevice device, + const VkBufferDeviceAddressInfo* pInfo); + +VKAPI_ATTR uint64_t VKAPI_CALL vkGetBufferOpaqueCaptureAddressKHR( + VkDevice device, + const VkBufferDeviceAddressInfo* pInfo); + +VKAPI_ATTR uint64_t VKAPI_CALL vkGetDeviceMemoryOpaqueCaptureAddressKHR( + VkDevice device, + const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo); +#endif + + +#define VK_KHR_pipeline_executable_properties 1 +#define VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_SPEC_VERSION 1 +#define VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME "VK_KHR_pipeline_executable_properties" + +typedef enum VkPipelineExecutableStatisticFormatKHR { + VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR = 0, + VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR = 1, + VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR = 2, + VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR = 3, + VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_MAX_ENUM_KHR = 0x7FFFFFFF +} VkPipelineExecutableStatisticFormatKHR; +typedef struct VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR { + VkStructureType sType; + void* pNext; + VkBool32 pipelineExecutableInfo; +} VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR; + +typedef struct VkPipelineInfoKHR { + VkStructureType sType; + const void* pNext; + VkPipeline pipeline; +} VkPipelineInfoKHR; + +typedef struct VkPipelineExecutablePropertiesKHR { + VkStructureType sType; + void* pNext; + VkShaderStageFlags stages; + char name[VK_MAX_DESCRIPTION_SIZE]; + char description[VK_MAX_DESCRIPTION_SIZE]; + uint32_t subgroupSize; +} VkPipelineExecutablePropertiesKHR; + +typedef struct VkPipelineExecutableInfoKHR { + VkStructureType sType; + const void* pNext; + VkPipeline pipeline; + uint32_t executableIndex; +} VkPipelineExecutableInfoKHR; + +typedef union VkPipelineExecutableStatisticValueKHR { + VkBool32 b32; + int64_t i64; + uint64_t u64; + double f64; +} VkPipelineExecutableStatisticValueKHR; + +typedef struct VkPipelineExecutableStatisticKHR { + 
VkStructureType sType; + void* pNext; + char name[VK_MAX_DESCRIPTION_SIZE]; + char description[VK_MAX_DESCRIPTION_SIZE]; + VkPipelineExecutableStatisticFormatKHR format; + VkPipelineExecutableStatisticValueKHR value; +} VkPipelineExecutableStatisticKHR; + +typedef struct VkPipelineExecutableInternalRepresentationKHR { + VkStructureType sType; + void* pNext; + char name[VK_MAX_DESCRIPTION_SIZE]; + char description[VK_MAX_DESCRIPTION_SIZE]; + VkBool32 isText; + size_t dataSize; + void* pData; +} VkPipelineExecutableInternalRepresentationKHR; + +typedef VkResult (VKAPI_PTR *PFN_vkGetPipelineExecutablePropertiesKHR)(VkDevice device, const VkPipelineInfoKHR* pPipelineInfo, uint32_t* pExecutableCount, VkPipelineExecutablePropertiesKHR* pProperties); +typedef VkResult (VKAPI_PTR *PFN_vkGetPipelineExecutableStatisticsKHR)(VkDevice device, const VkPipelineExecutableInfoKHR* pExecutableInfo, uint32_t* pStatisticCount, VkPipelineExecutableStatisticKHR* pStatistics); +typedef VkResult (VKAPI_PTR *PFN_vkGetPipelineExecutableInternalRepresentationsKHR)(VkDevice device, const VkPipelineExecutableInfoKHR* pExecutableInfo, uint32_t* pInternalRepresentationCount, VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineExecutablePropertiesKHR( + VkDevice device, + const VkPipelineInfoKHR* pPipelineInfo, + uint32_t* pExecutableCount, + VkPipelineExecutablePropertiesKHR* pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineExecutableStatisticsKHR( + VkDevice device, + const VkPipelineExecutableInfoKHR* pExecutableInfo, + uint32_t* pStatisticCount, + VkPipelineExecutableStatisticKHR* pStatistics); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineExecutableInternalRepresentationsKHR( + VkDevice device, + const VkPipelineExecutableInfoKHR* pExecutableInfo, + uint32_t* pInternalRepresentationCount, + VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations); +#endif + + +#define 
VK_KHR_shader_non_semantic_info 1 +#define VK_KHR_SHADER_NON_SEMANTIC_INFO_SPEC_VERSION 1 +#define VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME "VK_KHR_shader_non_semantic_info" + + #define VK_EXT_debug_report 1 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDebugReportCallbackEXT) - #define VK_EXT_DEBUG_REPORT_SPEC_VERSION 9 #define VK_EXT_DEBUG_REPORT_EXTENSION_NAME "VK_EXT_debug_report" -#define VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT -#define VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT - typedef enum VkDebugReportObjectTypeEXT { VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT = 0, @@ -5768,20 +7373,18 @@ typedef enum VkDebugReportObjectTypeEXT { VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT = 28, VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_KHR_EXT = 29, VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_MODE_KHR_EXT = 30, - VK_DEBUG_REPORT_OBJECT_TYPE_OBJECT_TABLE_NVX_EXT = 31, - VK_DEBUG_REPORT_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX_EXT = 32, VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT = 33, VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_EXT = 1000156000, VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_EXT = 1000085000, + VK_DEBUG_REPORT_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR_EXT = 1000165000, + VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT, + VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT, VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_EXT, VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_KHR_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_EXT, - VK_DEBUG_REPORT_OBJECT_TYPE_BEGIN_RANGE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT, - VK_DEBUG_REPORT_OBJECT_TYPE_END_RANGE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT, - 
VK_DEBUG_REPORT_OBJECT_TYPE_RANGE_SIZE_EXT = (VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT - VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT + 1), + VK_DEBUG_REPORT_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR_EXT, VK_DEBUG_REPORT_OBJECT_TYPE_MAX_ENUM_EXT = 0x7FFFFFFF } VkDebugReportObjectTypeEXT; - typedef enum VkDebugReportFlagBitsEXT { VK_DEBUG_REPORT_INFORMATION_BIT_EXT = 0x00000001, VK_DEBUG_REPORT_WARNING_BIT_EXT = 0x00000002, @@ -5791,7 +7394,6 @@ typedef enum VkDebugReportFlagBitsEXT { VK_DEBUG_REPORT_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF } VkDebugReportFlagBitsEXT; typedef VkFlags VkDebugReportFlagsEXT; - typedef VkBool32 (VKAPI_PTR *PFN_vkDebugReportCallbackEXT)( VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objectType, @@ -5810,7 +7412,6 @@ typedef struct VkDebugReportCallbackCreateInfoEXT { void* pUserData; } VkDebugReportCallbackCreateInfoEXT; - typedef VkResult (VKAPI_PTR *PFN_vkCreateDebugReportCallbackEXT)(VkInstance instance, const VkDebugReportCallbackCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugReportCallbackEXT* pCallback); typedef void (VKAPI_PTR *PFN_vkDestroyDebugReportCallbackEXT)(VkInstance instance, VkDebugReportCallbackEXT callback, const VkAllocationCallbacks* pAllocator); typedef void (VKAPI_PTR *PFN_vkDebugReportMessageEXT)(VkInstance instance, VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objectType, uint64_t object, size_t location, int32_t messageCode, const char* pLayerPrefix, const char* pMessage); @@ -5838,6 +7439,7 @@ VKAPI_ATTR void VKAPI_CALL vkDebugReportMessageEXT( const char* pMessage); #endif + #define VK_NV_glsl_shader 1 #define VK_NV_GLSL_SHADER_SPEC_VERSION 1 #define VK_NV_GLSL_SHADER_EXTENSION_NAME "VK_NV_glsl_shader" @@ -5857,16 +7459,11 @@ VKAPI_ATTR void VKAPI_CALL vkDebugReportMessageEXT( #define VK_AMD_RASTERIZATION_ORDER_SPEC_VERSION 1 #define VK_AMD_RASTERIZATION_ORDER_EXTENSION_NAME 
"VK_AMD_rasterization_order" - typedef enum VkRasterizationOrderAMD { VK_RASTERIZATION_ORDER_STRICT_AMD = 0, VK_RASTERIZATION_ORDER_RELAXED_AMD = 1, - VK_RASTERIZATION_ORDER_BEGIN_RANGE_AMD = VK_RASTERIZATION_ORDER_STRICT_AMD, - VK_RASTERIZATION_ORDER_END_RANGE_AMD = VK_RASTERIZATION_ORDER_RELAXED_AMD, - VK_RASTERIZATION_ORDER_RANGE_SIZE_AMD = (VK_RASTERIZATION_ORDER_RELAXED_AMD - VK_RASTERIZATION_ORDER_STRICT_AMD + 1), VK_RASTERIZATION_ORDER_MAX_ENUM_AMD = 0x7FFFFFFF } VkRasterizationOrderAMD; - typedef struct VkPipelineRasterizationStateRasterizationOrderAMD { VkStructureType sType; const void* pNext; @@ -5888,7 +7485,6 @@ typedef struct VkPipelineRasterizationStateRasterizationOrderAMD { #define VK_EXT_debug_marker 1 #define VK_EXT_DEBUG_MARKER_SPEC_VERSION 4 #define VK_EXT_DEBUG_MARKER_EXTENSION_NAME "VK_EXT_debug_marker" - typedef struct VkDebugMarkerObjectNameInfoEXT { VkStructureType sType; const void* pNext; @@ -5914,7 +7510,6 @@ typedef struct VkDebugMarkerMarkerInfoEXT { float color[4]; } VkDebugMarkerMarkerInfoEXT; - typedef VkResult (VKAPI_PTR *PFN_vkDebugMarkerSetObjectTagEXT)(VkDevice device, const VkDebugMarkerObjectTagInfoEXT* pTagInfo); typedef VkResult (VKAPI_PTR *PFN_vkDebugMarkerSetObjectNameEXT)(VkDevice device, const VkDebugMarkerObjectNameInfoEXT* pNameInfo); typedef void (VKAPI_PTR *PFN_vkCmdDebugMarkerBeginEXT)(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT* pMarkerInfo); @@ -5942,6 +7537,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDebugMarkerInsertEXT( const VkDebugMarkerMarkerInfoEXT* pMarkerInfo); #endif + #define VK_AMD_gcn_shader 1 #define VK_AMD_GCN_SHADER_SPEC_VERSION 1 #define VK_AMD_GCN_SHADER_EXTENSION_NAME "VK_AMD_gcn_shader" @@ -5950,7 +7546,6 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDebugMarkerInsertEXT( #define VK_NV_dedicated_allocation 1 #define VK_NV_DEDICATED_ALLOCATION_SPEC_VERSION 1 #define VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME "VK_NV_dedicated_allocation" - typedef struct VkDedicatedAllocationImageCreateInfoNV { 
VkStructureType sType; const void* pNext; @@ -5972,10 +7567,129 @@ typedef struct VkDedicatedAllocationMemoryAllocateInfoNV { -#define VK_AMD_draw_indirect_count 1 -#define VK_AMD_DRAW_INDIRECT_COUNT_SPEC_VERSION 1 -#define VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME "VK_AMD_draw_indirect_count" +#define VK_EXT_transform_feedback 1 +#define VK_EXT_TRANSFORM_FEEDBACK_SPEC_VERSION 1 +#define VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME "VK_EXT_transform_feedback" +typedef VkFlags VkPipelineRasterizationStateStreamCreateFlagsEXT; +typedef struct VkPhysicalDeviceTransformFeedbackFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 transformFeedback; + VkBool32 geometryStreams; +} VkPhysicalDeviceTransformFeedbackFeaturesEXT; +typedef struct VkPhysicalDeviceTransformFeedbackPropertiesEXT { + VkStructureType sType; + void* pNext; + uint32_t maxTransformFeedbackStreams; + uint32_t maxTransformFeedbackBuffers; + VkDeviceSize maxTransformFeedbackBufferSize; + uint32_t maxTransformFeedbackStreamDataSize; + uint32_t maxTransformFeedbackBufferDataSize; + uint32_t maxTransformFeedbackBufferDataStride; + VkBool32 transformFeedbackQueries; + VkBool32 transformFeedbackStreamsLinesTriangles; + VkBool32 transformFeedbackRasterizationStreamSelect; + VkBool32 transformFeedbackDraw; +} VkPhysicalDeviceTransformFeedbackPropertiesEXT; + +typedef struct VkPipelineRasterizationStateStreamCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkPipelineRasterizationStateStreamCreateFlagsEXT flags; + uint32_t rasterizationStream; +} VkPipelineRasterizationStateStreamCreateInfoEXT; + +typedef void (VKAPI_PTR *PFN_vkCmdBindTransformFeedbackBuffersEXT)(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets, const VkDeviceSize* pSizes); +typedef void (VKAPI_PTR *PFN_vkCmdBeginTransformFeedbackEXT)(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer, uint32_t counterBufferCount, const VkBuffer* 
pCounterBuffers, const VkDeviceSize* pCounterBufferOffsets); +typedef void (VKAPI_PTR *PFN_vkCmdEndTransformFeedbackEXT)(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer, uint32_t counterBufferCount, const VkBuffer* pCounterBuffers, const VkDeviceSize* pCounterBufferOffsets); +typedef void (VKAPI_PTR *PFN_vkCmdBeginQueryIndexedEXT)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags, uint32_t index); +typedef void (VKAPI_PTR *PFN_vkCmdEndQueryIndexedEXT)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, uint32_t index); +typedef void (VKAPI_PTR *PFN_vkCmdDrawIndirectByteCountEXT)(VkCommandBuffer commandBuffer, uint32_t instanceCount, uint32_t firstInstance, VkBuffer counterBuffer, VkDeviceSize counterBufferOffset, uint32_t counterOffset, uint32_t vertexStride); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdBindTransformFeedbackBuffersEXT( + VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets, + const VkDeviceSize* pSizes); + +VKAPI_ATTR void VKAPI_CALL vkCmdBeginTransformFeedbackEXT( + VkCommandBuffer commandBuffer, + uint32_t firstCounterBuffer, + uint32_t counterBufferCount, + const VkBuffer* pCounterBuffers, + const VkDeviceSize* pCounterBufferOffsets); + +VKAPI_ATTR void VKAPI_CALL vkCmdEndTransformFeedbackEXT( + VkCommandBuffer commandBuffer, + uint32_t firstCounterBuffer, + uint32_t counterBufferCount, + const VkBuffer* pCounterBuffers, + const VkDeviceSize* pCounterBufferOffsets); + +VKAPI_ATTR void VKAPI_CALL vkCmdBeginQueryIndexedEXT( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query, + VkQueryControlFlags flags, + uint32_t index); + +VKAPI_ATTR void VKAPI_CALL vkCmdEndQueryIndexedEXT( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query, + uint32_t index); + +VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndirectByteCountEXT( + VkCommandBuffer 
commandBuffer, + uint32_t instanceCount, + uint32_t firstInstance, + VkBuffer counterBuffer, + VkDeviceSize counterBufferOffset, + uint32_t counterOffset, + uint32_t vertexStride); +#endif + + +#define VK_NVX_image_view_handle 1 +#define VK_NVX_IMAGE_VIEW_HANDLE_SPEC_VERSION 2 +#define VK_NVX_IMAGE_VIEW_HANDLE_EXTENSION_NAME "VK_NVX_image_view_handle" +typedef struct VkImageViewHandleInfoNVX { + VkStructureType sType; + const void* pNext; + VkImageView imageView; + VkDescriptorType descriptorType; + VkSampler sampler; +} VkImageViewHandleInfoNVX; + +typedef struct VkImageViewAddressPropertiesNVX { + VkStructureType sType; + void* pNext; + VkDeviceAddress deviceAddress; + VkDeviceSize size; +} VkImageViewAddressPropertiesNVX; + +typedef uint32_t (VKAPI_PTR *PFN_vkGetImageViewHandleNVX)(VkDevice device, const VkImageViewHandleInfoNVX* pInfo); +typedef VkResult (VKAPI_PTR *PFN_vkGetImageViewAddressNVX)(VkDevice device, VkImageView imageView, VkImageViewAddressPropertiesNVX* pProperties); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR uint32_t VKAPI_CALL vkGetImageViewHandleNVX( + VkDevice device, + const VkImageViewHandleInfoNVX* pInfo); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetImageViewAddressNVX( + VkDevice device, + VkImageView imageView, + VkImageViewAddressPropertiesNVX* pProperties); +#endif + + +#define VK_AMD_draw_indirect_count 1 +#define VK_AMD_DRAW_INDIRECT_COUNT_SPEC_VERSION 2 +#define VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME "VK_AMD_draw_indirect_count" typedef void (VKAPI_PTR *PFN_vkCmdDrawIndirectCountAMD)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride); typedef void (VKAPI_PTR *PFN_vkCmdDrawIndexedIndirectCountAMD)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride); @@ -5999,13 +7713,14 @@ VKAPI_ATTR void VKAPI_CALL 
vkCmdDrawIndexedIndirectCountAMD( uint32_t stride); #endif + #define VK_AMD_negative_viewport_height 1 #define VK_AMD_NEGATIVE_VIEWPORT_HEIGHT_SPEC_VERSION 1 #define VK_AMD_NEGATIVE_VIEWPORT_HEIGHT_EXTENSION_NAME "VK_AMD_negative_viewport_height" #define VK_AMD_gpu_shader_half_float 1 -#define VK_AMD_GPU_SHADER_HALF_FLOAT_SPEC_VERSION 1 +#define VK_AMD_GPU_SHADER_HALF_FLOAT_SPEC_VERSION 2 #define VK_AMD_GPU_SHADER_HALF_FLOAT_EXTENSION_NAME "VK_AMD_gpu_shader_half_float" @@ -6017,7 +7732,6 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexedIndirectCountAMD( #define VK_AMD_texture_gather_bias_lod 1 #define VK_AMD_TEXTURE_GATHER_BIAS_LOD_SPEC_VERSION 1 #define VK_AMD_TEXTURE_GATHER_BIAS_LOD_EXTENSION_NAME "VK_AMD_texture_gather_bias_lod" - typedef struct VkTextureLODGatherFormatPropertiesAMD { VkStructureType sType; void* pNext; @@ -6030,17 +7744,12 @@ typedef struct VkTextureLODGatherFormatPropertiesAMD { #define VK_AMD_SHADER_INFO_SPEC_VERSION 1 #define VK_AMD_SHADER_INFO_EXTENSION_NAME "VK_AMD_shader_info" - typedef enum VkShaderInfoTypeAMD { VK_SHADER_INFO_TYPE_STATISTICS_AMD = 0, VK_SHADER_INFO_TYPE_BINARY_AMD = 1, VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD = 2, - VK_SHADER_INFO_TYPE_BEGIN_RANGE_AMD = VK_SHADER_INFO_TYPE_STATISTICS_AMD, - VK_SHADER_INFO_TYPE_END_RANGE_AMD = VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD, - VK_SHADER_INFO_TYPE_RANGE_SIZE_AMD = (VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD - VK_SHADER_INFO_TYPE_STATISTICS_AMD + 1), VK_SHADER_INFO_TYPE_MAX_ENUM_AMD = 0x7FFFFFFF } VkShaderInfoTypeAMD; - typedef struct VkShaderResourceUsageAMD { uint32_t numUsedVgprs; uint32_t numUsedSgprs; @@ -6059,7 +7768,6 @@ typedef struct VkShaderStatisticsInfoAMD { uint32_t computeWorkGroupSize[3]; } VkShaderStatisticsInfoAMD; - typedef VkResult (VKAPI_PTR *PFN_vkGetShaderInfoAMD)(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits shaderStage, VkShaderInfoTypeAMD infoType, size_t* pInfoSize, void* pInfo); #ifndef VK_NO_PROTOTYPES @@ -6072,11 +7780,23 @@ VKAPI_ATTR VkResult VKAPI_CALL 
vkGetShaderInfoAMD( void* pInfo); #endif + #define VK_AMD_shader_image_load_store_lod 1 #define VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_SPEC_VERSION 1 #define VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME "VK_AMD_shader_image_load_store_lod" +#define VK_NV_corner_sampled_image 1 +#define VK_NV_CORNER_SAMPLED_IMAGE_SPEC_VERSION 2 +#define VK_NV_CORNER_SAMPLED_IMAGE_EXTENSION_NAME "VK_NV_corner_sampled_image" +typedef struct VkPhysicalDeviceCornerSampledImageFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 cornerSampledImage; +} VkPhysicalDeviceCornerSampledImageFeaturesNV; + + + #define VK_IMG_format_pvrtc 1 #define VK_IMG_FORMAT_PVRTC_SPEC_VERSION 1 #define VK_IMG_FORMAT_PVRTC_EXTENSION_NAME "VK_IMG_format_pvrtc" @@ -6086,7 +7806,6 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetShaderInfoAMD( #define VK_NV_EXTERNAL_MEMORY_CAPABILITIES_SPEC_VERSION 1 #define VK_NV_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME "VK_NV_external_memory_capabilities" - typedef enum VkExternalMemoryHandleTypeFlagBitsNV { VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_NV = 0x00000001, VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_NV = 0x00000002, @@ -6103,7 +7822,6 @@ typedef enum VkExternalMemoryFeatureFlagBitsNV { VK_EXTERNAL_MEMORY_FEATURE_FLAG_BITS_MAX_ENUM_NV = 0x7FFFFFFF } VkExternalMemoryFeatureFlagBitsNV; typedef VkFlags VkExternalMemoryFeatureFlagsNV; - typedef struct VkExternalImageFormatPropertiesNV { VkImageFormatProperties imageFormatProperties; VkExternalMemoryFeatureFlagsNV externalMemoryFeatures; @@ -6111,7 +7829,6 @@ typedef struct VkExternalImageFormatPropertiesNV { VkExternalMemoryHandleTypeFlagsNV compatibleHandleTypes; } VkExternalImageFormatPropertiesNV; - typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceExternalImageFormatPropertiesNV)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, VkExternalMemoryHandleTypeFlagsNV externalHandleType, 
VkExternalImageFormatPropertiesNV* pExternalImageFormatProperties); #ifndef VK_NO_PROTOTYPES @@ -6126,10 +7843,10 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceExternalImageFormatPropertiesN VkExternalImageFormatPropertiesNV* pExternalImageFormatProperties); #endif + #define VK_NV_external_memory 1 #define VK_NV_EXTERNAL_MEMORY_SPEC_VERSION 1 #define VK_NV_EXTERNAL_MEMORY_EXTENSION_NAME "VK_NV_external_memory" - typedef struct VkExternalMemoryImageCreateInfoNV { VkStructureType sType; const void* pNext; @@ -6145,24 +7862,19 @@ typedef struct VkExportMemoryAllocateInfoNV { #define VK_EXT_validation_flags 1 -#define VK_EXT_VALIDATION_FLAGS_SPEC_VERSION 1 +#define VK_EXT_VALIDATION_FLAGS_SPEC_VERSION 2 #define VK_EXT_VALIDATION_FLAGS_EXTENSION_NAME "VK_EXT_validation_flags" - typedef enum VkValidationCheckEXT { VK_VALIDATION_CHECK_ALL_EXT = 0, VK_VALIDATION_CHECK_SHADERS_EXT = 1, - VK_VALIDATION_CHECK_BEGIN_RANGE_EXT = VK_VALIDATION_CHECK_ALL_EXT, - VK_VALIDATION_CHECK_END_RANGE_EXT = VK_VALIDATION_CHECK_SHADERS_EXT, - VK_VALIDATION_CHECK_RANGE_SIZE_EXT = (VK_VALIDATION_CHECK_SHADERS_EXT - VK_VALIDATION_CHECK_ALL_EXT + 1), VK_VALIDATION_CHECK_MAX_ENUM_EXT = 0x7FFFFFFF } VkValidationCheckEXT; - typedef struct VkValidationFlagsEXT { - VkStructureType sType; - const void* pNext; - uint32_t disabledValidationCheckCount; - VkValidationCheckEXT* pDisabledValidationChecks; + VkStructureType sType; + const void* pNext; + uint32_t disabledValidationCheckCount; + const VkValidationCheckEXT* pDisabledValidationChecks; } VkValidationFlagsEXT; @@ -6177,237 +7889,80 @@ typedef struct VkValidationFlagsEXT { #define VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME "VK_EXT_shader_subgroup_vote" -#define VK_NVX_device_generated_commands 1 -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkObjectTableNVX) -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkIndirectCommandsLayoutNVX) - -#define VK_NVX_DEVICE_GENERATED_COMMANDS_SPEC_VERSION 3 -#define VK_NVX_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME 
"VK_NVX_device_generated_commands" +#define VK_EXT_texture_compression_astc_hdr 1 +#define VK_EXT_TEXTURE_COMPRESSION_ASTC_HDR_SPEC_VERSION 1 +#define VK_EXT_TEXTURE_COMPRESSION_ASTC_HDR_EXTENSION_NAME "VK_EXT_texture_compression_astc_hdr" +typedef struct VkPhysicalDeviceTextureCompressionASTCHDRFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 textureCompressionASTC_HDR; +} VkPhysicalDeviceTextureCompressionASTCHDRFeaturesEXT; -typedef enum VkIndirectCommandsTokenTypeNVX { - VK_INDIRECT_COMMANDS_TOKEN_TYPE_PIPELINE_NVX = 0, - VK_INDIRECT_COMMANDS_TOKEN_TYPE_DESCRIPTOR_SET_NVX = 1, - VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NVX = 2, - VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NVX = 3, - VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NVX = 4, - VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NVX = 5, - VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NVX = 6, - VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_NVX = 7, - VK_INDIRECT_COMMANDS_TOKEN_TYPE_BEGIN_RANGE_NVX = VK_INDIRECT_COMMANDS_TOKEN_TYPE_PIPELINE_NVX, - VK_INDIRECT_COMMANDS_TOKEN_TYPE_END_RANGE_NVX = VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_NVX, - VK_INDIRECT_COMMANDS_TOKEN_TYPE_RANGE_SIZE_NVX = (VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_NVX - VK_INDIRECT_COMMANDS_TOKEN_TYPE_PIPELINE_NVX + 1), - VK_INDIRECT_COMMANDS_TOKEN_TYPE_MAX_ENUM_NVX = 0x7FFFFFFF -} VkIndirectCommandsTokenTypeNVX; -typedef enum VkObjectEntryTypeNVX { - VK_OBJECT_ENTRY_TYPE_DESCRIPTOR_SET_NVX = 0, - VK_OBJECT_ENTRY_TYPE_PIPELINE_NVX = 1, - VK_OBJECT_ENTRY_TYPE_INDEX_BUFFER_NVX = 2, - VK_OBJECT_ENTRY_TYPE_VERTEX_BUFFER_NVX = 3, - VK_OBJECT_ENTRY_TYPE_PUSH_CONSTANT_NVX = 4, - VK_OBJECT_ENTRY_TYPE_BEGIN_RANGE_NVX = VK_OBJECT_ENTRY_TYPE_DESCRIPTOR_SET_NVX, - VK_OBJECT_ENTRY_TYPE_END_RANGE_NVX = VK_OBJECT_ENTRY_TYPE_PUSH_CONSTANT_NVX, - VK_OBJECT_ENTRY_TYPE_RANGE_SIZE_NVX = (VK_OBJECT_ENTRY_TYPE_PUSH_CONSTANT_NVX - VK_OBJECT_ENTRY_TYPE_DESCRIPTOR_SET_NVX + 1), - VK_OBJECT_ENTRY_TYPE_MAX_ENUM_NVX = 0x7FFFFFFF -} VkObjectEntryTypeNVX; - - 
-typedef enum VkIndirectCommandsLayoutUsageFlagBitsNVX { - VK_INDIRECT_COMMANDS_LAYOUT_USAGE_UNORDERED_SEQUENCES_BIT_NVX = 0x00000001, - VK_INDIRECT_COMMANDS_LAYOUT_USAGE_SPARSE_SEQUENCES_BIT_NVX = 0x00000002, - VK_INDIRECT_COMMANDS_LAYOUT_USAGE_EMPTY_EXECUTIONS_BIT_NVX = 0x00000004, - VK_INDIRECT_COMMANDS_LAYOUT_USAGE_INDEXED_SEQUENCES_BIT_NVX = 0x00000008, - VK_INDIRECT_COMMANDS_LAYOUT_USAGE_FLAG_BITS_MAX_ENUM_NVX = 0x7FFFFFFF -} VkIndirectCommandsLayoutUsageFlagBitsNVX; -typedef VkFlags VkIndirectCommandsLayoutUsageFlagsNVX; - -typedef enum VkObjectEntryUsageFlagBitsNVX { - VK_OBJECT_ENTRY_USAGE_GRAPHICS_BIT_NVX = 0x00000001, - VK_OBJECT_ENTRY_USAGE_COMPUTE_BIT_NVX = 0x00000002, - VK_OBJECT_ENTRY_USAGE_FLAG_BITS_MAX_ENUM_NVX = 0x7FFFFFFF -} VkObjectEntryUsageFlagBitsNVX; -typedef VkFlags VkObjectEntryUsageFlagsNVX; - -typedef struct VkDeviceGeneratedCommandsFeaturesNVX { +#define VK_EXT_astc_decode_mode 1 +#define VK_EXT_ASTC_DECODE_MODE_SPEC_VERSION 1 +#define VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME "VK_EXT_astc_decode_mode" +typedef struct VkImageViewASTCDecodeModeEXT { VkStructureType sType; const void* pNext; - VkBool32 computeBindingPointSupport; -} VkDeviceGeneratedCommandsFeaturesNVX; + VkFormat decodeMode; +} VkImageViewASTCDecodeModeEXT; -typedef struct VkDeviceGeneratedCommandsLimitsNVX { +typedef struct VkPhysicalDeviceASTCDecodeFeaturesEXT { VkStructureType sType; - const void* pNext; - uint32_t maxIndirectCommandsLayoutTokenCount; - uint32_t maxObjectEntryCounts; - uint32_t minSequenceCountBufferOffsetAlignment; - uint32_t minSequenceIndexBufferOffsetAlignment; - uint32_t minCommandsTokenBufferOffsetAlignment; -} VkDeviceGeneratedCommandsLimitsNVX; + void* pNext; + VkBool32 decodeModeSharedExponent; +} VkPhysicalDeviceASTCDecodeFeaturesEXT; -typedef struct VkIndirectCommandsTokenNVX { - VkIndirectCommandsTokenTypeNVX tokenType; + + +#define VK_EXT_conditional_rendering 1 +#define VK_EXT_CONDITIONAL_RENDERING_SPEC_VERSION 2 +#define 
VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME "VK_EXT_conditional_rendering" + +typedef enum VkConditionalRenderingFlagBitsEXT { + VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT = 0x00000001, + VK_CONDITIONAL_RENDERING_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF +} VkConditionalRenderingFlagBitsEXT; +typedef VkFlags VkConditionalRenderingFlagsEXT; +typedef struct VkConditionalRenderingBeginInfoEXT { + VkStructureType sType; + const void* pNext; VkBuffer buffer; VkDeviceSize offset; -} VkIndirectCommandsTokenNVX; + VkConditionalRenderingFlagsEXT flags; +} VkConditionalRenderingBeginInfoEXT; -typedef struct VkIndirectCommandsLayoutTokenNVX { - VkIndirectCommandsTokenTypeNVX tokenType; - uint32_t bindingUnit; - uint32_t dynamicCount; - uint32_t divisor; -} VkIndirectCommandsLayoutTokenNVX; +typedef struct VkPhysicalDeviceConditionalRenderingFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 conditionalRendering; + VkBool32 inheritedConditionalRendering; +} VkPhysicalDeviceConditionalRenderingFeaturesEXT; -typedef struct VkIndirectCommandsLayoutCreateInfoNVX { - VkStructureType sType; - const void* pNext; - VkPipelineBindPoint pipelineBindPoint; - VkIndirectCommandsLayoutUsageFlagsNVX flags; - uint32_t tokenCount; - const VkIndirectCommandsLayoutTokenNVX* pTokens; -} VkIndirectCommandsLayoutCreateInfoNVX; +typedef struct VkCommandBufferInheritanceConditionalRenderingInfoEXT { + VkStructureType sType; + const void* pNext; + VkBool32 conditionalRenderingEnable; +} VkCommandBufferInheritanceConditionalRenderingInfoEXT; -typedef struct VkCmdProcessCommandsInfoNVX { - VkStructureType sType; - const void* pNext; - VkObjectTableNVX objectTable; - VkIndirectCommandsLayoutNVX indirectCommandsLayout; - uint32_t indirectCommandsTokenCount; - const VkIndirectCommandsTokenNVX* pIndirectCommandsTokens; - uint32_t maxSequencesCount; - VkCommandBuffer targetCommandBuffer; - VkBuffer sequencesCountBuffer; - VkDeviceSize sequencesCountOffset; - VkBuffer sequencesIndexBuffer; - 
VkDeviceSize sequencesIndexOffset; -} VkCmdProcessCommandsInfoNVX; - -typedef struct VkCmdReserveSpaceForCommandsInfoNVX { - VkStructureType sType; - const void* pNext; - VkObjectTableNVX objectTable; - VkIndirectCommandsLayoutNVX indirectCommandsLayout; - uint32_t maxSequencesCount; -} VkCmdReserveSpaceForCommandsInfoNVX; - -typedef struct VkObjectTableCreateInfoNVX { - VkStructureType sType; - const void* pNext; - uint32_t objectCount; - const VkObjectEntryTypeNVX* pObjectEntryTypes; - const uint32_t* pObjectEntryCounts; - const VkObjectEntryUsageFlagsNVX* pObjectEntryUsageFlags; - uint32_t maxUniformBuffersPerDescriptor; - uint32_t maxStorageBuffersPerDescriptor; - uint32_t maxStorageImagesPerDescriptor; - uint32_t maxSampledImagesPerDescriptor; - uint32_t maxPipelineLayouts; -} VkObjectTableCreateInfoNVX; - -typedef struct VkObjectTableEntryNVX { - VkObjectEntryTypeNVX type; - VkObjectEntryUsageFlagsNVX flags; -} VkObjectTableEntryNVX; - -typedef struct VkObjectTablePipelineEntryNVX { - VkObjectEntryTypeNVX type; - VkObjectEntryUsageFlagsNVX flags; - VkPipeline pipeline; -} VkObjectTablePipelineEntryNVX; - -typedef struct VkObjectTableDescriptorSetEntryNVX { - VkObjectEntryTypeNVX type; - VkObjectEntryUsageFlagsNVX flags; - VkPipelineLayout pipelineLayout; - VkDescriptorSet descriptorSet; -} VkObjectTableDescriptorSetEntryNVX; - -typedef struct VkObjectTableVertexBufferEntryNVX { - VkObjectEntryTypeNVX type; - VkObjectEntryUsageFlagsNVX flags; - VkBuffer buffer; -} VkObjectTableVertexBufferEntryNVX; - -typedef struct VkObjectTableIndexBufferEntryNVX { - VkObjectEntryTypeNVX type; - VkObjectEntryUsageFlagsNVX flags; - VkBuffer buffer; - VkIndexType indexType; -} VkObjectTableIndexBufferEntryNVX; - -typedef struct VkObjectTablePushConstantEntryNVX { - VkObjectEntryTypeNVX type; - VkObjectEntryUsageFlagsNVX flags; - VkPipelineLayout pipelineLayout; - VkShaderStageFlags stageFlags; -} VkObjectTablePushConstantEntryNVX; - - -typedef void (VKAPI_PTR 
*PFN_vkCmdProcessCommandsNVX)(VkCommandBuffer commandBuffer, const VkCmdProcessCommandsInfoNVX* pProcessCommandsInfo); -typedef void (VKAPI_PTR *PFN_vkCmdReserveSpaceForCommandsNVX)(VkCommandBuffer commandBuffer, const VkCmdReserveSpaceForCommandsInfoNVX* pReserveSpaceInfo); -typedef VkResult (VKAPI_PTR *PFN_vkCreateIndirectCommandsLayoutNVX)(VkDevice device, const VkIndirectCommandsLayoutCreateInfoNVX* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkIndirectCommandsLayoutNVX* pIndirectCommandsLayout); -typedef void (VKAPI_PTR *PFN_vkDestroyIndirectCommandsLayoutNVX)(VkDevice device, VkIndirectCommandsLayoutNVX indirectCommandsLayout, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkCreateObjectTableNVX)(VkDevice device, const VkObjectTableCreateInfoNVX* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkObjectTableNVX* pObjectTable); -typedef void (VKAPI_PTR *PFN_vkDestroyObjectTableNVX)(VkDevice device, VkObjectTableNVX objectTable, const VkAllocationCallbacks* pAllocator); -typedef VkResult (VKAPI_PTR *PFN_vkRegisterObjectsNVX)(VkDevice device, VkObjectTableNVX objectTable, uint32_t objectCount, const VkObjectTableEntryNVX* const* ppObjectTableEntries, const uint32_t* pObjectIndices); -typedef VkResult (VKAPI_PTR *PFN_vkUnregisterObjectsNVX)(VkDevice device, VkObjectTableNVX objectTable, uint32_t objectCount, const VkObjectEntryTypeNVX* pObjectEntryTypes, const uint32_t* pObjectIndices); -typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX)(VkPhysicalDevice physicalDevice, VkDeviceGeneratedCommandsFeaturesNVX* pFeatures, VkDeviceGeneratedCommandsLimitsNVX* pLimits); +typedef void (VKAPI_PTR *PFN_vkCmdBeginConditionalRenderingEXT)(VkCommandBuffer commandBuffer, const VkConditionalRenderingBeginInfoEXT* pConditionalRenderingBegin); +typedef void (VKAPI_PTR *PFN_vkCmdEndConditionalRenderingEXT)(VkCommandBuffer commandBuffer); #ifndef VK_NO_PROTOTYPES -VKAPI_ATTR void VKAPI_CALL 
vkCmdProcessCommandsNVX( +VKAPI_ATTR void VKAPI_CALL vkCmdBeginConditionalRenderingEXT( VkCommandBuffer commandBuffer, - const VkCmdProcessCommandsInfoNVX* pProcessCommandsInfo); + const VkConditionalRenderingBeginInfoEXT* pConditionalRenderingBegin); -VKAPI_ATTR void VKAPI_CALL vkCmdReserveSpaceForCommandsNVX( - VkCommandBuffer commandBuffer, - const VkCmdReserveSpaceForCommandsInfoNVX* pReserveSpaceInfo); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateIndirectCommandsLayoutNVX( - VkDevice device, - const VkIndirectCommandsLayoutCreateInfoNVX* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkIndirectCommandsLayoutNVX* pIndirectCommandsLayout); - -VKAPI_ATTR void VKAPI_CALL vkDestroyIndirectCommandsLayoutNVX( - VkDevice device, - VkIndirectCommandsLayoutNVX indirectCommandsLayout, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkCreateObjectTableNVX( - VkDevice device, - const VkObjectTableCreateInfoNVX* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkObjectTableNVX* pObjectTable); - -VKAPI_ATTR void VKAPI_CALL vkDestroyObjectTableNVX( - VkDevice device, - VkObjectTableNVX objectTable, - const VkAllocationCallbacks* pAllocator); - -VKAPI_ATTR VkResult VKAPI_CALL vkRegisterObjectsNVX( - VkDevice device, - VkObjectTableNVX objectTable, - uint32_t objectCount, - const VkObjectTableEntryNVX* const* ppObjectTableEntries, - const uint32_t* pObjectIndices); - -VKAPI_ATTR VkResult VKAPI_CALL vkUnregisterObjectsNVX( - VkDevice device, - VkObjectTableNVX objectTable, - uint32_t objectCount, - const VkObjectEntryTypeNVX* pObjectEntryTypes, - const uint32_t* pObjectIndices); - -VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX( - VkPhysicalDevice physicalDevice, - VkDeviceGeneratedCommandsFeaturesNVX* pFeatures, - VkDeviceGeneratedCommandsLimitsNVX* pLimits); +VKAPI_ATTR void VKAPI_CALL vkCmdEndConditionalRenderingEXT( + VkCommandBuffer commandBuffer); #endif + #define VK_NV_clip_space_w_scaling 1 
#define VK_NV_CLIP_SPACE_W_SCALING_SPEC_VERSION 1 #define VK_NV_CLIP_SPACE_W_SCALING_EXTENSION_NAME "VK_NV_clip_space_w_scaling" - typedef struct VkViewportWScalingNV { float xcoeff; float ycoeff; @@ -6421,7 +7976,6 @@ typedef struct VkPipelineViewportWScalingStateCreateInfoNV { const VkViewportWScalingNV* pViewportWScalings; } VkPipelineViewportWScalingStateCreateInfoNV; - typedef void (VKAPI_PTR *PFN_vkCmdSetViewportWScalingNV)(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount, const VkViewportWScalingNV* pViewportWScalings); #ifndef VK_NO_PROTOTYPES @@ -6432,10 +7986,10 @@ VKAPI_ATTR void VKAPI_CALL vkCmdSetViewportWScalingNV( const VkViewportWScalingNV* pViewportWScalings); #endif + #define VK_EXT_direct_mode_display 1 #define VK_EXT_DIRECT_MODE_DISPLAY_SPEC_VERSION 1 #define VK_EXT_DIRECT_MODE_DISPLAY_EXTENSION_NAME "VK_EXT_direct_mode_display" - typedef VkResult (VKAPI_PTR *PFN_vkReleaseDisplayEXT)(VkPhysicalDevice physicalDevice, VkDisplayKHR display); #ifndef VK_NO_PROTOTYPES @@ -6444,18 +7998,16 @@ VKAPI_ATTR VkResult VKAPI_CALL vkReleaseDisplayEXT( VkDisplayKHR display); #endif + #define VK_EXT_display_surface_counter 1 #define VK_EXT_DISPLAY_SURFACE_COUNTER_SPEC_VERSION 1 #define VK_EXT_DISPLAY_SURFACE_COUNTER_EXTENSION_NAME "VK_EXT_display_surface_counter" -#define VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES2_EXT VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_EXT - typedef enum VkSurfaceCounterFlagBitsEXT { VK_SURFACE_COUNTER_VBLANK_EXT = 0x00000001, VK_SURFACE_COUNTER_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF } VkSurfaceCounterFlagBitsEXT; typedef VkFlags VkSurfaceCounterFlagsEXT; - typedef struct VkSurfaceCapabilities2EXT { VkStructureType sType; void* pNext; @@ -6472,7 +8024,6 @@ typedef struct VkSurfaceCapabilities2EXT { VkSurfaceCounterFlagsEXT supportedSurfaceCounters; } VkSurfaceCapabilities2EXT; - typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceCapabilities2EXT)(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, 
VkSurfaceCapabilities2EXT* pSurfaceCapabilities); #ifndef VK_NO_PROTOTYPES @@ -6482,37 +8033,27 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceCapabilities2EXT( VkSurfaceCapabilities2EXT* pSurfaceCapabilities); #endif + #define VK_EXT_display_control 1 #define VK_EXT_DISPLAY_CONTROL_SPEC_VERSION 1 #define VK_EXT_DISPLAY_CONTROL_EXTENSION_NAME "VK_EXT_display_control" - typedef enum VkDisplayPowerStateEXT { VK_DISPLAY_POWER_STATE_OFF_EXT = 0, VK_DISPLAY_POWER_STATE_SUSPEND_EXT = 1, VK_DISPLAY_POWER_STATE_ON_EXT = 2, - VK_DISPLAY_POWER_STATE_BEGIN_RANGE_EXT = VK_DISPLAY_POWER_STATE_OFF_EXT, - VK_DISPLAY_POWER_STATE_END_RANGE_EXT = VK_DISPLAY_POWER_STATE_ON_EXT, - VK_DISPLAY_POWER_STATE_RANGE_SIZE_EXT = (VK_DISPLAY_POWER_STATE_ON_EXT - VK_DISPLAY_POWER_STATE_OFF_EXT + 1), VK_DISPLAY_POWER_STATE_MAX_ENUM_EXT = 0x7FFFFFFF } VkDisplayPowerStateEXT; typedef enum VkDeviceEventTypeEXT { VK_DEVICE_EVENT_TYPE_DISPLAY_HOTPLUG_EXT = 0, - VK_DEVICE_EVENT_TYPE_BEGIN_RANGE_EXT = VK_DEVICE_EVENT_TYPE_DISPLAY_HOTPLUG_EXT, - VK_DEVICE_EVENT_TYPE_END_RANGE_EXT = VK_DEVICE_EVENT_TYPE_DISPLAY_HOTPLUG_EXT, - VK_DEVICE_EVENT_TYPE_RANGE_SIZE_EXT = (VK_DEVICE_EVENT_TYPE_DISPLAY_HOTPLUG_EXT - VK_DEVICE_EVENT_TYPE_DISPLAY_HOTPLUG_EXT + 1), VK_DEVICE_EVENT_TYPE_MAX_ENUM_EXT = 0x7FFFFFFF } VkDeviceEventTypeEXT; typedef enum VkDisplayEventTypeEXT { VK_DISPLAY_EVENT_TYPE_FIRST_PIXEL_OUT_EXT = 0, - VK_DISPLAY_EVENT_TYPE_BEGIN_RANGE_EXT = VK_DISPLAY_EVENT_TYPE_FIRST_PIXEL_OUT_EXT, - VK_DISPLAY_EVENT_TYPE_END_RANGE_EXT = VK_DISPLAY_EVENT_TYPE_FIRST_PIXEL_OUT_EXT, - VK_DISPLAY_EVENT_TYPE_RANGE_SIZE_EXT = (VK_DISPLAY_EVENT_TYPE_FIRST_PIXEL_OUT_EXT - VK_DISPLAY_EVENT_TYPE_FIRST_PIXEL_OUT_EXT + 1), VK_DISPLAY_EVENT_TYPE_MAX_ENUM_EXT = 0x7FFFFFFF } VkDisplayEventTypeEXT; - typedef struct VkDisplayPowerInfoEXT { VkStructureType sType; const void* pNext; @@ -6537,7 +8078,6 @@ typedef struct VkSwapchainCounterCreateInfoEXT { VkSurfaceCounterFlagsEXT surfaceCounters; } 
VkSwapchainCounterCreateInfoEXT; - typedef VkResult (VKAPI_PTR *PFN_vkDisplayPowerControlEXT)(VkDevice device, VkDisplayKHR display, const VkDisplayPowerInfoEXT* pDisplayPowerInfo); typedef VkResult (VKAPI_PTR *PFN_vkRegisterDeviceEventEXT)(VkDevice device, const VkDeviceEventInfoEXT* pDeviceEventInfo, const VkAllocationCallbacks* pAllocator, VkFence* pFence); typedef VkResult (VKAPI_PTR *PFN_vkRegisterDisplayEventEXT)(VkDevice device, VkDisplayKHR display, const VkDisplayEventInfoEXT* pDisplayEventInfo, const VkAllocationCallbacks* pAllocator, VkFence* pFence); @@ -6569,10 +8109,10 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainCounterEXT( uint64_t* pCounterValue); #endif + #define VK_GOOGLE_display_timing 1 #define VK_GOOGLE_DISPLAY_TIMING_SPEC_VERSION 1 #define VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME "VK_GOOGLE_display_timing" - typedef struct VkRefreshCycleDurationGOOGLE { uint64_t refreshDuration; } VkRefreshCycleDurationGOOGLE; @@ -6597,7 +8137,6 @@ typedef struct VkPresentTimesInfoGOOGLE { const VkPresentTimeGOOGLE* pTimes; } VkPresentTimesInfoGOOGLE; - typedef VkResult (VKAPI_PTR *PFN_vkGetRefreshCycleDurationGOOGLE)(VkDevice device, VkSwapchainKHR swapchain, VkRefreshCycleDurationGOOGLE* pDisplayTimingProperties); typedef VkResult (VKAPI_PTR *PFN_vkGetPastPresentationTimingGOOGLE)(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pPresentationTimingCount, VkPastPresentationTimingGOOGLE* pPresentationTimings); @@ -6614,6 +8153,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPastPresentationTimingGOOGLE( VkPastPresentationTimingGOOGLE* pPresentationTimings); #endif + #define VK_NV_sample_mask_override_coverage 1 #define VK_NV_SAMPLE_MASK_OVERRIDE_COVERAGE_SPEC_VERSION 1 #define VK_NV_SAMPLE_MASK_OVERRIDE_COVERAGE_EXTENSION_NAME "VK_NV_sample_mask_override_coverage" @@ -6632,7 +8172,6 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPastPresentationTimingGOOGLE( #define VK_NVX_multiview_per_view_attributes 1 #define VK_NVX_MULTIVIEW_PER_VIEW_ATTRIBUTES_SPEC_VERSION 1 
#define VK_NVX_MULTIVIEW_PER_VIEW_ATTRIBUTES_EXTENSION_NAME "VK_NVX_multiview_per_view_attributes" - typedef struct VkPhysicalDeviceMultiviewPerViewAttributesPropertiesNVX { VkStructureType sType; void* pNext; @@ -6645,7 +8184,6 @@ typedef struct VkPhysicalDeviceMultiviewPerViewAttributesPropertiesNVX { #define VK_NV_VIEWPORT_SWIZZLE_SPEC_VERSION 1 #define VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME "VK_NV_viewport_swizzle" - typedef enum VkViewportCoordinateSwizzleNV { VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_X_NV = 0, VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_X_NV = 1, @@ -6655,14 +8193,9 @@ typedef enum VkViewportCoordinateSwizzleNV { VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_Z_NV = 5, VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_W_NV = 6, VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_W_NV = 7, - VK_VIEWPORT_COORDINATE_SWIZZLE_BEGIN_RANGE_NV = VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_X_NV, - VK_VIEWPORT_COORDINATE_SWIZZLE_END_RANGE_NV = VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_W_NV, - VK_VIEWPORT_COORDINATE_SWIZZLE_RANGE_SIZE_NV = (VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_W_NV - VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_X_NV + 1), VK_VIEWPORT_COORDINATE_SWIZZLE_MAX_ENUM_NV = 0x7FFFFFFF } VkViewportCoordinateSwizzleNV; - typedef VkFlags VkPipelineViewportSwizzleStateCreateFlagsNV; - typedef struct VkViewportSwizzleNV { VkViewportCoordinateSwizzleNV x; VkViewportCoordinateSwizzleNV y; @@ -6684,18 +8217,12 @@ typedef struct VkPipelineViewportSwizzleStateCreateInfoNV { #define VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION 1 #define VK_EXT_DISCARD_RECTANGLES_EXTENSION_NAME "VK_EXT_discard_rectangles" - typedef enum VkDiscardRectangleModeEXT { VK_DISCARD_RECTANGLE_MODE_INCLUSIVE_EXT = 0, VK_DISCARD_RECTANGLE_MODE_EXCLUSIVE_EXT = 1, - VK_DISCARD_RECTANGLE_MODE_BEGIN_RANGE_EXT = VK_DISCARD_RECTANGLE_MODE_INCLUSIVE_EXT, - VK_DISCARD_RECTANGLE_MODE_END_RANGE_EXT = VK_DISCARD_RECTANGLE_MODE_EXCLUSIVE_EXT, - VK_DISCARD_RECTANGLE_MODE_RANGE_SIZE_EXT = (VK_DISCARD_RECTANGLE_MODE_EXCLUSIVE_EXT - 
VK_DISCARD_RECTANGLE_MODE_INCLUSIVE_EXT + 1), VK_DISCARD_RECTANGLE_MODE_MAX_ENUM_EXT = 0x7FFFFFFF } VkDiscardRectangleModeEXT; - typedef VkFlags VkPipelineDiscardRectangleStateCreateFlagsEXT; - typedef struct VkPhysicalDeviceDiscardRectanglePropertiesEXT { VkStructureType sType; void* pNext; @@ -6711,7 +8238,6 @@ typedef struct VkPipelineDiscardRectangleStateCreateInfoEXT { const VkRect2D* pDiscardRectangles; } VkPipelineDiscardRectangleStateCreateInfoEXT; - typedef void (VKAPI_PTR *PFN_vkCmdSetDiscardRectangleEXT)(VkCommandBuffer commandBuffer, uint32_t firstDiscardRectangle, uint32_t discardRectangleCount, const VkRect2D* pDiscardRectangles); #ifndef VK_NO_PROTOTYPES @@ -6722,23 +8248,18 @@ VKAPI_ATTR void VKAPI_CALL vkCmdSetDiscardRectangleEXT( const VkRect2D* pDiscardRectangles); #endif + #define VK_EXT_conservative_rasterization 1 #define VK_EXT_CONSERVATIVE_RASTERIZATION_SPEC_VERSION 1 #define VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME "VK_EXT_conservative_rasterization" - typedef enum VkConservativeRasterizationModeEXT { VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT = 0, VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT = 1, VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT = 2, - VK_CONSERVATIVE_RASTERIZATION_MODE_BEGIN_RANGE_EXT = VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, - VK_CONSERVATIVE_RASTERIZATION_MODE_END_RANGE_EXT = VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT, - VK_CONSERVATIVE_RASTERIZATION_MODE_RANGE_SIZE_EXT = (VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT - VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT + 1), VK_CONSERVATIVE_RASTERIZATION_MODE_MAX_ENUM_EXT = 0x7FFFFFFF } VkConservativeRasterizationModeEXT; - typedef VkFlags VkPipelineRasterizationConservativeStateCreateFlagsEXT; - typedef struct VkPhysicalDeviceConservativeRasterizationPropertiesEXT { VkStructureType sType; void* pNext; @@ -6763,15 +8284,33 @@ typedef struct VkPipelineRasterizationConservativeStateCreateInfoEXT { +#define 
VK_EXT_depth_clip_enable 1 +#define VK_EXT_DEPTH_CLIP_ENABLE_SPEC_VERSION 1 +#define VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME "VK_EXT_depth_clip_enable" +typedef VkFlags VkPipelineRasterizationDepthClipStateCreateFlagsEXT; +typedef struct VkPhysicalDeviceDepthClipEnableFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 depthClipEnable; +} VkPhysicalDeviceDepthClipEnableFeaturesEXT; + +typedef struct VkPipelineRasterizationDepthClipStateCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkPipelineRasterizationDepthClipStateCreateFlagsEXT flags; + VkBool32 depthClipEnable; +} VkPipelineRasterizationDepthClipStateCreateInfoEXT; + + + #define VK_EXT_swapchain_colorspace 1 -#define VK_EXT_SWAPCHAIN_COLOR_SPACE_SPEC_VERSION 3 +#define VK_EXT_SWAPCHAIN_COLOR_SPACE_SPEC_VERSION 4 #define VK_EXT_SWAPCHAIN_COLOR_SPACE_EXTENSION_NAME "VK_EXT_swapchain_colorspace" #define VK_EXT_hdr_metadata 1 -#define VK_EXT_HDR_METADATA_SPEC_VERSION 1 +#define VK_EXT_HDR_METADATA_SPEC_VERSION 2 #define VK_EXT_HDR_METADATA_EXTENSION_NAME "VK_EXT_hdr_metadata" - typedef struct VkXYColorEXT { float x; float y; @@ -6790,7 +8329,6 @@ typedef struct VkHdrMetadataEXT { float maxFrameAverageLightLevel; } VkHdrMetadataEXT; - typedef void (VKAPI_PTR *PFN_vkSetHdrMetadataEXT)(VkDevice device, uint32_t swapchainCount, const VkSwapchainKHR* pSwapchains, const VkHdrMetadataEXT* pMetadata); #ifndef VK_NO_PROTOTYPES @@ -6801,6 +8339,7 @@ VKAPI_ATTR void VKAPI_CALL vkSetHdrMetadataEXT( const VkHdrMetadataEXT* pMetadata); #endif + #define VK_EXT_external_memory_dma_buf 1 #define VK_EXT_EXTERNAL_MEMORY_DMA_BUF_SPEC_VERSION 1 #define VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME "VK_EXT_external_memory_dma_buf" @@ -6814,12 +8353,9 @@ VKAPI_ATTR void VKAPI_CALL vkSetHdrMetadataEXT( #define VK_EXT_debug_utils 1 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDebugUtilsMessengerEXT) - -#define VK_EXT_DEBUG_UTILS_SPEC_VERSION 1 +#define VK_EXT_DEBUG_UTILS_SPEC_VERSION 2 #define 
VK_EXT_DEBUG_UTILS_EXTENSION_NAME "VK_EXT_debug_utils" - typedef VkFlags VkDebugUtilsMessengerCallbackDataFlagsEXT; -typedef VkFlags VkDebugUtilsMessengerCreateFlagsEXT; typedef enum VkDebugUtilsMessageSeverityFlagBitsEXT { VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT = 0x00000001, @@ -6828,7 +8364,6 @@ typedef enum VkDebugUtilsMessageSeverityFlagBitsEXT { VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT = 0x00001000, VK_DEBUG_UTILS_MESSAGE_SEVERITY_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF } VkDebugUtilsMessageSeverityFlagBitsEXT; -typedef VkFlags VkDebugUtilsMessageSeverityFlagsEXT; typedef enum VkDebugUtilsMessageTypeFlagBitsEXT { VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT = 0x00000001, @@ -6837,6 +8372,14 @@ typedef enum VkDebugUtilsMessageTypeFlagBitsEXT { VK_DEBUG_UTILS_MESSAGE_TYPE_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF } VkDebugUtilsMessageTypeFlagBitsEXT; typedef VkFlags VkDebugUtilsMessageTypeFlagsEXT; +typedef VkFlags VkDebugUtilsMessageSeverityFlagsEXT; +typedef VkFlags VkDebugUtilsMessengerCreateFlagsEXT; +typedef struct VkDebugUtilsLabelEXT { + VkStructureType sType; + const void* pNext; + const char* pLabelName; + float color[4]; +} VkDebugUtilsLabelEXT; typedef struct VkDebugUtilsObjectNameInfoEXT { VkStructureType sType; @@ -6846,23 +8389,6 @@ typedef struct VkDebugUtilsObjectNameInfoEXT { const char* pObjectName; } VkDebugUtilsObjectNameInfoEXT; -typedef struct VkDebugUtilsObjectTagInfoEXT { - VkStructureType sType; - const void* pNext; - VkObjectType objectType; - uint64_t objectHandle; - uint64_t tagName; - size_t tagSize; - const void* pTag; -} VkDebugUtilsObjectTagInfoEXT; - -typedef struct VkDebugUtilsLabelEXT { - VkStructureType sType; - const void* pNext; - const char* pLabelName; - float color[4]; -} VkDebugUtilsLabelEXT; - typedef struct VkDebugUtilsMessengerCallbackDataEXT { VkStructureType sType; const void* pNext; @@ -6871,16 +8397,16 @@ typedef struct VkDebugUtilsMessengerCallbackDataEXT { int32_t messageIdNumber; const char* pMessage; 
uint32_t queueLabelCount; - VkDebugUtilsLabelEXT* pQueueLabels; + const VkDebugUtilsLabelEXT* pQueueLabels; uint32_t cmdBufLabelCount; - VkDebugUtilsLabelEXT* pCmdBufLabels; + const VkDebugUtilsLabelEXT* pCmdBufLabels; uint32_t objectCount; - VkDebugUtilsObjectNameInfoEXT* pObjects; + const VkDebugUtilsObjectNameInfoEXT* pObjects; } VkDebugUtilsMessengerCallbackDataEXT; typedef VkBool32 (VKAPI_PTR *PFN_vkDebugUtilsMessengerCallbackEXT)( VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, - VkDebugUtilsMessageTypeFlagsEXT messageType, + VkDebugUtilsMessageTypeFlagsEXT messageTypes, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, void* pUserData); @@ -6894,6 +8420,15 @@ typedef struct VkDebugUtilsMessengerCreateInfoEXT { void* pUserData; } VkDebugUtilsMessengerCreateInfoEXT; +typedef struct VkDebugUtilsObjectTagInfoEXT { + VkStructureType sType; + const void* pNext; + VkObjectType objectType; + uint64_t objectHandle; + uint64_t tagName; + size_t tagSize; + const void* pTag; +} VkDebugUtilsObjectTagInfoEXT; typedef VkResult (VKAPI_PTR *PFN_vkSetDebugUtilsObjectNameEXT)(VkDevice device, const VkDebugUtilsObjectNameInfoEXT* pNameInfo); typedef VkResult (VKAPI_PTR *PFN_vkSetDebugUtilsObjectTagEXT)(VkDevice device, const VkDebugUtilsObjectTagInfoEXT* pTagInfo); @@ -6956,38 +8491,20 @@ VKAPI_ATTR void VKAPI_CALL vkSubmitDebugUtilsMessageEXT( const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData); #endif + #define VK_EXT_sampler_filter_minmax 1 -#define VK_EXT_SAMPLER_FILTER_MINMAX_SPEC_VERSION 1 +#define VK_EXT_SAMPLER_FILTER_MINMAX_SPEC_VERSION 2 #define VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME "VK_EXT_sampler_filter_minmax" +typedef VkSamplerReductionMode VkSamplerReductionModeEXT; +typedef VkSamplerReductionModeCreateInfo VkSamplerReductionModeCreateInfoEXT; -typedef enum VkSamplerReductionModeEXT { - VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT = 0, - VK_SAMPLER_REDUCTION_MODE_MIN_EXT = 1, - VK_SAMPLER_REDUCTION_MODE_MAX_EXT = 2, - 
VK_SAMPLER_REDUCTION_MODE_BEGIN_RANGE_EXT = VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT, - VK_SAMPLER_REDUCTION_MODE_END_RANGE_EXT = VK_SAMPLER_REDUCTION_MODE_MAX_EXT, - VK_SAMPLER_REDUCTION_MODE_RANGE_SIZE_EXT = (VK_SAMPLER_REDUCTION_MODE_MAX_EXT - VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT + 1), - VK_SAMPLER_REDUCTION_MODE_MAX_ENUM_EXT = 0x7FFFFFFF -} VkSamplerReductionModeEXT; - -typedef struct VkSamplerReductionModeCreateInfoEXT { - VkStructureType sType; - const void* pNext; - VkSamplerReductionModeEXT reductionMode; -} VkSamplerReductionModeCreateInfoEXT; - -typedef struct VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT { - VkStructureType sType; - void* pNext; - VkBool32 filterMinmaxSingleComponentFormats; - VkBool32 filterMinmaxImageComponentMapping; -} VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT; +typedef VkPhysicalDeviceSamplerFilterMinmaxProperties VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT; #define VK_AMD_gpu_shader_int16 1 -#define VK_AMD_GPU_SHADER_INT16_SPEC_VERSION 1 +#define VK_AMD_GPU_SHADER_INT16_SPEC_VERSION 2 #define VK_AMD_GPU_SHADER_INT16_EXTENSION_NAME "VK_AMD_gpu_shader_int16" @@ -7001,6 +8518,41 @@ typedef struct VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT { #define VK_AMD_SHADER_FRAGMENT_MASK_EXTENSION_NAME "VK_AMD_shader_fragment_mask" +#define VK_EXT_inline_uniform_block 1 +#define VK_EXT_INLINE_UNIFORM_BLOCK_SPEC_VERSION 1 +#define VK_EXT_INLINE_UNIFORM_BLOCK_EXTENSION_NAME "VK_EXT_inline_uniform_block" +typedef struct VkPhysicalDeviceInlineUniformBlockFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 inlineUniformBlock; + VkBool32 descriptorBindingInlineUniformBlockUpdateAfterBind; +} VkPhysicalDeviceInlineUniformBlockFeaturesEXT; + +typedef struct VkPhysicalDeviceInlineUniformBlockPropertiesEXT { + VkStructureType sType; + void* pNext; + uint32_t maxInlineUniformBlockSize; + uint32_t maxPerStageDescriptorInlineUniformBlocks; + uint32_t maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks; + 
uint32_t maxDescriptorSetInlineUniformBlocks; + uint32_t maxDescriptorSetUpdateAfterBindInlineUniformBlocks; +} VkPhysicalDeviceInlineUniformBlockPropertiesEXT; + +typedef struct VkWriteDescriptorSetInlineUniformBlockEXT { + VkStructureType sType; + const void* pNext; + uint32_t dataSize; + const void* pData; +} VkWriteDescriptorSetInlineUniformBlockEXT; + +typedef struct VkDescriptorPoolInlineUniformBlockCreateInfoEXT { + VkStructureType sType; + const void* pNext; + uint32_t maxInlineUniformBlockBindings; +} VkDescriptorPoolInlineUniformBlockCreateInfoEXT; + + + #define VK_EXT_shader_stencil_export 1 #define VK_EXT_SHADER_STENCIL_EXPORT_SPEC_VERSION 1 #define VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME "VK_EXT_shader_stencil_export" @@ -7009,7 +8561,6 @@ typedef struct VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT { #define VK_EXT_sample_locations 1 #define VK_EXT_SAMPLE_LOCATIONS_SPEC_VERSION 1 #define VK_EXT_SAMPLE_LOCATIONS_EXTENSION_NAME "VK_EXT_sample_locations" - typedef struct VkSampleLocationEXT { float x; float y; @@ -7066,7 +8617,6 @@ typedef struct VkMultisamplePropertiesEXT { VkExtent2D maxSampleLocationGridSize; } VkMultisamplePropertiesEXT; - typedef void (VKAPI_PTR *PFN_vkCmdSetSampleLocationsEXT)(VkCommandBuffer commandBuffer, const VkSampleLocationsInfoEXT* pSampleLocationsInfo); typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceMultisamplePropertiesEXT)(VkPhysicalDevice physicalDevice, VkSampleCountFlagBits samples, VkMultisamplePropertiesEXT* pMultisampleProperties); @@ -7081,21 +8631,17 @@ VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceMultisamplePropertiesEXT( VkMultisamplePropertiesEXT* pMultisampleProperties); #endif + #define VK_EXT_blend_operation_advanced 1 #define VK_EXT_BLEND_OPERATION_ADVANCED_SPEC_VERSION 2 #define VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME "VK_EXT_blend_operation_advanced" - typedef enum VkBlendOverlapEXT { VK_BLEND_OVERLAP_UNCORRELATED_EXT = 0, VK_BLEND_OVERLAP_DISJOINT_EXT = 1, VK_BLEND_OVERLAP_CONJOINT_EXT = 
2, - VK_BLEND_OVERLAP_BEGIN_RANGE_EXT = VK_BLEND_OVERLAP_UNCORRELATED_EXT, - VK_BLEND_OVERLAP_END_RANGE_EXT = VK_BLEND_OVERLAP_CONJOINT_EXT, - VK_BLEND_OVERLAP_RANGE_SIZE_EXT = (VK_BLEND_OVERLAP_CONJOINT_EXT - VK_BLEND_OVERLAP_UNCORRELATED_EXT + 1), VK_BLEND_OVERLAP_MAX_ENUM_EXT = 0x7FFFFFFF } VkBlendOverlapEXT; - typedef struct VkPhysicalDeviceBlendOperationAdvancedFeaturesEXT { VkStructureType sType; void* pNext; @@ -7126,9 +8672,7 @@ typedef struct VkPipelineColorBlendAdvancedStateCreateInfoEXT { #define VK_NV_fragment_coverage_to_color 1 #define VK_NV_FRAGMENT_COVERAGE_TO_COLOR_SPEC_VERSION 1 #define VK_NV_FRAGMENT_COVERAGE_TO_COLOR_EXTENSION_NAME "VK_NV_fragment_coverage_to_color" - typedef VkFlags VkPipelineCoverageToColorStateCreateFlagsNV; - typedef struct VkPipelineCoverageToColorStateCreateInfoNV { VkStructureType sType; const void* pNext; @@ -7143,20 +8687,14 @@ typedef struct VkPipelineCoverageToColorStateCreateInfoNV { #define VK_NV_FRAMEBUFFER_MIXED_SAMPLES_SPEC_VERSION 1 #define VK_NV_FRAMEBUFFER_MIXED_SAMPLES_EXTENSION_NAME "VK_NV_framebuffer_mixed_samples" - typedef enum VkCoverageModulationModeNV { VK_COVERAGE_MODULATION_MODE_NONE_NV = 0, VK_COVERAGE_MODULATION_MODE_RGB_NV = 1, VK_COVERAGE_MODULATION_MODE_ALPHA_NV = 2, VK_COVERAGE_MODULATION_MODE_RGBA_NV = 3, - VK_COVERAGE_MODULATION_MODE_BEGIN_RANGE_NV = VK_COVERAGE_MODULATION_MODE_NONE_NV, - VK_COVERAGE_MODULATION_MODE_END_RANGE_NV = VK_COVERAGE_MODULATION_MODE_RGBA_NV, - VK_COVERAGE_MODULATION_MODE_RANGE_SIZE_NV = (VK_COVERAGE_MODULATION_MODE_RGBA_NV - VK_COVERAGE_MODULATION_MODE_NONE_NV + 1), VK_COVERAGE_MODULATION_MODE_MAX_ENUM_NV = 0x7FFFFFFF } VkCoverageModulationModeNV; - typedef VkFlags VkPipelineCoverageModulationStateCreateFlagsNV; - typedef struct VkPipelineCoverageModulationStateCreateInfoNV { VkStructureType sType; const void* pNext; @@ -7174,29 +8712,95 @@ typedef struct VkPipelineCoverageModulationStateCreateInfoNV { #define VK_NV_FILL_RECTANGLE_EXTENSION_NAME 
"VK_NV_fill_rectangle" +#define VK_NV_shader_sm_builtins 1 +#define VK_NV_SHADER_SM_BUILTINS_SPEC_VERSION 1 +#define VK_NV_SHADER_SM_BUILTINS_EXTENSION_NAME "VK_NV_shader_sm_builtins" +typedef struct VkPhysicalDeviceShaderSMBuiltinsPropertiesNV { + VkStructureType sType; + void* pNext; + uint32_t shaderSMCount; + uint32_t shaderWarpsPerSM; +} VkPhysicalDeviceShaderSMBuiltinsPropertiesNV; + +typedef struct VkPhysicalDeviceShaderSMBuiltinsFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 shaderSMBuiltins; +} VkPhysicalDeviceShaderSMBuiltinsFeaturesNV; + + + #define VK_EXT_post_depth_coverage 1 #define VK_EXT_POST_DEPTH_COVERAGE_SPEC_VERSION 1 #define VK_EXT_POST_DEPTH_COVERAGE_EXTENSION_NAME "VK_EXT_post_depth_coverage" +#define VK_EXT_image_drm_format_modifier 1 +#define VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_SPEC_VERSION 1 +#define VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME "VK_EXT_image_drm_format_modifier" +typedef struct VkDrmFormatModifierPropertiesEXT { + uint64_t drmFormatModifier; + uint32_t drmFormatModifierPlaneCount; + VkFormatFeatureFlags drmFormatModifierTilingFeatures; +} VkDrmFormatModifierPropertiesEXT; + +typedef struct VkDrmFormatModifierPropertiesListEXT { + VkStructureType sType; + void* pNext; + uint32_t drmFormatModifierCount; + VkDrmFormatModifierPropertiesEXT* pDrmFormatModifierProperties; +} VkDrmFormatModifierPropertiesListEXT; + +typedef struct VkPhysicalDeviceImageDrmFormatModifierInfoEXT { + VkStructureType sType; + const void* pNext; + uint64_t drmFormatModifier; + VkSharingMode sharingMode; + uint32_t queueFamilyIndexCount; + const uint32_t* pQueueFamilyIndices; +} VkPhysicalDeviceImageDrmFormatModifierInfoEXT; + +typedef struct VkImageDrmFormatModifierListCreateInfoEXT { + VkStructureType sType; + const void* pNext; + uint32_t drmFormatModifierCount; + const uint64_t* pDrmFormatModifiers; +} VkImageDrmFormatModifierListCreateInfoEXT; + +typedef struct VkImageDrmFormatModifierExplicitCreateInfoEXT { + VkStructureType 
sType; + const void* pNext; + uint64_t drmFormatModifier; + uint32_t drmFormatModifierPlaneCount; + const VkSubresourceLayout* pPlaneLayouts; +} VkImageDrmFormatModifierExplicitCreateInfoEXT; + +typedef struct VkImageDrmFormatModifierPropertiesEXT { + VkStructureType sType; + void* pNext; + uint64_t drmFormatModifier; +} VkImageDrmFormatModifierPropertiesEXT; + +typedef VkResult (VKAPI_PTR *PFN_vkGetImageDrmFormatModifierPropertiesEXT)(VkDevice device, VkImage image, VkImageDrmFormatModifierPropertiesEXT* pProperties); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkGetImageDrmFormatModifierPropertiesEXT( + VkDevice device, + VkImage image, + VkImageDrmFormatModifierPropertiesEXT* pProperties); +#endif + + #define VK_EXT_validation_cache 1 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkValidationCacheEXT) - #define VK_EXT_VALIDATION_CACHE_SPEC_VERSION 1 #define VK_EXT_VALIDATION_CACHE_EXTENSION_NAME "VK_EXT_validation_cache" -#define VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT - typedef enum VkValidationCacheHeaderVersionEXT { VK_VALIDATION_CACHE_HEADER_VERSION_ONE_EXT = 1, - VK_VALIDATION_CACHE_HEADER_VERSION_BEGIN_RANGE_EXT = VK_VALIDATION_CACHE_HEADER_VERSION_ONE_EXT, - VK_VALIDATION_CACHE_HEADER_VERSION_END_RANGE_EXT = VK_VALIDATION_CACHE_HEADER_VERSION_ONE_EXT, - VK_VALIDATION_CACHE_HEADER_VERSION_RANGE_SIZE_EXT = (VK_VALIDATION_CACHE_HEADER_VERSION_ONE_EXT - VK_VALIDATION_CACHE_HEADER_VERSION_ONE_EXT + 1), VK_VALIDATION_CACHE_HEADER_VERSION_MAX_ENUM_EXT = 0x7FFFFFFF } VkValidationCacheHeaderVersionEXT; - typedef VkFlags VkValidationCacheCreateFlagsEXT; - typedef struct VkValidationCacheCreateInfoEXT { VkStructureType sType; const void* pNext; @@ -7211,7 +8815,6 @@ typedef struct VkShaderModuleValidationCacheCreateInfoEXT { VkValidationCacheEXT validationCache; } VkShaderModuleValidationCacheCreateInfoEXT; - typedef VkResult (VKAPI_PTR *PFN_vkCreateValidationCacheEXT)(VkDevice device, const 
VkValidationCacheCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkValidationCacheEXT* pValidationCache); typedef void (VKAPI_PTR *PFN_vkDestroyValidationCacheEXT)(VkDevice device, VkValidationCacheEXT validationCache, const VkAllocationCallbacks* pAllocator); typedef VkResult (VKAPI_PTR *PFN_vkMergeValidationCachesEXT)(VkDevice device, VkValidationCacheEXT dstCache, uint32_t srcCacheCount, const VkValidationCacheEXT* pSrcCaches); @@ -7242,92 +8845,23 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetValidationCacheDataEXT( void* pData); #endif + #define VK_EXT_descriptor_indexing 1 #define VK_EXT_DESCRIPTOR_INDEXING_SPEC_VERSION 2 #define VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME "VK_EXT_descriptor_indexing" +typedef VkDescriptorBindingFlagBits VkDescriptorBindingFlagBitsEXT; +typedef VkDescriptorBindingFlags VkDescriptorBindingFlagsEXT; -typedef enum VkDescriptorBindingFlagBitsEXT { - VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT_EXT = 0x00000001, - VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT_EXT = 0x00000002, - VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT_EXT = 0x00000004, - VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT = 0x00000008, - VK_DESCRIPTOR_BINDING_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF -} VkDescriptorBindingFlagBitsEXT; -typedef VkFlags VkDescriptorBindingFlagsEXT; +typedef VkDescriptorSetLayoutBindingFlagsCreateInfo VkDescriptorSetLayoutBindingFlagsCreateInfoEXT; -typedef struct VkDescriptorSetLayoutBindingFlagsCreateInfoEXT { - VkStructureType sType; - const void* pNext; - uint32_t bindingCount; - const VkDescriptorBindingFlagsEXT* pBindingFlags; -} VkDescriptorSetLayoutBindingFlagsCreateInfoEXT; +typedef VkPhysicalDeviceDescriptorIndexingFeatures VkPhysicalDeviceDescriptorIndexingFeaturesEXT; -typedef struct VkPhysicalDeviceDescriptorIndexingFeaturesEXT { - VkStructureType sType; - void* pNext; - VkBool32 shaderInputAttachmentArrayDynamicIndexing; - VkBool32 shaderUniformTexelBufferArrayDynamicIndexing; - VkBool32 
shaderStorageTexelBufferArrayDynamicIndexing; - VkBool32 shaderUniformBufferArrayNonUniformIndexing; - VkBool32 shaderSampledImageArrayNonUniformIndexing; - VkBool32 shaderStorageBufferArrayNonUniformIndexing; - VkBool32 shaderStorageImageArrayNonUniformIndexing; - VkBool32 shaderInputAttachmentArrayNonUniformIndexing; - VkBool32 shaderUniformTexelBufferArrayNonUniformIndexing; - VkBool32 shaderStorageTexelBufferArrayNonUniformIndexing; - VkBool32 descriptorBindingUniformBufferUpdateAfterBind; - VkBool32 descriptorBindingSampledImageUpdateAfterBind; - VkBool32 descriptorBindingStorageImageUpdateAfterBind; - VkBool32 descriptorBindingStorageBufferUpdateAfterBind; - VkBool32 descriptorBindingUniformTexelBufferUpdateAfterBind; - VkBool32 descriptorBindingStorageTexelBufferUpdateAfterBind; - VkBool32 descriptorBindingUpdateUnusedWhilePending; - VkBool32 descriptorBindingPartiallyBound; - VkBool32 descriptorBindingVariableDescriptorCount; - VkBool32 runtimeDescriptorArray; -} VkPhysicalDeviceDescriptorIndexingFeaturesEXT; +typedef VkPhysicalDeviceDescriptorIndexingProperties VkPhysicalDeviceDescriptorIndexingPropertiesEXT; -typedef struct VkPhysicalDeviceDescriptorIndexingPropertiesEXT { - VkStructureType sType; - void* pNext; - uint32_t maxUpdateAfterBindDescriptorsInAllPools; - VkBool32 shaderUniformBufferArrayNonUniformIndexingNative; - VkBool32 shaderSampledImageArrayNonUniformIndexingNative; - VkBool32 shaderStorageBufferArrayNonUniformIndexingNative; - VkBool32 shaderStorageImageArrayNonUniformIndexingNative; - VkBool32 shaderInputAttachmentArrayNonUniformIndexingNative; - VkBool32 robustBufferAccessUpdateAfterBind; - VkBool32 quadDivergentImplicitLod; - uint32_t maxPerStageDescriptorUpdateAfterBindSamplers; - uint32_t maxPerStageDescriptorUpdateAfterBindUniformBuffers; - uint32_t maxPerStageDescriptorUpdateAfterBindStorageBuffers; - uint32_t maxPerStageDescriptorUpdateAfterBindSampledImages; - uint32_t maxPerStageDescriptorUpdateAfterBindStorageImages; - uint32_t 
maxPerStageDescriptorUpdateAfterBindInputAttachments; - uint32_t maxPerStageUpdateAfterBindResources; - uint32_t maxDescriptorSetUpdateAfterBindSamplers; - uint32_t maxDescriptorSetUpdateAfterBindUniformBuffers; - uint32_t maxDescriptorSetUpdateAfterBindUniformBuffersDynamic; - uint32_t maxDescriptorSetUpdateAfterBindStorageBuffers; - uint32_t maxDescriptorSetUpdateAfterBindStorageBuffersDynamic; - uint32_t maxDescriptorSetUpdateAfterBindSampledImages; - uint32_t maxDescriptorSetUpdateAfterBindStorageImages; - uint32_t maxDescriptorSetUpdateAfterBindInputAttachments; -} VkPhysicalDeviceDescriptorIndexingPropertiesEXT; +typedef VkDescriptorSetVariableDescriptorCountAllocateInfo VkDescriptorSetVariableDescriptorCountAllocateInfoEXT; -typedef struct VkDescriptorSetVariableDescriptorCountAllocateInfoEXT { - VkStructureType sType; - const void* pNext; - uint32_t descriptorSetCount; - const uint32_t* pDescriptorCounts; -} VkDescriptorSetVariableDescriptorCountAllocateInfoEXT; - -typedef struct VkDescriptorSetVariableDescriptorCountLayoutSupportEXT { - VkStructureType sType; - void* pNext; - uint32_t maxVariableDescriptorCount; -} VkDescriptorSetVariableDescriptorCountLayoutSupportEXT; +typedef VkDescriptorSetVariableDescriptorCountLayoutSupport VkDescriptorSetVariableDescriptorCountLayoutSupportEXT; @@ -7336,22 +8870,555 @@ typedef struct VkDescriptorSetVariableDescriptorCountLayoutSupportEXT { #define VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME "VK_EXT_shader_viewport_index_layer" +#define VK_NV_shading_rate_image 1 +#define VK_NV_SHADING_RATE_IMAGE_SPEC_VERSION 3 +#define VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME "VK_NV_shading_rate_image" + +typedef enum VkShadingRatePaletteEntryNV { + VK_SHADING_RATE_PALETTE_ENTRY_NO_INVOCATIONS_NV = 0, + VK_SHADING_RATE_PALETTE_ENTRY_16_INVOCATIONS_PER_PIXEL_NV = 1, + VK_SHADING_RATE_PALETTE_ENTRY_8_INVOCATIONS_PER_PIXEL_NV = 2, + VK_SHADING_RATE_PALETTE_ENTRY_4_INVOCATIONS_PER_PIXEL_NV = 3, + 
VK_SHADING_RATE_PALETTE_ENTRY_2_INVOCATIONS_PER_PIXEL_NV = 4, + VK_SHADING_RATE_PALETTE_ENTRY_1_INVOCATION_PER_PIXEL_NV = 5, + VK_SHADING_RATE_PALETTE_ENTRY_1_INVOCATION_PER_2X1_PIXELS_NV = 6, + VK_SHADING_RATE_PALETTE_ENTRY_1_INVOCATION_PER_1X2_PIXELS_NV = 7, + VK_SHADING_RATE_PALETTE_ENTRY_1_INVOCATION_PER_2X2_PIXELS_NV = 8, + VK_SHADING_RATE_PALETTE_ENTRY_1_INVOCATION_PER_4X2_PIXELS_NV = 9, + VK_SHADING_RATE_PALETTE_ENTRY_1_INVOCATION_PER_2X4_PIXELS_NV = 10, + VK_SHADING_RATE_PALETTE_ENTRY_1_INVOCATION_PER_4X4_PIXELS_NV = 11, + VK_SHADING_RATE_PALETTE_ENTRY_MAX_ENUM_NV = 0x7FFFFFFF +} VkShadingRatePaletteEntryNV; + +typedef enum VkCoarseSampleOrderTypeNV { + VK_COARSE_SAMPLE_ORDER_TYPE_DEFAULT_NV = 0, + VK_COARSE_SAMPLE_ORDER_TYPE_CUSTOM_NV = 1, + VK_COARSE_SAMPLE_ORDER_TYPE_PIXEL_MAJOR_NV = 2, + VK_COARSE_SAMPLE_ORDER_TYPE_SAMPLE_MAJOR_NV = 3, + VK_COARSE_SAMPLE_ORDER_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkCoarseSampleOrderTypeNV; +typedef struct VkShadingRatePaletteNV { + uint32_t shadingRatePaletteEntryCount; + const VkShadingRatePaletteEntryNV* pShadingRatePaletteEntries; +} VkShadingRatePaletteNV; + +typedef struct VkPipelineViewportShadingRateImageStateCreateInfoNV { + VkStructureType sType; + const void* pNext; + VkBool32 shadingRateImageEnable; + uint32_t viewportCount; + const VkShadingRatePaletteNV* pShadingRatePalettes; +} VkPipelineViewportShadingRateImageStateCreateInfoNV; + +typedef struct VkPhysicalDeviceShadingRateImageFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 shadingRateImage; + VkBool32 shadingRateCoarseSampleOrder; +} VkPhysicalDeviceShadingRateImageFeaturesNV; + +typedef struct VkPhysicalDeviceShadingRateImagePropertiesNV { + VkStructureType sType; + void* pNext; + VkExtent2D shadingRateTexelSize; + uint32_t shadingRatePaletteSize; + uint32_t shadingRateMaxCoarseSamples; +} VkPhysicalDeviceShadingRateImagePropertiesNV; + +typedef struct VkCoarseSampleLocationNV { + uint32_t pixelX; + uint32_t pixelY; + uint32_t sample; +} 
VkCoarseSampleLocationNV; + +typedef struct VkCoarseSampleOrderCustomNV { + VkShadingRatePaletteEntryNV shadingRate; + uint32_t sampleCount; + uint32_t sampleLocationCount; + const VkCoarseSampleLocationNV* pSampleLocations; +} VkCoarseSampleOrderCustomNV; + +typedef struct VkPipelineViewportCoarseSampleOrderStateCreateInfoNV { + VkStructureType sType; + const void* pNext; + VkCoarseSampleOrderTypeNV sampleOrderType; + uint32_t customSampleOrderCount; + const VkCoarseSampleOrderCustomNV* pCustomSampleOrders; +} VkPipelineViewportCoarseSampleOrderStateCreateInfoNV; + +typedef void (VKAPI_PTR *PFN_vkCmdBindShadingRateImageNV)(VkCommandBuffer commandBuffer, VkImageView imageView, VkImageLayout imageLayout); +typedef void (VKAPI_PTR *PFN_vkCmdSetViewportShadingRatePaletteNV)(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount, const VkShadingRatePaletteNV* pShadingRatePalettes); +typedef void (VKAPI_PTR *PFN_vkCmdSetCoarseSampleOrderNV)(VkCommandBuffer commandBuffer, VkCoarseSampleOrderTypeNV sampleOrderType, uint32_t customSampleOrderCount, const VkCoarseSampleOrderCustomNV* pCustomSampleOrders); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdBindShadingRateImageNV( + VkCommandBuffer commandBuffer, + VkImageView imageView, + VkImageLayout imageLayout); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetViewportShadingRatePaletteNV( + VkCommandBuffer commandBuffer, + uint32_t firstViewport, + uint32_t viewportCount, + const VkShadingRatePaletteNV* pShadingRatePalettes); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetCoarseSampleOrderNV( + VkCommandBuffer commandBuffer, + VkCoarseSampleOrderTypeNV sampleOrderType, + uint32_t customSampleOrderCount, + const VkCoarseSampleOrderCustomNV* pCustomSampleOrders); +#endif + + +#define VK_NV_ray_tracing 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkAccelerationStructureKHR) +typedef VkAccelerationStructureKHR VkAccelerationStructureNV; + +#define VK_NV_RAY_TRACING_SPEC_VERSION 3 +#define 
VK_NV_RAY_TRACING_EXTENSION_NAME "VK_NV_ray_tracing" +#define VK_SHADER_UNUSED_KHR (~0U) +#define VK_SHADER_UNUSED_NV VK_SHADER_UNUSED_KHR + +typedef enum VkRayTracingShaderGroupTypeKHR { + VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR = 0, + VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR = 1, + VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR = 2, + VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, + VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_NV = VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR, + VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR = 0x7FFFFFFF +} VkRayTracingShaderGroupTypeKHR; +typedef VkRayTracingShaderGroupTypeKHR VkRayTracingShaderGroupTypeNV; + + +typedef enum VkGeometryTypeKHR { + VK_GEOMETRY_TYPE_TRIANGLES_KHR = 0, + VK_GEOMETRY_TYPE_AABBS_KHR = 1, + VK_GEOMETRY_TYPE_INSTANCES_KHR = 1000150000, + VK_GEOMETRY_TYPE_TRIANGLES_NV = VK_GEOMETRY_TYPE_TRIANGLES_KHR, + VK_GEOMETRY_TYPE_AABBS_NV = VK_GEOMETRY_TYPE_AABBS_KHR, + VK_GEOMETRY_TYPE_MAX_ENUM_KHR = 0x7FFFFFFF +} VkGeometryTypeKHR; +typedef VkGeometryTypeKHR VkGeometryTypeNV; + + +typedef enum VkAccelerationStructureTypeKHR { + VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR = 0, + VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR = 1, + VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, + VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, + VK_ACCELERATION_STRUCTURE_TYPE_MAX_ENUM_KHR = 0x7FFFFFFF +} VkAccelerationStructureTypeKHR; +typedef VkAccelerationStructureTypeKHR VkAccelerationStructureTypeNV; + + +typedef enum VkCopyAccelerationStructureModeKHR { + VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR = 0, + VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR = 1, + VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR = 2, + 
VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR = 3, + VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_NV = VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR, + VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_NV = VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR, + VK_COPY_ACCELERATION_STRUCTURE_MODE_MAX_ENUM_KHR = 0x7FFFFFFF +} VkCopyAccelerationStructureModeKHR; +typedef VkCopyAccelerationStructureModeKHR VkCopyAccelerationStructureModeNV; + + +typedef enum VkAccelerationStructureMemoryRequirementsTypeKHR { + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_KHR = 0, + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BUILD_SCRATCH_KHR = 1, + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_UPDATE_SCRATCH_KHR = 2, + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_KHR, + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BUILD_SCRATCH_NV = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BUILD_SCRATCH_KHR, + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_UPDATE_SCRATCH_NV = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_UPDATE_SCRATCH_KHR, + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_MAX_ENUM_KHR = 0x7FFFFFFF +} VkAccelerationStructureMemoryRequirementsTypeKHR; +typedef VkAccelerationStructureMemoryRequirementsTypeKHR VkAccelerationStructureMemoryRequirementsTypeNV; + + +typedef enum VkGeometryFlagBitsKHR { + VK_GEOMETRY_OPAQUE_BIT_KHR = 0x00000001, + VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR = 0x00000002, + VK_GEOMETRY_OPAQUE_BIT_NV = VK_GEOMETRY_OPAQUE_BIT_KHR, + VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_NV = VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR, + VK_GEOMETRY_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkGeometryFlagBitsKHR; +typedef VkFlags VkGeometryFlagsKHR; +typedef VkGeometryFlagsKHR VkGeometryFlagsNV; + +typedef VkGeometryFlagBitsKHR VkGeometryFlagBitsNV; + + +typedef enum VkGeometryInstanceFlagBitsKHR { + 
VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR = 0x00000001, + VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR = 0x00000002, + VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR = 0x00000004, + VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR = 0x00000008, + VK_GEOMETRY_INSTANCE_TRIANGLE_CULL_DISABLE_BIT_NV = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, + VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_NV = VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR, + VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_NV = VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, + VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_NV = VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR, + VK_GEOMETRY_INSTANCE_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkGeometryInstanceFlagBitsKHR; +typedef VkFlags VkGeometryInstanceFlagsKHR; +typedef VkGeometryInstanceFlagsKHR VkGeometryInstanceFlagsNV; + +typedef VkGeometryInstanceFlagBitsKHR VkGeometryInstanceFlagBitsNV; + + +typedef enum VkBuildAccelerationStructureFlagBitsKHR { + VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR = 0x00000001, + VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR = 0x00000002, + VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR = 0x00000004, + VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR = 0x00000008, + VK_BUILD_ACCELERATION_STRUCTURE_LOW_MEMORY_BIT_KHR = 0x00000010, + VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_NV = VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR, + VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_NV = VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR, + VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_NV = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR, + VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_NV = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR, + VK_BUILD_ACCELERATION_STRUCTURE_LOW_MEMORY_BIT_NV = VK_BUILD_ACCELERATION_STRUCTURE_LOW_MEMORY_BIT_KHR, + 
VK_BUILD_ACCELERATION_STRUCTURE_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkBuildAccelerationStructureFlagBitsKHR; +typedef VkFlags VkBuildAccelerationStructureFlagsKHR; +typedef VkBuildAccelerationStructureFlagsKHR VkBuildAccelerationStructureFlagsNV; + +typedef VkBuildAccelerationStructureFlagBitsKHR VkBuildAccelerationStructureFlagBitsNV; + +typedef struct VkRayTracingShaderGroupCreateInfoNV { + VkStructureType sType; + const void* pNext; + VkRayTracingShaderGroupTypeKHR type; + uint32_t generalShader; + uint32_t closestHitShader; + uint32_t anyHitShader; + uint32_t intersectionShader; +} VkRayTracingShaderGroupCreateInfoNV; + +typedef struct VkRayTracingPipelineCreateInfoNV { + VkStructureType sType; + const void* pNext; + VkPipelineCreateFlags flags; + uint32_t stageCount; + const VkPipelineShaderStageCreateInfo* pStages; + uint32_t groupCount; + const VkRayTracingShaderGroupCreateInfoNV* pGroups; + uint32_t maxRecursionDepth; + VkPipelineLayout layout; + VkPipeline basePipelineHandle; + int32_t basePipelineIndex; +} VkRayTracingPipelineCreateInfoNV; + +typedef struct VkGeometryTrianglesNV { + VkStructureType sType; + const void* pNext; + VkBuffer vertexData; + VkDeviceSize vertexOffset; + uint32_t vertexCount; + VkDeviceSize vertexStride; + VkFormat vertexFormat; + VkBuffer indexData; + VkDeviceSize indexOffset; + uint32_t indexCount; + VkIndexType indexType; + VkBuffer transformData; + VkDeviceSize transformOffset; +} VkGeometryTrianglesNV; + +typedef struct VkGeometryAABBNV { + VkStructureType sType; + const void* pNext; + VkBuffer aabbData; + uint32_t numAABBs; + uint32_t stride; + VkDeviceSize offset; +} VkGeometryAABBNV; + +typedef struct VkGeometryDataNV { + VkGeometryTrianglesNV triangles; + VkGeometryAABBNV aabbs; +} VkGeometryDataNV; + +typedef struct VkGeometryNV { + VkStructureType sType; + const void* pNext; + VkGeometryTypeKHR geometryType; + VkGeometryDataNV geometry; + VkGeometryFlagsKHR flags; +} VkGeometryNV; + +typedef struct 
VkAccelerationStructureInfoNV { + VkStructureType sType; + const void* pNext; + VkAccelerationStructureTypeNV type; + VkBuildAccelerationStructureFlagsNV flags; + uint32_t instanceCount; + uint32_t geometryCount; + const VkGeometryNV* pGeometries; +} VkAccelerationStructureInfoNV; + +typedef struct VkAccelerationStructureCreateInfoNV { + VkStructureType sType; + const void* pNext; + VkDeviceSize compactedSize; + VkAccelerationStructureInfoNV info; +} VkAccelerationStructureCreateInfoNV; + +typedef struct VkBindAccelerationStructureMemoryInfoKHR { + VkStructureType sType; + const void* pNext; + VkAccelerationStructureKHR accelerationStructure; + VkDeviceMemory memory; + VkDeviceSize memoryOffset; + uint32_t deviceIndexCount; + const uint32_t* pDeviceIndices; +} VkBindAccelerationStructureMemoryInfoKHR; + +typedef VkBindAccelerationStructureMemoryInfoKHR VkBindAccelerationStructureMemoryInfoNV; + +typedef struct VkWriteDescriptorSetAccelerationStructureKHR { + VkStructureType sType; + const void* pNext; + uint32_t accelerationStructureCount; + const VkAccelerationStructureKHR* pAccelerationStructures; +} VkWriteDescriptorSetAccelerationStructureKHR; + +typedef VkWriteDescriptorSetAccelerationStructureKHR VkWriteDescriptorSetAccelerationStructureNV; + +typedef struct VkAccelerationStructureMemoryRequirementsInfoNV { + VkStructureType sType; + const void* pNext; + VkAccelerationStructureMemoryRequirementsTypeNV type; + VkAccelerationStructureNV accelerationStructure; +} VkAccelerationStructureMemoryRequirementsInfoNV; + +typedef struct VkPhysicalDeviceRayTracingPropertiesNV { + VkStructureType sType; + void* pNext; + uint32_t shaderGroupHandleSize; + uint32_t maxRecursionDepth; + uint32_t maxShaderGroupStride; + uint32_t shaderGroupBaseAlignment; + uint64_t maxGeometryCount; + uint64_t maxInstanceCount; + uint64_t maxTriangleCount; + uint32_t maxDescriptorSetAccelerationStructures; +} VkPhysicalDeviceRayTracingPropertiesNV; + +typedef struct VkTransformMatrixKHR { + 
float matrix[3][4]; +} VkTransformMatrixKHR; + +typedef VkTransformMatrixKHR VkTransformMatrixNV; + +typedef struct VkAabbPositionsKHR { + float minX; + float minY; + float minZ; + float maxX; + float maxY; + float maxZ; +} VkAabbPositionsKHR; + +typedef VkAabbPositionsKHR VkAabbPositionsNV; + +typedef struct VkAccelerationStructureInstanceKHR { + VkTransformMatrixKHR transform; + uint32_t instanceCustomIndex:24; + uint32_t mask:8; + uint32_t instanceShaderBindingTableRecordOffset:24; + VkGeometryInstanceFlagsKHR flags:8; + uint64_t accelerationStructureReference; +} VkAccelerationStructureInstanceKHR; + +typedef VkAccelerationStructureInstanceKHR VkAccelerationStructureInstanceNV; + +typedef VkResult (VKAPI_PTR *PFN_vkCreateAccelerationStructureNV)(VkDevice device, const VkAccelerationStructureCreateInfoNV* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkAccelerationStructureNV* pAccelerationStructure); +typedef void (VKAPI_PTR *PFN_vkDestroyAccelerationStructureKHR)(VkDevice device, VkAccelerationStructureKHR accelerationStructure, const VkAllocationCallbacks* pAllocator); +typedef void (VKAPI_PTR *PFN_vkDestroyAccelerationStructureNV)(VkDevice device, VkAccelerationStructureKHR accelerationStructure, const VkAllocationCallbacks* pAllocator); +typedef void (VKAPI_PTR *PFN_vkGetAccelerationStructureMemoryRequirementsNV)(VkDevice device, const VkAccelerationStructureMemoryRequirementsInfoNV* pInfo, VkMemoryRequirements2KHR* pMemoryRequirements); +typedef VkResult (VKAPI_PTR *PFN_vkBindAccelerationStructureMemoryKHR)(VkDevice device, uint32_t bindInfoCount, const VkBindAccelerationStructureMemoryInfoKHR* pBindInfos); +typedef VkResult (VKAPI_PTR *PFN_vkBindAccelerationStructureMemoryNV)(VkDevice device, uint32_t bindInfoCount, const VkBindAccelerationStructureMemoryInfoKHR* pBindInfos); +typedef void (VKAPI_PTR *PFN_vkCmdBuildAccelerationStructureNV)(VkCommandBuffer commandBuffer, const VkAccelerationStructureInfoNV* pInfo, VkBuffer instanceData, 
VkDeviceSize instanceOffset, VkBool32 update, VkAccelerationStructureKHR dst, VkAccelerationStructureKHR src, VkBuffer scratch, VkDeviceSize scratchOffset); +typedef void (VKAPI_PTR *PFN_vkCmdCopyAccelerationStructureNV)(VkCommandBuffer commandBuffer, VkAccelerationStructureKHR dst, VkAccelerationStructureKHR src, VkCopyAccelerationStructureModeKHR mode); +typedef void (VKAPI_PTR *PFN_vkCmdTraceRaysNV)(VkCommandBuffer commandBuffer, VkBuffer raygenShaderBindingTableBuffer, VkDeviceSize raygenShaderBindingOffset, VkBuffer missShaderBindingTableBuffer, VkDeviceSize missShaderBindingOffset, VkDeviceSize missShaderBindingStride, VkBuffer hitShaderBindingTableBuffer, VkDeviceSize hitShaderBindingOffset, VkDeviceSize hitShaderBindingStride, VkBuffer callableShaderBindingTableBuffer, VkDeviceSize callableShaderBindingOffset, VkDeviceSize callableShaderBindingStride, uint32_t width, uint32_t height, uint32_t depth); +typedef VkResult (VKAPI_PTR *PFN_vkCreateRayTracingPipelinesNV)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkRayTracingPipelineCreateInfoNV* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); +typedef VkResult (VKAPI_PTR *PFN_vkGetRayTracingShaderGroupHandlesKHR)(VkDevice device, VkPipeline pipeline, uint32_t firstGroup, uint32_t groupCount, size_t dataSize, void* pData); +typedef VkResult (VKAPI_PTR *PFN_vkGetRayTracingShaderGroupHandlesNV)(VkDevice device, VkPipeline pipeline, uint32_t firstGroup, uint32_t groupCount, size_t dataSize, void* pData); +typedef VkResult (VKAPI_PTR *PFN_vkGetAccelerationStructureHandleNV)(VkDevice device, VkAccelerationStructureKHR accelerationStructure, size_t dataSize, void* pData); +typedef void (VKAPI_PTR *PFN_vkCmdWriteAccelerationStructuresPropertiesKHR)(VkCommandBuffer commandBuffer, uint32_t accelerationStructureCount, const VkAccelerationStructureKHR* pAccelerationStructures, VkQueryType queryType, VkQueryPool queryPool, uint32_t firstQuery); +typedef void 
(VKAPI_PTR *PFN_vkCmdWriteAccelerationStructuresPropertiesNV)(VkCommandBuffer commandBuffer, uint32_t accelerationStructureCount, const VkAccelerationStructureKHR* pAccelerationStructures, VkQueryType queryType, VkQueryPool queryPool, uint32_t firstQuery); +typedef VkResult (VKAPI_PTR *PFN_vkCompileDeferredNV)(VkDevice device, VkPipeline pipeline, uint32_t shader); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateAccelerationStructureNV( + VkDevice device, + const VkAccelerationStructureCreateInfoNV* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkAccelerationStructureNV* pAccelerationStructure); + +VKAPI_ATTR void VKAPI_CALL vkDestroyAccelerationStructureKHR( + VkDevice device, + VkAccelerationStructureKHR accelerationStructure, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR void VKAPI_CALL vkDestroyAccelerationStructureNV( + VkDevice device, + VkAccelerationStructureKHR accelerationStructure, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR void VKAPI_CALL vkGetAccelerationStructureMemoryRequirementsNV( + VkDevice device, + const VkAccelerationStructureMemoryRequirementsInfoNV* pInfo, + VkMemoryRequirements2KHR* pMemoryRequirements); + +VKAPI_ATTR VkResult VKAPI_CALL vkBindAccelerationStructureMemoryKHR( + VkDevice device, + uint32_t bindInfoCount, + const VkBindAccelerationStructureMemoryInfoKHR* pBindInfos); + +VKAPI_ATTR VkResult VKAPI_CALL vkBindAccelerationStructureMemoryNV( + VkDevice device, + uint32_t bindInfoCount, + const VkBindAccelerationStructureMemoryInfoKHR* pBindInfos); + +VKAPI_ATTR void VKAPI_CALL vkCmdBuildAccelerationStructureNV( + VkCommandBuffer commandBuffer, + const VkAccelerationStructureInfoNV* pInfo, + VkBuffer instanceData, + VkDeviceSize instanceOffset, + VkBool32 update, + VkAccelerationStructureKHR dst, + VkAccelerationStructureKHR src, + VkBuffer scratch, + VkDeviceSize scratchOffset); + +VKAPI_ATTR void VKAPI_CALL vkCmdCopyAccelerationStructureNV( + VkCommandBuffer commandBuffer, 
+ VkAccelerationStructureKHR dst, + VkAccelerationStructureKHR src, + VkCopyAccelerationStructureModeKHR mode); + +VKAPI_ATTR void VKAPI_CALL vkCmdTraceRaysNV( + VkCommandBuffer commandBuffer, + VkBuffer raygenShaderBindingTableBuffer, + VkDeviceSize raygenShaderBindingOffset, + VkBuffer missShaderBindingTableBuffer, + VkDeviceSize missShaderBindingOffset, + VkDeviceSize missShaderBindingStride, + VkBuffer hitShaderBindingTableBuffer, + VkDeviceSize hitShaderBindingOffset, + VkDeviceSize hitShaderBindingStride, + VkBuffer callableShaderBindingTableBuffer, + VkDeviceSize callableShaderBindingOffset, + VkDeviceSize callableShaderBindingStride, + uint32_t width, + uint32_t height, + uint32_t depth); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateRayTracingPipelinesNV( + VkDevice device, + VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkRayTracingPipelineCreateInfoNV* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetRayTracingShaderGroupHandlesKHR( + VkDevice device, + VkPipeline pipeline, + uint32_t firstGroup, + uint32_t groupCount, + size_t dataSize, + void* pData); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetRayTracingShaderGroupHandlesNV( + VkDevice device, + VkPipeline pipeline, + uint32_t firstGroup, + uint32_t groupCount, + size_t dataSize, + void* pData); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetAccelerationStructureHandleNV( + VkDevice device, + VkAccelerationStructureKHR accelerationStructure, + size_t dataSize, + void* pData); + +VKAPI_ATTR void VKAPI_CALL vkCmdWriteAccelerationStructuresPropertiesKHR( + VkCommandBuffer commandBuffer, + uint32_t accelerationStructureCount, + const VkAccelerationStructureKHR* pAccelerationStructures, + VkQueryType queryType, + VkQueryPool queryPool, + uint32_t firstQuery); + +VKAPI_ATTR void VKAPI_CALL vkCmdWriteAccelerationStructuresPropertiesNV( + VkCommandBuffer commandBuffer, + uint32_t accelerationStructureCount, + const 
VkAccelerationStructureKHR* pAccelerationStructures, + VkQueryType queryType, + VkQueryPool queryPool, + uint32_t firstQuery); + +VKAPI_ATTR VkResult VKAPI_CALL vkCompileDeferredNV( + VkDevice device, + VkPipeline pipeline, + uint32_t shader); +#endif + + +#define VK_NV_representative_fragment_test 1 +#define VK_NV_REPRESENTATIVE_FRAGMENT_TEST_SPEC_VERSION 2 +#define VK_NV_REPRESENTATIVE_FRAGMENT_TEST_EXTENSION_NAME "VK_NV_representative_fragment_test" +typedef struct VkPhysicalDeviceRepresentativeFragmentTestFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 representativeFragmentTest; +} VkPhysicalDeviceRepresentativeFragmentTestFeaturesNV; + +typedef struct VkPipelineRepresentativeFragmentTestStateCreateInfoNV { + VkStructureType sType; + const void* pNext; + VkBool32 representativeFragmentTestEnable; +} VkPipelineRepresentativeFragmentTestStateCreateInfoNV; + + + +#define VK_EXT_filter_cubic 1 +#define VK_EXT_FILTER_CUBIC_SPEC_VERSION 3 +#define VK_EXT_FILTER_CUBIC_EXTENSION_NAME "VK_EXT_filter_cubic" +typedef struct VkPhysicalDeviceImageViewImageFormatInfoEXT { + VkStructureType sType; + void* pNext; + VkImageViewType imageViewType; +} VkPhysicalDeviceImageViewImageFormatInfoEXT; + +typedef struct VkFilterCubicImageViewImageFormatPropertiesEXT { + VkStructureType sType; + void* pNext; + VkBool32 filterCubic; + VkBool32 filterCubicMinmax; +} VkFilterCubicImageViewImageFormatPropertiesEXT; + + + +#define VK_QCOM_render_pass_shader_resolve 1 +#define VK_QCOM_RENDER_PASS_SHADER_RESOLVE_SPEC_VERSION 4 +#define VK_QCOM_RENDER_PASS_SHADER_RESOLVE_EXTENSION_NAME "VK_QCOM_render_pass_shader_resolve" + + #define VK_EXT_global_priority 1 #define VK_EXT_GLOBAL_PRIORITY_SPEC_VERSION 2 #define VK_EXT_GLOBAL_PRIORITY_EXTENSION_NAME "VK_EXT_global_priority" - typedef enum VkQueueGlobalPriorityEXT { VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT = 128, VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT = 256, VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT = 512, VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT = 
1024, - VK_QUEUE_GLOBAL_PRIORITY_BEGIN_RANGE_EXT = VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT, - VK_QUEUE_GLOBAL_PRIORITY_END_RANGE_EXT = VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT, - VK_QUEUE_GLOBAL_PRIORITY_RANGE_SIZE_EXT = (VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT - VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT + 1), VK_QUEUE_GLOBAL_PRIORITY_MAX_ENUM_EXT = 0x7FFFFFFF } VkQueueGlobalPriorityEXT; - typedef struct VkDeviceQueueGlobalPriorityCreateInfoEXT { VkStructureType sType; const void* pNext; @@ -7363,7 +9430,6 @@ typedef struct VkDeviceQueueGlobalPriorityCreateInfoEXT { #define VK_EXT_external_memory_host 1 #define VK_EXT_EXTERNAL_MEMORY_HOST_SPEC_VERSION 1 #define VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME "VK_EXT_external_memory_host" - typedef struct VkImportMemoryHostPointerInfoEXT { VkStructureType sType; const void* pNext; @@ -7383,7 +9449,6 @@ typedef struct VkPhysicalDeviceExternalMemoryHostPropertiesEXT { VkDeviceSize minImportedHostPointerAlignment; } VkPhysicalDeviceExternalMemoryHostPropertiesEXT; - typedef VkResult (VKAPI_PTR *PFN_vkGetMemoryHostPointerPropertiesEXT)(VkDevice device, VkExternalMemoryHandleTypeFlagBits handleType, const void* pHostPointer, VkMemoryHostPointerPropertiesEXT* pMemoryHostPointerProperties); #ifndef VK_NO_PROTOTYPES @@ -7394,10 +9459,10 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetMemoryHostPointerPropertiesEXT( VkMemoryHostPointerPropertiesEXT* pMemoryHostPointerProperties); #endif + #define VK_AMD_buffer_marker 1 #define VK_AMD_BUFFER_MARKER_SPEC_VERSION 1 #define VK_AMD_BUFFER_MARKER_EXTENSION_NAME "VK_AMD_buffer_marker" - typedef void (VKAPI_PTR *PFN_vkCmdWriteBufferMarkerAMD)(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage, VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker); #ifndef VK_NO_PROTOTYPES @@ -7409,10 +9474,61 @@ VKAPI_ATTR void VKAPI_CALL vkCmdWriteBufferMarkerAMD( uint32_t marker); #endif -#define VK_AMD_shader_core_properties 1 -#define VK_AMD_SHADER_CORE_PROPERTIES_SPEC_VERSION 1 -#define 
VK_AMD_SHADER_CORE_PROPERTIES_EXTENSION_NAME "VK_AMD_shader_core_properties" +#define VK_AMD_pipeline_compiler_control 1 +#define VK_AMD_PIPELINE_COMPILER_CONTROL_SPEC_VERSION 1 +#define VK_AMD_PIPELINE_COMPILER_CONTROL_EXTENSION_NAME "VK_AMD_pipeline_compiler_control" + +typedef enum VkPipelineCompilerControlFlagBitsAMD { + VK_PIPELINE_COMPILER_CONTROL_FLAG_BITS_MAX_ENUM_AMD = 0x7FFFFFFF +} VkPipelineCompilerControlFlagBitsAMD; +typedef VkFlags VkPipelineCompilerControlFlagsAMD; +typedef struct VkPipelineCompilerControlCreateInfoAMD { + VkStructureType sType; + const void* pNext; + VkPipelineCompilerControlFlagsAMD compilerControlFlags; +} VkPipelineCompilerControlCreateInfoAMD; + + + +#define VK_EXT_calibrated_timestamps 1 +#define VK_EXT_CALIBRATED_TIMESTAMPS_SPEC_VERSION 1 +#define VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME "VK_EXT_calibrated_timestamps" + +typedef enum VkTimeDomainEXT { + VK_TIME_DOMAIN_DEVICE_EXT = 0, + VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT = 1, + VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT = 2, + VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT = 3, + VK_TIME_DOMAIN_MAX_ENUM_EXT = 0x7FFFFFFF +} VkTimeDomainEXT; +typedef struct VkCalibratedTimestampInfoEXT { + VkStructureType sType; + const void* pNext; + VkTimeDomainEXT timeDomain; +} VkCalibratedTimestampInfoEXT; + +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT)(VkPhysicalDevice physicalDevice, uint32_t* pTimeDomainCount, VkTimeDomainEXT* pTimeDomains); +typedef VkResult (VKAPI_PTR *PFN_vkGetCalibratedTimestampsEXT)(VkDevice device, uint32_t timestampCount, const VkCalibratedTimestampInfoEXT* pTimestampInfos, uint64_t* pTimestamps, uint64_t* pMaxDeviation); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( + VkPhysicalDevice physicalDevice, + uint32_t* pTimeDomainCount, + VkTimeDomainEXT* pTimeDomains); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetCalibratedTimestampsEXT( + VkDevice device, + uint32_t timestampCount, 
+ const VkCalibratedTimestampInfoEXT* pTimestampInfos, + uint64_t* pTimestamps, + uint64_t* pMaxDeviation); +#endif + + +#define VK_AMD_shader_core_properties 1 +#define VK_AMD_SHADER_CORE_PROPERTIES_SPEC_VERSION 2 +#define VK_AMD_SHADER_CORE_PROPERTIES_EXTENSION_NAME "VK_AMD_shader_core_properties" typedef struct VkPhysicalDeviceShaderCorePropertiesAMD { VkStructureType sType; void* pNext; @@ -7434,10 +9550,27 @@ typedef struct VkPhysicalDeviceShaderCorePropertiesAMD { -#define VK_EXT_vertex_attribute_divisor 1 -#define VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_SPEC_VERSION 1 -#define VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME "VK_EXT_vertex_attribute_divisor" +#define VK_AMD_memory_overallocation_behavior 1 +#define VK_AMD_MEMORY_OVERALLOCATION_BEHAVIOR_SPEC_VERSION 1 +#define VK_AMD_MEMORY_OVERALLOCATION_BEHAVIOR_EXTENSION_NAME "VK_AMD_memory_overallocation_behavior" +typedef enum VkMemoryOverallocationBehaviorAMD { + VK_MEMORY_OVERALLOCATION_BEHAVIOR_DEFAULT_AMD = 0, + VK_MEMORY_OVERALLOCATION_BEHAVIOR_ALLOWED_AMD = 1, + VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD = 2, + VK_MEMORY_OVERALLOCATION_BEHAVIOR_MAX_ENUM_AMD = 0x7FFFFFFF +} VkMemoryOverallocationBehaviorAMD; +typedef struct VkDeviceMemoryOverallocationCreateInfoAMD { + VkStructureType sType; + const void* pNext; + VkMemoryOverallocationBehaviorAMD overallocationBehavior; +} VkDeviceMemoryOverallocationCreateInfoAMD; + + + +#define VK_EXT_vertex_attribute_divisor 1 +#define VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_SPEC_VERSION 3 +#define VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME "VK_EXT_vertex_attribute_divisor" typedef struct VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT { VkStructureType sType; void* pNext; @@ -7456,6 +9589,39 @@ typedef struct VkPipelineVertexInputDivisorStateCreateInfoEXT { const VkVertexInputBindingDivisorDescriptionEXT* pVertexBindingDivisors; } VkPipelineVertexInputDivisorStateCreateInfoEXT; +typedef struct VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT { + VkStructureType 
sType; + void* pNext; + VkBool32 vertexAttributeInstanceRateDivisor; + VkBool32 vertexAttributeInstanceRateZeroDivisor; +} VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT; + + + +#define VK_EXT_pipeline_creation_feedback 1 +#define VK_EXT_PIPELINE_CREATION_FEEDBACK_SPEC_VERSION 1 +#define VK_EXT_PIPELINE_CREATION_FEEDBACK_EXTENSION_NAME "VK_EXT_pipeline_creation_feedback" + +typedef enum VkPipelineCreationFeedbackFlagBitsEXT { + VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT = 0x00000001, + VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT = 0x00000002, + VK_PIPELINE_CREATION_FEEDBACK_BASE_PIPELINE_ACCELERATION_BIT_EXT = 0x00000004, + VK_PIPELINE_CREATION_FEEDBACK_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF +} VkPipelineCreationFeedbackFlagBitsEXT; +typedef VkFlags VkPipelineCreationFeedbackFlagsEXT; +typedef struct VkPipelineCreationFeedbackEXT { + VkPipelineCreationFeedbackFlagsEXT flags; + uint64_t duration; +} VkPipelineCreationFeedbackEXT; + +typedef struct VkPipelineCreationFeedbackCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkPipelineCreationFeedbackEXT* pPipelineCreationFeedback; + uint32_t pipelineStageCreationFeedbackCount; + VkPipelineCreationFeedbackEXT* pPipelineStageCreationFeedbacks; +} VkPipelineCreationFeedbackCreateInfoEXT; + #define VK_NV_shader_subgroup_partitioned 1 @@ -7463,6 +9629,1340 @@ typedef struct VkPipelineVertexInputDivisorStateCreateInfoEXT { #define VK_NV_SHADER_SUBGROUP_PARTITIONED_EXTENSION_NAME "VK_NV_shader_subgroup_partitioned" +#define VK_NV_compute_shader_derivatives 1 +#define VK_NV_COMPUTE_SHADER_DERIVATIVES_SPEC_VERSION 1 +#define VK_NV_COMPUTE_SHADER_DERIVATIVES_EXTENSION_NAME "VK_NV_compute_shader_derivatives" +typedef struct VkPhysicalDeviceComputeShaderDerivativesFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 computeDerivativeGroupQuads; + VkBool32 computeDerivativeGroupLinear; +} VkPhysicalDeviceComputeShaderDerivativesFeaturesNV; + + + +#define VK_NV_mesh_shader 1 
+#define VK_NV_MESH_SHADER_SPEC_VERSION 1 +#define VK_NV_MESH_SHADER_EXTENSION_NAME "VK_NV_mesh_shader" +typedef struct VkPhysicalDeviceMeshShaderFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 taskShader; + VkBool32 meshShader; +} VkPhysicalDeviceMeshShaderFeaturesNV; + +typedef struct VkPhysicalDeviceMeshShaderPropertiesNV { + VkStructureType sType; + void* pNext; + uint32_t maxDrawMeshTasksCount; + uint32_t maxTaskWorkGroupInvocations; + uint32_t maxTaskWorkGroupSize[3]; + uint32_t maxTaskTotalMemorySize; + uint32_t maxTaskOutputCount; + uint32_t maxMeshWorkGroupInvocations; + uint32_t maxMeshWorkGroupSize[3]; + uint32_t maxMeshTotalMemorySize; + uint32_t maxMeshOutputVertices; + uint32_t maxMeshOutputPrimitives; + uint32_t maxMeshMultiviewViewCount; + uint32_t meshOutputPerVertexGranularity; + uint32_t meshOutputPerPrimitiveGranularity; +} VkPhysicalDeviceMeshShaderPropertiesNV; + +typedef struct VkDrawMeshTasksIndirectCommandNV { + uint32_t taskCount; + uint32_t firstTask; +} VkDrawMeshTasksIndirectCommandNV; + +typedef void (VKAPI_PTR *PFN_vkCmdDrawMeshTasksNV)(VkCommandBuffer commandBuffer, uint32_t taskCount, uint32_t firstTask); +typedef void (VKAPI_PTR *PFN_vkCmdDrawMeshTasksIndirectNV)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride); +typedef void (VKAPI_PTR *PFN_vkCmdDrawMeshTasksIndirectCountNV)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdDrawMeshTasksNV( + VkCommandBuffer commandBuffer, + uint32_t taskCount, + uint32_t firstTask); + +VKAPI_ATTR void VKAPI_CALL vkCmdDrawMeshTasksIndirectNV( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride); + +VKAPI_ATTR void VKAPI_CALL vkCmdDrawMeshTasksIndirectCountNV( + VkCommandBuffer commandBuffer, 
+ VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride); +#endif + + +#define VK_NV_fragment_shader_barycentric 1 +#define VK_NV_FRAGMENT_SHADER_BARYCENTRIC_SPEC_VERSION 1 +#define VK_NV_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME "VK_NV_fragment_shader_barycentric" +typedef struct VkPhysicalDeviceFragmentShaderBarycentricFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 fragmentShaderBarycentric; +} VkPhysicalDeviceFragmentShaderBarycentricFeaturesNV; + + + +#define VK_NV_shader_image_footprint 1 +#define VK_NV_SHADER_IMAGE_FOOTPRINT_SPEC_VERSION 2 +#define VK_NV_SHADER_IMAGE_FOOTPRINT_EXTENSION_NAME "VK_NV_shader_image_footprint" +typedef struct VkPhysicalDeviceShaderImageFootprintFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 imageFootprint; +} VkPhysicalDeviceShaderImageFootprintFeaturesNV; + + + +#define VK_NV_scissor_exclusive 1 +#define VK_NV_SCISSOR_EXCLUSIVE_SPEC_VERSION 1 +#define VK_NV_SCISSOR_EXCLUSIVE_EXTENSION_NAME "VK_NV_scissor_exclusive" +typedef struct VkPipelineViewportExclusiveScissorStateCreateInfoNV { + VkStructureType sType; + const void* pNext; + uint32_t exclusiveScissorCount; + const VkRect2D* pExclusiveScissors; +} VkPipelineViewportExclusiveScissorStateCreateInfoNV; + +typedef struct VkPhysicalDeviceExclusiveScissorFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 exclusiveScissor; +} VkPhysicalDeviceExclusiveScissorFeaturesNV; + +typedef void (VKAPI_PTR *PFN_vkCmdSetExclusiveScissorNV)(VkCommandBuffer commandBuffer, uint32_t firstExclusiveScissor, uint32_t exclusiveScissorCount, const VkRect2D* pExclusiveScissors); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdSetExclusiveScissorNV( + VkCommandBuffer commandBuffer, + uint32_t firstExclusiveScissor, + uint32_t exclusiveScissorCount, + const VkRect2D* pExclusiveScissors); +#endif + + +#define VK_NV_device_diagnostic_checkpoints 1 +#define 
VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_SPEC_VERSION 2 +#define VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME "VK_NV_device_diagnostic_checkpoints" +typedef struct VkQueueFamilyCheckpointPropertiesNV { + VkStructureType sType; + void* pNext; + VkPipelineStageFlags checkpointExecutionStageMask; +} VkQueueFamilyCheckpointPropertiesNV; + +typedef struct VkCheckpointDataNV { + VkStructureType sType; + void* pNext; + VkPipelineStageFlagBits stage; + void* pCheckpointMarker; +} VkCheckpointDataNV; + +typedef void (VKAPI_PTR *PFN_vkCmdSetCheckpointNV)(VkCommandBuffer commandBuffer, const void* pCheckpointMarker); +typedef void (VKAPI_PTR *PFN_vkGetQueueCheckpointDataNV)(VkQueue queue, uint32_t* pCheckpointDataCount, VkCheckpointDataNV* pCheckpointData); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdSetCheckpointNV( + VkCommandBuffer commandBuffer, + const void* pCheckpointMarker); + +VKAPI_ATTR void VKAPI_CALL vkGetQueueCheckpointDataNV( + VkQueue queue, + uint32_t* pCheckpointDataCount, + VkCheckpointDataNV* pCheckpointData); +#endif + + +#define VK_INTEL_shader_integer_functions2 1 +#define VK_INTEL_SHADER_INTEGER_FUNCTIONS_2_SPEC_VERSION 1 +#define VK_INTEL_SHADER_INTEGER_FUNCTIONS_2_EXTENSION_NAME "VK_INTEL_shader_integer_functions2" +typedef struct VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL { + VkStructureType sType; + void* pNext; + VkBool32 shaderIntegerFunctions2; +} VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL; + + + +#define VK_INTEL_performance_query 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPerformanceConfigurationINTEL) +#define VK_INTEL_PERFORMANCE_QUERY_SPEC_VERSION 2 +#define VK_INTEL_PERFORMANCE_QUERY_EXTENSION_NAME "VK_INTEL_performance_query" + +typedef enum VkPerformanceConfigurationTypeINTEL { + VK_PERFORMANCE_CONFIGURATION_TYPE_COMMAND_QUEUE_METRICS_DISCOVERY_ACTIVATED_INTEL = 0, + VK_PERFORMANCE_CONFIGURATION_TYPE_MAX_ENUM_INTEL = 0x7FFFFFFF +} VkPerformanceConfigurationTypeINTEL; + +typedef enum 
VkQueryPoolSamplingModeINTEL { + VK_QUERY_POOL_SAMPLING_MODE_MANUAL_INTEL = 0, + VK_QUERY_POOL_SAMPLING_MODE_MAX_ENUM_INTEL = 0x7FFFFFFF +} VkQueryPoolSamplingModeINTEL; + +typedef enum VkPerformanceOverrideTypeINTEL { + VK_PERFORMANCE_OVERRIDE_TYPE_NULL_HARDWARE_INTEL = 0, + VK_PERFORMANCE_OVERRIDE_TYPE_FLUSH_GPU_CACHES_INTEL = 1, + VK_PERFORMANCE_OVERRIDE_TYPE_MAX_ENUM_INTEL = 0x7FFFFFFF +} VkPerformanceOverrideTypeINTEL; + +typedef enum VkPerformanceParameterTypeINTEL { + VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL = 0, + VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL = 1, + VK_PERFORMANCE_PARAMETER_TYPE_MAX_ENUM_INTEL = 0x7FFFFFFF +} VkPerformanceParameterTypeINTEL; + +typedef enum VkPerformanceValueTypeINTEL { + VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL = 0, + VK_PERFORMANCE_VALUE_TYPE_UINT64_INTEL = 1, + VK_PERFORMANCE_VALUE_TYPE_FLOAT_INTEL = 2, + VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL = 3, + VK_PERFORMANCE_VALUE_TYPE_STRING_INTEL = 4, + VK_PERFORMANCE_VALUE_TYPE_MAX_ENUM_INTEL = 0x7FFFFFFF +} VkPerformanceValueTypeINTEL; +typedef union VkPerformanceValueDataINTEL { + uint32_t value32; + uint64_t value64; + float valueFloat; + VkBool32 valueBool; + const char* valueString; +} VkPerformanceValueDataINTEL; + +typedef struct VkPerformanceValueINTEL { + VkPerformanceValueTypeINTEL type; + VkPerformanceValueDataINTEL data; +} VkPerformanceValueINTEL; + +typedef struct VkInitializePerformanceApiInfoINTEL { + VkStructureType sType; + const void* pNext; + void* pUserData; +} VkInitializePerformanceApiInfoINTEL; + +typedef struct VkQueryPoolPerformanceQueryCreateInfoINTEL { + VkStructureType sType; + const void* pNext; + VkQueryPoolSamplingModeINTEL performanceCountersSampling; +} VkQueryPoolPerformanceQueryCreateInfoINTEL; + +typedef VkQueryPoolPerformanceQueryCreateInfoINTEL VkQueryPoolCreateInfoINTEL; + +typedef struct VkPerformanceMarkerInfoINTEL { + VkStructureType sType; + const void* pNext; + uint64_t marker; +} 
VkPerformanceMarkerInfoINTEL; + +typedef struct VkPerformanceStreamMarkerInfoINTEL { + VkStructureType sType; + const void* pNext; + uint32_t marker; +} VkPerformanceStreamMarkerInfoINTEL; + +typedef struct VkPerformanceOverrideInfoINTEL { + VkStructureType sType; + const void* pNext; + VkPerformanceOverrideTypeINTEL type; + VkBool32 enable; + uint64_t parameter; +} VkPerformanceOverrideInfoINTEL; + +typedef struct VkPerformanceConfigurationAcquireInfoINTEL { + VkStructureType sType; + const void* pNext; + VkPerformanceConfigurationTypeINTEL type; +} VkPerformanceConfigurationAcquireInfoINTEL; + +typedef VkResult (VKAPI_PTR *PFN_vkInitializePerformanceApiINTEL)(VkDevice device, const VkInitializePerformanceApiInfoINTEL* pInitializeInfo); +typedef void (VKAPI_PTR *PFN_vkUninitializePerformanceApiINTEL)(VkDevice device); +typedef VkResult (VKAPI_PTR *PFN_vkCmdSetPerformanceMarkerINTEL)(VkCommandBuffer commandBuffer, const VkPerformanceMarkerInfoINTEL* pMarkerInfo); +typedef VkResult (VKAPI_PTR *PFN_vkCmdSetPerformanceStreamMarkerINTEL)(VkCommandBuffer commandBuffer, const VkPerformanceStreamMarkerInfoINTEL* pMarkerInfo); +typedef VkResult (VKAPI_PTR *PFN_vkCmdSetPerformanceOverrideINTEL)(VkCommandBuffer commandBuffer, const VkPerformanceOverrideInfoINTEL* pOverrideInfo); +typedef VkResult (VKAPI_PTR *PFN_vkAcquirePerformanceConfigurationINTEL)(VkDevice device, const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo, VkPerformanceConfigurationINTEL* pConfiguration); +typedef VkResult (VKAPI_PTR *PFN_vkReleasePerformanceConfigurationINTEL)(VkDevice device, VkPerformanceConfigurationINTEL configuration); +typedef VkResult (VKAPI_PTR *PFN_vkQueueSetPerformanceConfigurationINTEL)(VkQueue queue, VkPerformanceConfigurationINTEL configuration); +typedef VkResult (VKAPI_PTR *PFN_vkGetPerformanceParameterINTEL)(VkDevice device, VkPerformanceParameterTypeINTEL parameter, VkPerformanceValueINTEL* pValue); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL 
vkInitializePerformanceApiINTEL( + VkDevice device, + const VkInitializePerformanceApiInfoINTEL* pInitializeInfo); + +VKAPI_ATTR void VKAPI_CALL vkUninitializePerformanceApiINTEL( + VkDevice device); + +VKAPI_ATTR VkResult VKAPI_CALL vkCmdSetPerformanceMarkerINTEL( + VkCommandBuffer commandBuffer, + const VkPerformanceMarkerInfoINTEL* pMarkerInfo); + +VKAPI_ATTR VkResult VKAPI_CALL vkCmdSetPerformanceStreamMarkerINTEL( + VkCommandBuffer commandBuffer, + const VkPerformanceStreamMarkerInfoINTEL* pMarkerInfo); + +VKAPI_ATTR VkResult VKAPI_CALL vkCmdSetPerformanceOverrideINTEL( + VkCommandBuffer commandBuffer, + const VkPerformanceOverrideInfoINTEL* pOverrideInfo); + +VKAPI_ATTR VkResult VKAPI_CALL vkAcquirePerformanceConfigurationINTEL( + VkDevice device, + const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo, + VkPerformanceConfigurationINTEL* pConfiguration); + +VKAPI_ATTR VkResult VKAPI_CALL vkReleasePerformanceConfigurationINTEL( + VkDevice device, + VkPerformanceConfigurationINTEL configuration); + +VKAPI_ATTR VkResult VKAPI_CALL vkQueueSetPerformanceConfigurationINTEL( + VkQueue queue, + VkPerformanceConfigurationINTEL configuration); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPerformanceParameterINTEL( + VkDevice device, + VkPerformanceParameterTypeINTEL parameter, + VkPerformanceValueINTEL* pValue); +#endif + + +#define VK_EXT_pci_bus_info 1 +#define VK_EXT_PCI_BUS_INFO_SPEC_VERSION 2 +#define VK_EXT_PCI_BUS_INFO_EXTENSION_NAME "VK_EXT_pci_bus_info" +typedef struct VkPhysicalDevicePCIBusInfoPropertiesEXT { + VkStructureType sType; + void* pNext; + uint32_t pciDomain; + uint32_t pciBus; + uint32_t pciDevice; + uint32_t pciFunction; +} VkPhysicalDevicePCIBusInfoPropertiesEXT; + + + +#define VK_AMD_display_native_hdr 1 +#define VK_AMD_DISPLAY_NATIVE_HDR_SPEC_VERSION 1 +#define VK_AMD_DISPLAY_NATIVE_HDR_EXTENSION_NAME "VK_AMD_display_native_hdr" +typedef struct VkDisplayNativeHdrSurfaceCapabilitiesAMD { + VkStructureType sType; + void* pNext; + VkBool32 
localDimmingSupport; +} VkDisplayNativeHdrSurfaceCapabilitiesAMD; + +typedef struct VkSwapchainDisplayNativeHdrCreateInfoAMD { + VkStructureType sType; + const void* pNext; + VkBool32 localDimmingEnable; +} VkSwapchainDisplayNativeHdrCreateInfoAMD; + +typedef void (VKAPI_PTR *PFN_vkSetLocalDimmingAMD)(VkDevice device, VkSwapchainKHR swapChain, VkBool32 localDimmingEnable); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkSetLocalDimmingAMD( + VkDevice device, + VkSwapchainKHR swapChain, + VkBool32 localDimmingEnable); +#endif + + +#define VK_EXT_fragment_density_map 1 +#define VK_EXT_FRAGMENT_DENSITY_MAP_SPEC_VERSION 1 +#define VK_EXT_FRAGMENT_DENSITY_MAP_EXTENSION_NAME "VK_EXT_fragment_density_map" +typedef struct VkPhysicalDeviceFragmentDensityMapFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 fragmentDensityMap; + VkBool32 fragmentDensityMapDynamic; + VkBool32 fragmentDensityMapNonSubsampledImages; +} VkPhysicalDeviceFragmentDensityMapFeaturesEXT; + +typedef struct VkPhysicalDeviceFragmentDensityMapPropertiesEXT { + VkStructureType sType; + void* pNext; + VkExtent2D minFragmentDensityTexelSize; + VkExtent2D maxFragmentDensityTexelSize; + VkBool32 fragmentDensityInvocations; +} VkPhysicalDeviceFragmentDensityMapPropertiesEXT; + +typedef struct VkRenderPassFragmentDensityMapCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkAttachmentReference fragmentDensityMapAttachment; +} VkRenderPassFragmentDensityMapCreateInfoEXT; + + + +#define VK_EXT_scalar_block_layout 1 +#define VK_EXT_SCALAR_BLOCK_LAYOUT_SPEC_VERSION 1 +#define VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME "VK_EXT_scalar_block_layout" +typedef VkPhysicalDeviceScalarBlockLayoutFeatures VkPhysicalDeviceScalarBlockLayoutFeaturesEXT; + + + +#define VK_GOOGLE_hlsl_functionality1 1 +#define VK_GOOGLE_HLSL_FUNCTIONALITY1_SPEC_VERSION 1 +#define VK_GOOGLE_HLSL_FUNCTIONALITY1_EXTENSION_NAME "VK_GOOGLE_hlsl_functionality1" + + +#define VK_GOOGLE_decorate_string 1 +#define 
VK_GOOGLE_DECORATE_STRING_SPEC_VERSION 1 +#define VK_GOOGLE_DECORATE_STRING_EXTENSION_NAME "VK_GOOGLE_decorate_string" + + +#define VK_EXT_subgroup_size_control 1 +#define VK_EXT_SUBGROUP_SIZE_CONTROL_SPEC_VERSION 2 +#define VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME "VK_EXT_subgroup_size_control" +typedef struct VkPhysicalDeviceSubgroupSizeControlFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 subgroupSizeControl; + VkBool32 computeFullSubgroups; +} VkPhysicalDeviceSubgroupSizeControlFeaturesEXT; + +typedef struct VkPhysicalDeviceSubgroupSizeControlPropertiesEXT { + VkStructureType sType; + void* pNext; + uint32_t minSubgroupSize; + uint32_t maxSubgroupSize; + uint32_t maxComputeWorkgroupSubgroups; + VkShaderStageFlags requiredSubgroupSizeStages; +} VkPhysicalDeviceSubgroupSizeControlPropertiesEXT; + +typedef struct VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT { + VkStructureType sType; + void* pNext; + uint32_t requiredSubgroupSize; +} VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT; + + + +#define VK_AMD_shader_core_properties2 1 +#define VK_AMD_SHADER_CORE_PROPERTIES_2_SPEC_VERSION 1 +#define VK_AMD_SHADER_CORE_PROPERTIES_2_EXTENSION_NAME "VK_AMD_shader_core_properties2" + +typedef enum VkShaderCorePropertiesFlagBitsAMD { + VK_SHADER_CORE_PROPERTIES_FLAG_BITS_MAX_ENUM_AMD = 0x7FFFFFFF +} VkShaderCorePropertiesFlagBitsAMD; +typedef VkFlags VkShaderCorePropertiesFlagsAMD; +typedef struct VkPhysicalDeviceShaderCoreProperties2AMD { + VkStructureType sType; + void* pNext; + VkShaderCorePropertiesFlagsAMD shaderCoreFeatures; + uint32_t activeComputeUnitCount; +} VkPhysicalDeviceShaderCoreProperties2AMD; + + + +#define VK_AMD_device_coherent_memory 1 +#define VK_AMD_DEVICE_COHERENT_MEMORY_SPEC_VERSION 1 +#define VK_AMD_DEVICE_COHERENT_MEMORY_EXTENSION_NAME "VK_AMD_device_coherent_memory" +typedef struct VkPhysicalDeviceCoherentMemoryFeaturesAMD { + VkStructureType sType; + void* pNext; + VkBool32 deviceCoherentMemory; +} 
VkPhysicalDeviceCoherentMemoryFeaturesAMD; + + + +#define VK_EXT_memory_budget 1 +#define VK_EXT_MEMORY_BUDGET_SPEC_VERSION 1 +#define VK_EXT_MEMORY_BUDGET_EXTENSION_NAME "VK_EXT_memory_budget" +typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT { + VkStructureType sType; + void* pNext; + VkDeviceSize heapBudget[VK_MAX_MEMORY_HEAPS]; + VkDeviceSize heapUsage[VK_MAX_MEMORY_HEAPS]; +} VkPhysicalDeviceMemoryBudgetPropertiesEXT; + + + +#define VK_EXT_memory_priority 1 +#define VK_EXT_MEMORY_PRIORITY_SPEC_VERSION 1 +#define VK_EXT_MEMORY_PRIORITY_EXTENSION_NAME "VK_EXT_memory_priority" +typedef struct VkPhysicalDeviceMemoryPriorityFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 memoryPriority; +} VkPhysicalDeviceMemoryPriorityFeaturesEXT; + +typedef struct VkMemoryPriorityAllocateInfoEXT { + VkStructureType sType; + const void* pNext; + float priority; +} VkMemoryPriorityAllocateInfoEXT; + + + +#define VK_NV_dedicated_allocation_image_aliasing 1 +#define VK_NV_DEDICATED_ALLOCATION_IMAGE_ALIASING_SPEC_VERSION 1 +#define VK_NV_DEDICATED_ALLOCATION_IMAGE_ALIASING_EXTENSION_NAME "VK_NV_dedicated_allocation_image_aliasing" +typedef struct VkPhysicalDeviceDedicatedAllocationImageAliasingFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 dedicatedAllocationImageAliasing; +} VkPhysicalDeviceDedicatedAllocationImageAliasingFeaturesNV; + + + +#define VK_EXT_buffer_device_address 1 +#define VK_EXT_BUFFER_DEVICE_ADDRESS_SPEC_VERSION 2 +#define VK_EXT_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME "VK_EXT_buffer_device_address" +typedef struct VkPhysicalDeviceBufferDeviceAddressFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 bufferDeviceAddress; + VkBool32 bufferDeviceAddressCaptureReplay; + VkBool32 bufferDeviceAddressMultiDevice; +} VkPhysicalDeviceBufferDeviceAddressFeaturesEXT; + +typedef VkPhysicalDeviceBufferDeviceAddressFeaturesEXT VkPhysicalDeviceBufferAddressFeaturesEXT; + +typedef VkBufferDeviceAddressInfo 
VkBufferDeviceAddressInfoEXT; + +typedef struct VkBufferDeviceAddressCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkDeviceAddress deviceAddress; +} VkBufferDeviceAddressCreateInfoEXT; + +typedef VkDeviceAddress (VKAPI_PTR *PFN_vkGetBufferDeviceAddressEXT)(VkDevice device, const VkBufferDeviceAddressInfo* pInfo); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkDeviceAddress VKAPI_CALL vkGetBufferDeviceAddressEXT( + VkDevice device, + const VkBufferDeviceAddressInfo* pInfo); +#endif + + +#define VK_EXT_tooling_info 1 +#define VK_EXT_TOOLING_INFO_SPEC_VERSION 1 +#define VK_EXT_TOOLING_INFO_EXTENSION_NAME "VK_EXT_tooling_info" + +typedef enum VkToolPurposeFlagBitsEXT { + VK_TOOL_PURPOSE_VALIDATION_BIT_EXT = 0x00000001, + VK_TOOL_PURPOSE_PROFILING_BIT_EXT = 0x00000002, + VK_TOOL_PURPOSE_TRACING_BIT_EXT = 0x00000004, + VK_TOOL_PURPOSE_ADDITIONAL_FEATURES_BIT_EXT = 0x00000008, + VK_TOOL_PURPOSE_MODIFYING_FEATURES_BIT_EXT = 0x00000010, + VK_TOOL_PURPOSE_DEBUG_REPORTING_BIT_EXT = 0x00000020, + VK_TOOL_PURPOSE_DEBUG_MARKERS_BIT_EXT = 0x00000040, + VK_TOOL_PURPOSE_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF +} VkToolPurposeFlagBitsEXT; +typedef VkFlags VkToolPurposeFlagsEXT; +typedef struct VkPhysicalDeviceToolPropertiesEXT { + VkStructureType sType; + void* pNext; + char name[VK_MAX_EXTENSION_NAME_SIZE]; + char version[VK_MAX_EXTENSION_NAME_SIZE]; + VkToolPurposeFlagsEXT purposes; + char description[VK_MAX_DESCRIPTION_SIZE]; + char layer[VK_MAX_EXTENSION_NAME_SIZE]; +} VkPhysicalDeviceToolPropertiesEXT; + +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceToolPropertiesEXT)(VkPhysicalDevice physicalDevice, uint32_t* pToolCount, VkPhysicalDeviceToolPropertiesEXT* pToolProperties); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceToolPropertiesEXT( + VkPhysicalDevice physicalDevice, + uint32_t* pToolCount, + VkPhysicalDeviceToolPropertiesEXT* pToolProperties); +#endif + + +#define VK_EXT_separate_stencil_usage 1 +#define 
VK_EXT_SEPARATE_STENCIL_USAGE_SPEC_VERSION 1 +#define VK_EXT_SEPARATE_STENCIL_USAGE_EXTENSION_NAME "VK_EXT_separate_stencil_usage" +typedef VkImageStencilUsageCreateInfo VkImageStencilUsageCreateInfoEXT; + + + +#define VK_EXT_validation_features 1 +#define VK_EXT_VALIDATION_FEATURES_SPEC_VERSION 3 +#define VK_EXT_VALIDATION_FEATURES_EXTENSION_NAME "VK_EXT_validation_features" + +typedef enum VkValidationFeatureEnableEXT { + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT = 0, + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT = 1, + VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT = 2, + VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT = 3, + VK_VALIDATION_FEATURE_ENABLE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkValidationFeatureEnableEXT; + +typedef enum VkValidationFeatureDisableEXT { + VK_VALIDATION_FEATURE_DISABLE_ALL_EXT = 0, + VK_VALIDATION_FEATURE_DISABLE_SHADERS_EXT = 1, + VK_VALIDATION_FEATURE_DISABLE_THREAD_SAFETY_EXT = 2, + VK_VALIDATION_FEATURE_DISABLE_API_PARAMETERS_EXT = 3, + VK_VALIDATION_FEATURE_DISABLE_OBJECT_LIFETIMES_EXT = 4, + VK_VALIDATION_FEATURE_DISABLE_CORE_CHECKS_EXT = 5, + VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT = 6, + VK_VALIDATION_FEATURE_DISABLE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkValidationFeatureDisableEXT; +typedef struct VkValidationFeaturesEXT { + VkStructureType sType; + const void* pNext; + uint32_t enabledValidationFeatureCount; + const VkValidationFeatureEnableEXT* pEnabledValidationFeatures; + uint32_t disabledValidationFeatureCount; + const VkValidationFeatureDisableEXT* pDisabledValidationFeatures; +} VkValidationFeaturesEXT; + + + +#define VK_NV_cooperative_matrix 1 +#define VK_NV_COOPERATIVE_MATRIX_SPEC_VERSION 1 +#define VK_NV_COOPERATIVE_MATRIX_EXTENSION_NAME "VK_NV_cooperative_matrix" + +typedef enum VkComponentTypeNV { + VK_COMPONENT_TYPE_FLOAT16_NV = 0, + VK_COMPONENT_TYPE_FLOAT32_NV = 1, + VK_COMPONENT_TYPE_FLOAT64_NV = 2, + VK_COMPONENT_TYPE_SINT8_NV = 3, + VK_COMPONENT_TYPE_SINT16_NV = 4, + 
VK_COMPONENT_TYPE_SINT32_NV = 5, + VK_COMPONENT_TYPE_SINT64_NV = 6, + VK_COMPONENT_TYPE_UINT8_NV = 7, + VK_COMPONENT_TYPE_UINT16_NV = 8, + VK_COMPONENT_TYPE_UINT32_NV = 9, + VK_COMPONENT_TYPE_UINT64_NV = 10, + VK_COMPONENT_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkComponentTypeNV; + +typedef enum VkScopeNV { + VK_SCOPE_DEVICE_NV = 1, + VK_SCOPE_WORKGROUP_NV = 2, + VK_SCOPE_SUBGROUP_NV = 3, + VK_SCOPE_QUEUE_FAMILY_NV = 5, + VK_SCOPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkScopeNV; +typedef struct VkCooperativeMatrixPropertiesNV { + VkStructureType sType; + void* pNext; + uint32_t MSize; + uint32_t NSize; + uint32_t KSize; + VkComponentTypeNV AType; + VkComponentTypeNV BType; + VkComponentTypeNV CType; + VkComponentTypeNV DType; + VkScopeNV scope; +} VkCooperativeMatrixPropertiesNV; + +typedef struct VkPhysicalDeviceCooperativeMatrixFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 cooperativeMatrix; + VkBool32 cooperativeMatrixRobustBufferAccess; +} VkPhysicalDeviceCooperativeMatrixFeaturesNV; + +typedef struct VkPhysicalDeviceCooperativeMatrixPropertiesNV { + VkStructureType sType; + void* pNext; + VkShaderStageFlags cooperativeMatrixSupportedStages; +} VkPhysicalDeviceCooperativeMatrixPropertiesNV; + +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkCooperativeMatrixPropertiesNV* pProperties); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceCooperativeMatrixPropertiesNV( + VkPhysicalDevice physicalDevice, + uint32_t* pPropertyCount, + VkCooperativeMatrixPropertiesNV* pProperties); +#endif + + +#define VK_NV_coverage_reduction_mode 1 +#define VK_NV_COVERAGE_REDUCTION_MODE_SPEC_VERSION 1 +#define VK_NV_COVERAGE_REDUCTION_MODE_EXTENSION_NAME "VK_NV_coverage_reduction_mode" + +typedef enum VkCoverageReductionModeNV { + VK_COVERAGE_REDUCTION_MODE_MERGE_NV = 0, + VK_COVERAGE_REDUCTION_MODE_TRUNCATE_NV = 1, + VK_COVERAGE_REDUCTION_MODE_MAX_ENUM_NV = 
0x7FFFFFFF +} VkCoverageReductionModeNV; +typedef VkFlags VkPipelineCoverageReductionStateCreateFlagsNV; +typedef struct VkPhysicalDeviceCoverageReductionModeFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 coverageReductionMode; +} VkPhysicalDeviceCoverageReductionModeFeaturesNV; + +typedef struct VkPipelineCoverageReductionStateCreateInfoNV { + VkStructureType sType; + const void* pNext; + VkPipelineCoverageReductionStateCreateFlagsNV flags; + VkCoverageReductionModeNV coverageReductionMode; +} VkPipelineCoverageReductionStateCreateInfoNV; + +typedef struct VkFramebufferMixedSamplesCombinationNV { + VkStructureType sType; + void* pNext; + VkCoverageReductionModeNV coverageReductionMode; + VkSampleCountFlagBits rasterizationSamples; + VkSampleCountFlags depthStencilSamples; + VkSampleCountFlags colorSamples; +} VkFramebufferMixedSamplesCombinationNV; + +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSupportedFramebufferMixedSamplesCombinationsNV)(VkPhysicalDevice physicalDevice, uint32_t* pCombinationCount, VkFramebufferMixedSamplesCombinationNV* pCombinations); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSupportedFramebufferMixedSamplesCombinationsNV( + VkPhysicalDevice physicalDevice, + uint32_t* pCombinationCount, + VkFramebufferMixedSamplesCombinationNV* pCombinations); +#endif + + +#define VK_EXT_fragment_shader_interlock 1 +#define VK_EXT_FRAGMENT_SHADER_INTERLOCK_SPEC_VERSION 1 +#define VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME "VK_EXT_fragment_shader_interlock" +typedef struct VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 fragmentShaderSampleInterlock; + VkBool32 fragmentShaderPixelInterlock; + VkBool32 fragmentShaderShadingRateInterlock; +} VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT; + + + +#define VK_EXT_ycbcr_image_arrays 1 +#define VK_EXT_YCBCR_IMAGE_ARRAYS_SPEC_VERSION 1 +#define VK_EXT_YCBCR_IMAGE_ARRAYS_EXTENSION_NAME 
"VK_EXT_ycbcr_image_arrays" +typedef struct VkPhysicalDeviceYcbcrImageArraysFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 ycbcrImageArrays; +} VkPhysicalDeviceYcbcrImageArraysFeaturesEXT; + + + +#define VK_EXT_headless_surface 1 +#define VK_EXT_HEADLESS_SURFACE_SPEC_VERSION 1 +#define VK_EXT_HEADLESS_SURFACE_EXTENSION_NAME "VK_EXT_headless_surface" +typedef VkFlags VkHeadlessSurfaceCreateFlagsEXT; +typedef struct VkHeadlessSurfaceCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkHeadlessSurfaceCreateFlagsEXT flags; +} VkHeadlessSurfaceCreateInfoEXT; + +typedef VkResult (VKAPI_PTR *PFN_vkCreateHeadlessSurfaceEXT)(VkInstance instance, const VkHeadlessSurfaceCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateHeadlessSurfaceEXT( + VkInstance instance, + const VkHeadlessSurfaceCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); +#endif + + +#define VK_EXT_line_rasterization 1 +#define VK_EXT_LINE_RASTERIZATION_SPEC_VERSION 1 +#define VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME "VK_EXT_line_rasterization" + +typedef enum VkLineRasterizationModeEXT { + VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT = 0, + VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT = 1, + VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT = 2, + VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT = 3, + VK_LINE_RASTERIZATION_MODE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkLineRasterizationModeEXT; +typedef struct VkPhysicalDeviceLineRasterizationFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 rectangularLines; + VkBool32 bresenhamLines; + VkBool32 smoothLines; + VkBool32 stippledRectangularLines; + VkBool32 stippledBresenhamLines; + VkBool32 stippledSmoothLines; +} VkPhysicalDeviceLineRasterizationFeaturesEXT; + +typedef struct VkPhysicalDeviceLineRasterizationPropertiesEXT { + VkStructureType sType; + void* pNext; + uint32_t 
lineSubPixelPrecisionBits; +} VkPhysicalDeviceLineRasterizationPropertiesEXT; + +typedef struct VkPipelineRasterizationLineStateCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkLineRasterizationModeEXT lineRasterizationMode; + VkBool32 stippledLineEnable; + uint32_t lineStippleFactor; + uint16_t lineStipplePattern; +} VkPipelineRasterizationLineStateCreateInfoEXT; + +typedef void (VKAPI_PTR *PFN_vkCmdSetLineStippleEXT)(VkCommandBuffer commandBuffer, uint32_t lineStippleFactor, uint16_t lineStipplePattern); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdSetLineStippleEXT( + VkCommandBuffer commandBuffer, + uint32_t lineStippleFactor, + uint16_t lineStipplePattern); +#endif + + +#define VK_EXT_shader_atomic_float 1 +#define VK_EXT_SHADER_ATOMIC_FLOAT_SPEC_VERSION 1 +#define VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME "VK_EXT_shader_atomic_float" +typedef struct VkPhysicalDeviceShaderAtomicFloatFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 shaderBufferFloat32Atomics; + VkBool32 shaderBufferFloat32AtomicAdd; + VkBool32 shaderBufferFloat64Atomics; + VkBool32 shaderBufferFloat64AtomicAdd; + VkBool32 shaderSharedFloat32Atomics; + VkBool32 shaderSharedFloat32AtomicAdd; + VkBool32 shaderSharedFloat64Atomics; + VkBool32 shaderSharedFloat64AtomicAdd; + VkBool32 shaderImageFloat32Atomics; + VkBool32 shaderImageFloat32AtomicAdd; + VkBool32 sparseImageFloat32Atomics; + VkBool32 sparseImageFloat32AtomicAdd; +} VkPhysicalDeviceShaderAtomicFloatFeaturesEXT; + + + +#define VK_EXT_host_query_reset 1 +#define VK_EXT_HOST_QUERY_RESET_SPEC_VERSION 1 +#define VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME "VK_EXT_host_query_reset" +typedef VkPhysicalDeviceHostQueryResetFeatures VkPhysicalDeviceHostQueryResetFeaturesEXT; + +typedef void (VKAPI_PTR *PFN_vkResetQueryPoolEXT)(VkDevice device, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkResetQueryPoolEXT( + VkDevice device, 
+ VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount); +#endif + + +#define VK_EXT_index_type_uint8 1 +#define VK_EXT_INDEX_TYPE_UINT8_SPEC_VERSION 1 +#define VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME "VK_EXT_index_type_uint8" +typedef struct VkPhysicalDeviceIndexTypeUint8FeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 indexTypeUint8; +} VkPhysicalDeviceIndexTypeUint8FeaturesEXT; + + + +#define VK_EXT_extended_dynamic_state 1 +#define VK_EXT_EXTENDED_DYNAMIC_STATE_SPEC_VERSION 1 +#define VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME "VK_EXT_extended_dynamic_state" +typedef struct VkPhysicalDeviceExtendedDynamicStateFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 extendedDynamicState; +} VkPhysicalDeviceExtendedDynamicStateFeaturesEXT; + +typedef void (VKAPI_PTR *PFN_vkCmdSetCullModeEXT)(VkCommandBuffer commandBuffer, VkCullModeFlags cullMode); +typedef void (VKAPI_PTR *PFN_vkCmdSetFrontFaceEXT)(VkCommandBuffer commandBuffer, VkFrontFace frontFace); +typedef void (VKAPI_PTR *PFN_vkCmdSetPrimitiveTopologyEXT)(VkCommandBuffer commandBuffer, VkPrimitiveTopology primitiveTopology); +typedef void (VKAPI_PTR *PFN_vkCmdSetViewportWithCountEXT)(VkCommandBuffer commandBuffer, uint32_t viewportCount, const VkViewport* pViewports); +typedef void (VKAPI_PTR *PFN_vkCmdSetScissorWithCountEXT)(VkCommandBuffer commandBuffer, uint32_t scissorCount, const VkRect2D* pScissors); +typedef void (VKAPI_PTR *PFN_vkCmdBindVertexBuffers2EXT)(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets, const VkDeviceSize* pSizes, const VkDeviceSize* pStrides); +typedef void (VKAPI_PTR *PFN_vkCmdSetDepthTestEnableEXT)(VkCommandBuffer commandBuffer, VkBool32 depthTestEnable); +typedef void (VKAPI_PTR *PFN_vkCmdSetDepthWriteEnableEXT)(VkCommandBuffer commandBuffer, VkBool32 depthWriteEnable); +typedef void (VKAPI_PTR *PFN_vkCmdSetDepthCompareOpEXT)(VkCommandBuffer commandBuffer, 
VkCompareOp depthCompareOp); +typedef void (VKAPI_PTR *PFN_vkCmdSetDepthBoundsTestEnableEXT)(VkCommandBuffer commandBuffer, VkBool32 depthBoundsTestEnable); +typedef void (VKAPI_PTR *PFN_vkCmdSetStencilTestEnableEXT)(VkCommandBuffer commandBuffer, VkBool32 stencilTestEnable); +typedef void (VKAPI_PTR *PFN_vkCmdSetStencilOpEXT)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp, VkCompareOp compareOp); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdSetCullModeEXT( + VkCommandBuffer commandBuffer, + VkCullModeFlags cullMode); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetFrontFaceEXT( + VkCommandBuffer commandBuffer, + VkFrontFace frontFace); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetPrimitiveTopologyEXT( + VkCommandBuffer commandBuffer, + VkPrimitiveTopology primitiveTopology); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetViewportWithCountEXT( + VkCommandBuffer commandBuffer, + uint32_t viewportCount, + const VkViewport* pViewports); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetScissorWithCountEXT( + VkCommandBuffer commandBuffer, + uint32_t scissorCount, + const VkRect2D* pScissors); + +VKAPI_ATTR void VKAPI_CALL vkCmdBindVertexBuffers2EXT( + VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets, + const VkDeviceSize* pSizes, + const VkDeviceSize* pStrides); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetDepthTestEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 depthTestEnable); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetDepthWriteEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 depthWriteEnable); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetDepthCompareOpEXT( + VkCommandBuffer commandBuffer, + VkCompareOp depthCompareOp); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetDepthBoundsTestEnableEXT( + VkCommandBuffer commandBuffer, + VkBool32 depthBoundsTestEnable); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetStencilTestEnableEXT( + VkCommandBuffer 
commandBuffer, + VkBool32 stencilTestEnable); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetStencilOpEXT( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + VkStencilOp failOp, + VkStencilOp passOp, + VkStencilOp depthFailOp, + VkCompareOp compareOp); +#endif + + +#define VK_EXT_shader_demote_to_helper_invocation 1 +#define VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_SPEC_VERSION 1 +#define VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME "VK_EXT_shader_demote_to_helper_invocation" +typedef struct VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 shaderDemoteToHelperInvocation; +} VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT; + + + +#define VK_NV_device_generated_commands 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkIndirectCommandsLayoutNV) +#define VK_NV_DEVICE_GENERATED_COMMANDS_SPEC_VERSION 3 +#define VK_NV_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME "VK_NV_device_generated_commands" + +typedef enum VkIndirectCommandsTokenTypeNV { + VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV = 0, + VK_INDIRECT_COMMANDS_TOKEN_TYPE_STATE_FLAGS_NV = 1, + VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV = 2, + VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV = 3, + VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV = 4, + VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV = 5, + VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV = 6, + VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_TASKS_NV = 7, + VK_INDIRECT_COMMANDS_TOKEN_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkIndirectCommandsTokenTypeNV; + +typedef enum VkIndirectStateFlagBitsNV { + VK_INDIRECT_STATE_FLAG_FRONTFACE_BIT_NV = 0x00000001, + VK_INDIRECT_STATE_FLAG_BITS_MAX_ENUM_NV = 0x7FFFFFFF +} VkIndirectStateFlagBitsNV; +typedef VkFlags VkIndirectStateFlagsNV; + +typedef enum VkIndirectCommandsLayoutUsageFlagBitsNV { + VK_INDIRECT_COMMANDS_LAYOUT_USAGE_EXPLICIT_PREPROCESS_BIT_NV = 0x00000001, + VK_INDIRECT_COMMANDS_LAYOUT_USAGE_INDEXED_SEQUENCES_BIT_NV = 0x00000002, + 
VK_INDIRECT_COMMANDS_LAYOUT_USAGE_UNORDERED_SEQUENCES_BIT_NV = 0x00000004, + VK_INDIRECT_COMMANDS_LAYOUT_USAGE_FLAG_BITS_MAX_ENUM_NV = 0x7FFFFFFF +} VkIndirectCommandsLayoutUsageFlagBitsNV; +typedef VkFlags VkIndirectCommandsLayoutUsageFlagsNV; +typedef struct VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV { + VkStructureType sType; + void* pNext; + uint32_t maxGraphicsShaderGroupCount; + uint32_t maxIndirectSequenceCount; + uint32_t maxIndirectCommandsTokenCount; + uint32_t maxIndirectCommandsStreamCount; + uint32_t maxIndirectCommandsTokenOffset; + uint32_t maxIndirectCommandsStreamStride; + uint32_t minSequencesCountBufferOffsetAlignment; + uint32_t minSequencesIndexBufferOffsetAlignment; + uint32_t minIndirectCommandsBufferOffsetAlignment; +} VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV; + +typedef struct VkPhysicalDeviceDeviceGeneratedCommandsFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 deviceGeneratedCommands; +} VkPhysicalDeviceDeviceGeneratedCommandsFeaturesNV; + +typedef struct VkGraphicsShaderGroupCreateInfoNV { + VkStructureType sType; + const void* pNext; + uint32_t stageCount; + const VkPipelineShaderStageCreateInfo* pStages; + const VkPipelineVertexInputStateCreateInfo* pVertexInputState; + const VkPipelineTessellationStateCreateInfo* pTessellationState; +} VkGraphicsShaderGroupCreateInfoNV; + +typedef struct VkGraphicsPipelineShaderGroupsCreateInfoNV { + VkStructureType sType; + const void* pNext; + uint32_t groupCount; + const VkGraphicsShaderGroupCreateInfoNV* pGroups; + uint32_t pipelineCount; + const VkPipeline* pPipelines; +} VkGraphicsPipelineShaderGroupsCreateInfoNV; + +typedef struct VkBindShaderGroupIndirectCommandNV { + uint32_t groupIndex; +} VkBindShaderGroupIndirectCommandNV; + +typedef struct VkBindIndexBufferIndirectCommandNV { + VkDeviceAddress bufferAddress; + uint32_t size; + VkIndexType indexType; +} VkBindIndexBufferIndirectCommandNV; + +typedef struct VkBindVertexBufferIndirectCommandNV { + 
VkDeviceAddress bufferAddress; + uint32_t size; + uint32_t stride; +} VkBindVertexBufferIndirectCommandNV; + +typedef struct VkSetStateFlagsIndirectCommandNV { + uint32_t data; +} VkSetStateFlagsIndirectCommandNV; + +typedef struct VkIndirectCommandsStreamNV { + VkBuffer buffer; + VkDeviceSize offset; +} VkIndirectCommandsStreamNV; + +typedef struct VkIndirectCommandsLayoutTokenNV { + VkStructureType sType; + const void* pNext; + VkIndirectCommandsTokenTypeNV tokenType; + uint32_t stream; + uint32_t offset; + uint32_t vertexBindingUnit; + VkBool32 vertexDynamicStride; + VkPipelineLayout pushconstantPipelineLayout; + VkShaderStageFlags pushconstantShaderStageFlags; + uint32_t pushconstantOffset; + uint32_t pushconstantSize; + VkIndirectStateFlagsNV indirectStateFlags; + uint32_t indexTypeCount; + const VkIndexType* pIndexTypes; + const uint32_t* pIndexTypeValues; +} VkIndirectCommandsLayoutTokenNV; + +typedef struct VkIndirectCommandsLayoutCreateInfoNV { + VkStructureType sType; + const void* pNext; + VkIndirectCommandsLayoutUsageFlagsNV flags; + VkPipelineBindPoint pipelineBindPoint; + uint32_t tokenCount; + const VkIndirectCommandsLayoutTokenNV* pTokens; + uint32_t streamCount; + const uint32_t* pStreamStrides; +} VkIndirectCommandsLayoutCreateInfoNV; + +typedef struct VkGeneratedCommandsInfoNV { + VkStructureType sType; + const void* pNext; + VkPipelineBindPoint pipelineBindPoint; + VkPipeline pipeline; + VkIndirectCommandsLayoutNV indirectCommandsLayout; + uint32_t streamCount; + const VkIndirectCommandsStreamNV* pStreams; + uint32_t sequencesCount; + VkBuffer preprocessBuffer; + VkDeviceSize preprocessOffset; + VkDeviceSize preprocessSize; + VkBuffer sequencesCountBuffer; + VkDeviceSize sequencesCountOffset; + VkBuffer sequencesIndexBuffer; + VkDeviceSize sequencesIndexOffset; +} VkGeneratedCommandsInfoNV; + +typedef struct VkGeneratedCommandsMemoryRequirementsInfoNV { + VkStructureType sType; + const void* pNext; + VkPipelineBindPoint pipelineBindPoint; + 
VkPipeline pipeline; + VkIndirectCommandsLayoutNV indirectCommandsLayout; + uint32_t maxSequencesCount; +} VkGeneratedCommandsMemoryRequirementsInfoNV; + +typedef void (VKAPI_PTR *PFN_vkGetGeneratedCommandsMemoryRequirementsNV)(VkDevice device, const VkGeneratedCommandsMemoryRequirementsInfoNV* pInfo, VkMemoryRequirements2* pMemoryRequirements); +typedef void (VKAPI_PTR *PFN_vkCmdPreprocessGeneratedCommandsNV)(VkCommandBuffer commandBuffer, const VkGeneratedCommandsInfoNV* pGeneratedCommandsInfo); +typedef void (VKAPI_PTR *PFN_vkCmdExecuteGeneratedCommandsNV)(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed, const VkGeneratedCommandsInfoNV* pGeneratedCommandsInfo); +typedef void (VKAPI_PTR *PFN_vkCmdBindPipelineShaderGroupNV)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline, uint32_t groupIndex); +typedef VkResult (VKAPI_PTR *PFN_vkCreateIndirectCommandsLayoutNV)(VkDevice device, const VkIndirectCommandsLayoutCreateInfoNV* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkIndirectCommandsLayoutNV* pIndirectCommandsLayout); +typedef void (VKAPI_PTR *PFN_vkDestroyIndirectCommandsLayoutNV)(VkDevice device, VkIndirectCommandsLayoutNV indirectCommandsLayout, const VkAllocationCallbacks* pAllocator); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkGetGeneratedCommandsMemoryRequirementsNV( + VkDevice device, + const VkGeneratedCommandsMemoryRequirementsInfoNV* pInfo, + VkMemoryRequirements2* pMemoryRequirements); + +VKAPI_ATTR void VKAPI_CALL vkCmdPreprocessGeneratedCommandsNV( + VkCommandBuffer commandBuffer, + const VkGeneratedCommandsInfoNV* pGeneratedCommandsInfo); + +VKAPI_ATTR void VKAPI_CALL vkCmdExecuteGeneratedCommandsNV( + VkCommandBuffer commandBuffer, + VkBool32 isPreprocessed, + const VkGeneratedCommandsInfoNV* pGeneratedCommandsInfo); + +VKAPI_ATTR void VKAPI_CALL vkCmdBindPipelineShaderGroupNV( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipeline pipeline, + 
uint32_t groupIndex); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateIndirectCommandsLayoutNV( + VkDevice device, + const VkIndirectCommandsLayoutCreateInfoNV* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkIndirectCommandsLayoutNV* pIndirectCommandsLayout); + +VKAPI_ATTR void VKAPI_CALL vkDestroyIndirectCommandsLayoutNV( + VkDevice device, + VkIndirectCommandsLayoutNV indirectCommandsLayout, + const VkAllocationCallbacks* pAllocator); +#endif + + +#define VK_EXT_texel_buffer_alignment 1 +#define VK_EXT_TEXEL_BUFFER_ALIGNMENT_SPEC_VERSION 1 +#define VK_EXT_TEXEL_BUFFER_ALIGNMENT_EXTENSION_NAME "VK_EXT_texel_buffer_alignment" +typedef struct VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 texelBufferAlignment; +} VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT; + +typedef struct VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT { + VkStructureType sType; + void* pNext; + VkDeviceSize storageTexelBufferOffsetAlignmentBytes; + VkBool32 storageTexelBufferOffsetSingleTexelAlignment; + VkDeviceSize uniformTexelBufferOffsetAlignmentBytes; + VkBool32 uniformTexelBufferOffsetSingleTexelAlignment; +} VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT; + + + +#define VK_QCOM_render_pass_transform 1 +#define VK_QCOM_RENDER_PASS_TRANSFORM_SPEC_VERSION 1 +#define VK_QCOM_RENDER_PASS_TRANSFORM_EXTENSION_NAME "VK_QCOM_render_pass_transform" +typedef struct VkRenderPassTransformBeginInfoQCOM { + VkStructureType sType; + void* pNext; + VkSurfaceTransformFlagBitsKHR transform; +} VkRenderPassTransformBeginInfoQCOM; + +typedef struct VkCommandBufferInheritanceRenderPassTransformInfoQCOM { + VkStructureType sType; + void* pNext; + VkSurfaceTransformFlagBitsKHR transform; + VkRect2D renderArea; +} VkCommandBufferInheritanceRenderPassTransformInfoQCOM; + + + +#define VK_EXT_robustness2 1 +#define VK_EXT_ROBUSTNESS_2_SPEC_VERSION 1 +#define VK_EXT_ROBUSTNESS_2_EXTENSION_NAME "VK_EXT_robustness2" +typedef struct 
VkPhysicalDeviceRobustness2FeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 robustBufferAccess2; + VkBool32 robustImageAccess2; + VkBool32 nullDescriptor; +} VkPhysicalDeviceRobustness2FeaturesEXT; + +typedef struct VkPhysicalDeviceRobustness2PropertiesEXT { + VkStructureType sType; + void* pNext; + VkDeviceSize robustStorageBufferAccessSizeAlignment; + VkDeviceSize robustUniformBufferAccessSizeAlignment; +} VkPhysicalDeviceRobustness2PropertiesEXT; + + + +#define VK_EXT_custom_border_color 1 +#define VK_EXT_CUSTOM_BORDER_COLOR_SPEC_VERSION 12 +#define VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME "VK_EXT_custom_border_color" +typedef struct VkSamplerCustomBorderColorCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkClearColorValue customBorderColor; + VkFormat format; +} VkSamplerCustomBorderColorCreateInfoEXT; + +typedef struct VkPhysicalDeviceCustomBorderColorPropertiesEXT { + VkStructureType sType; + void* pNext; + uint32_t maxCustomBorderColorSamplers; +} VkPhysicalDeviceCustomBorderColorPropertiesEXT; + +typedef struct VkPhysicalDeviceCustomBorderColorFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 customBorderColors; + VkBool32 customBorderColorWithoutFormat; +} VkPhysicalDeviceCustomBorderColorFeaturesEXT; + + + +#define VK_GOOGLE_user_type 1 +#define VK_GOOGLE_USER_TYPE_SPEC_VERSION 1 +#define VK_GOOGLE_USER_TYPE_EXTENSION_NAME "VK_GOOGLE_user_type" + + +#define VK_EXT_private_data 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPrivateDataSlotEXT) +#define VK_EXT_PRIVATE_DATA_SPEC_VERSION 1 +#define VK_EXT_PRIVATE_DATA_EXTENSION_NAME "VK_EXT_private_data" + +typedef enum VkPrivateDataSlotCreateFlagBitsEXT { + VK_PRIVATE_DATA_SLOT_CREATE_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF +} VkPrivateDataSlotCreateFlagBitsEXT; +typedef VkFlags VkPrivateDataSlotCreateFlagsEXT; +typedef struct VkPhysicalDevicePrivateDataFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 privateData; +} VkPhysicalDevicePrivateDataFeaturesEXT; 
+ +typedef struct VkDevicePrivateDataCreateInfoEXT { + VkStructureType sType; + const void* pNext; + uint32_t privateDataSlotRequestCount; +} VkDevicePrivateDataCreateInfoEXT; + +typedef struct VkPrivateDataSlotCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkPrivateDataSlotCreateFlagsEXT flags; +} VkPrivateDataSlotCreateInfoEXT; + +typedef VkResult (VKAPI_PTR *PFN_vkCreatePrivateDataSlotEXT)(VkDevice device, const VkPrivateDataSlotCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPrivateDataSlotEXT* pPrivateDataSlot); +typedef void (VKAPI_PTR *PFN_vkDestroyPrivateDataSlotEXT)(VkDevice device, VkPrivateDataSlotEXT privateDataSlot, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkSetPrivateDataEXT)(VkDevice device, VkObjectType objectType, uint64_t objectHandle, VkPrivateDataSlotEXT privateDataSlot, uint64_t data); +typedef void (VKAPI_PTR *PFN_vkGetPrivateDataEXT)(VkDevice device, VkObjectType objectType, uint64_t objectHandle, VkPrivateDataSlotEXT privateDataSlot, uint64_t* pData); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreatePrivateDataSlotEXT( + VkDevice device, + const VkPrivateDataSlotCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPrivateDataSlotEXT* pPrivateDataSlot); + +VKAPI_ATTR void VKAPI_CALL vkDestroyPrivateDataSlotEXT( + VkDevice device, + VkPrivateDataSlotEXT privateDataSlot, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkSetPrivateDataEXT( + VkDevice device, + VkObjectType objectType, + uint64_t objectHandle, + VkPrivateDataSlotEXT privateDataSlot, + uint64_t data); + +VKAPI_ATTR void VKAPI_CALL vkGetPrivateDataEXT( + VkDevice device, + VkObjectType objectType, + uint64_t objectHandle, + VkPrivateDataSlotEXT privateDataSlot, + uint64_t* pData); +#endif + + +#define VK_EXT_pipeline_creation_cache_control 1 +#define VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_SPEC_VERSION 3 +#define 
VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME "VK_EXT_pipeline_creation_cache_control" +typedef struct VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 pipelineCreationCacheControl; +} VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT; + + + +#define VK_NV_device_diagnostics_config 1 +#define VK_NV_DEVICE_DIAGNOSTICS_CONFIG_SPEC_VERSION 1 +#define VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME "VK_NV_device_diagnostics_config" + +typedef enum VkDeviceDiagnosticsConfigFlagBitsNV { + VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV = 0x00000001, + VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV = 0x00000002, + VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV = 0x00000004, + VK_DEVICE_DIAGNOSTICS_CONFIG_FLAG_BITS_MAX_ENUM_NV = 0x7FFFFFFF +} VkDeviceDiagnosticsConfigFlagBitsNV; +typedef VkFlags VkDeviceDiagnosticsConfigFlagsNV; +typedef struct VkPhysicalDeviceDiagnosticsConfigFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 diagnosticsConfig; +} VkPhysicalDeviceDiagnosticsConfigFeaturesNV; + +typedef struct VkDeviceDiagnosticsConfigCreateInfoNV { + VkStructureType sType; + const void* pNext; + VkDeviceDiagnosticsConfigFlagsNV flags; +} VkDeviceDiagnosticsConfigCreateInfoNV; + + + +#define VK_QCOM_render_pass_store_ops 1 +#define VK_QCOM_render_pass_store_ops_SPEC_VERSION 2 +#define VK_QCOM_render_pass_store_ops_EXTENSION_NAME "VK_QCOM_render_pass_store_ops" + + +#define VK_EXT_fragment_density_map2 1 +#define VK_EXT_FRAGMENT_DENSITY_MAP_2_SPEC_VERSION 1 +#define VK_EXT_FRAGMENT_DENSITY_MAP_2_EXTENSION_NAME "VK_EXT_fragment_density_map2" +typedef struct VkPhysicalDeviceFragmentDensityMap2FeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 fragmentDensityMapDeferred; +} VkPhysicalDeviceFragmentDensityMap2FeaturesEXT; + +typedef struct VkPhysicalDeviceFragmentDensityMap2PropertiesEXT { + VkStructureType sType; + void* pNext; + 
VkBool32 subsampledLoads; + VkBool32 subsampledCoarseReconstructionEarlyAccess; + uint32_t maxSubsampledArrayLayers; + uint32_t maxDescriptorSetSubsampledSamplers; +} VkPhysicalDeviceFragmentDensityMap2PropertiesEXT; + + + +#define VK_EXT_image_robustness 1 +#define VK_EXT_IMAGE_ROBUSTNESS_SPEC_VERSION 1 +#define VK_EXT_IMAGE_ROBUSTNESS_EXTENSION_NAME "VK_EXT_image_robustness" +typedef struct VkPhysicalDeviceImageRobustnessFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 robustImageAccess; +} VkPhysicalDeviceImageRobustnessFeaturesEXT; + + #ifdef __cplusplus } #endif diff --git a/third_party/vulkan/vulkan_directfb.h b/third_party/vulkan/vulkan_directfb.h new file mode 100644 index 000000000..f75bd3a4e --- /dev/null +++ b/third_party/vulkan/vulkan_directfb.h @@ -0,0 +1,54 @@ +#ifndef VULKAN_DIRECTFB_H_ +#define VULKAN_DIRECTFB_H_ 1 + +/* +** Copyright (c) 2015-2020 The Khronos Group Inc. +** +** SPDX-License-Identifier: Apache-2.0 +*/ + +/* +** This header is generated from the Khronos Vulkan XML API Registry. 
+** +*/ + + +#ifdef __cplusplus +extern "C" { +#endif + + + +#define VK_EXT_directfb_surface 1 +#define VK_EXT_DIRECTFB_SURFACE_SPEC_VERSION 1 +#define VK_EXT_DIRECTFB_SURFACE_EXTENSION_NAME "VK_EXT_directfb_surface" +typedef VkFlags VkDirectFBSurfaceCreateFlagsEXT; +typedef struct VkDirectFBSurfaceCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkDirectFBSurfaceCreateFlagsEXT flags; + IDirectFB* dfb; + IDirectFBSurface* surface; +} VkDirectFBSurfaceCreateInfoEXT; + +typedef VkResult (VKAPI_PTR *PFN_vkCreateDirectFBSurfaceEXT)(VkInstance instance, const VkDirectFBSurfaceCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); +typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceDirectFBPresentationSupportEXT)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, IDirectFB* dfb); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateDirectFBSurfaceEXT( + VkInstance instance, + const VkDirectFBSurfaceCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); + +VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceDirectFBPresentationSupportEXT( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + IDirectFB* dfb); +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/vulkan/vulkan_fuchsia.h b/third_party/vulkan/vulkan_fuchsia.h new file mode 100644 index 000000000..03e27cb0a --- /dev/null +++ b/third_party/vulkan/vulkan_fuchsia.h @@ -0,0 +1,47 @@ +#ifndef VULKAN_FUCHSIA_H_ +#define VULKAN_FUCHSIA_H_ 1 + +/* +** Copyright (c) 2015-2020 The Khronos Group Inc. +** +** SPDX-License-Identifier: Apache-2.0 +*/ + +/* +** This header is generated from the Khronos Vulkan XML API Registry. 
+** +*/ + + +#ifdef __cplusplus +extern "C" { +#endif + + + +#define VK_FUCHSIA_imagepipe_surface 1 +#define VK_FUCHSIA_IMAGEPIPE_SURFACE_SPEC_VERSION 1 +#define VK_FUCHSIA_IMAGEPIPE_SURFACE_EXTENSION_NAME "VK_FUCHSIA_imagepipe_surface" +typedef VkFlags VkImagePipeSurfaceCreateFlagsFUCHSIA; +typedef struct VkImagePipeSurfaceCreateInfoFUCHSIA { + VkStructureType sType; + const void* pNext; + VkImagePipeSurfaceCreateFlagsFUCHSIA flags; + zx_handle_t imagePipeHandle; +} VkImagePipeSurfaceCreateInfoFUCHSIA; + +typedef VkResult (VKAPI_PTR *PFN_vkCreateImagePipeSurfaceFUCHSIA)(VkInstance instance, const VkImagePipeSurfaceCreateInfoFUCHSIA* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateImagePipeSurfaceFUCHSIA( + VkInstance instance, + const VkImagePipeSurfaceCreateInfoFUCHSIA* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/vulkan/vulkan_ggp.h b/third_party/vulkan/vulkan_ggp.h new file mode 100644 index 000000000..273c88005 --- /dev/null +++ b/third_party/vulkan/vulkan_ggp.h @@ -0,0 +1,58 @@ +#ifndef VULKAN_GGP_H_ +#define VULKAN_GGP_H_ 1 + +/* +** Copyright (c) 2015-2020 The Khronos Group Inc. +** +** SPDX-License-Identifier: Apache-2.0 +*/ + +/* +** This header is generated from the Khronos Vulkan XML API Registry. 
+** +*/ + + +#ifdef __cplusplus +extern "C" { +#endif + + + +#define VK_GGP_stream_descriptor_surface 1 +#define VK_GGP_STREAM_DESCRIPTOR_SURFACE_SPEC_VERSION 1 +#define VK_GGP_STREAM_DESCRIPTOR_SURFACE_EXTENSION_NAME "VK_GGP_stream_descriptor_surface" +typedef VkFlags VkStreamDescriptorSurfaceCreateFlagsGGP; +typedef struct VkStreamDescriptorSurfaceCreateInfoGGP { + VkStructureType sType; + const void* pNext; + VkStreamDescriptorSurfaceCreateFlagsGGP flags; + GgpStreamDescriptor streamDescriptor; +} VkStreamDescriptorSurfaceCreateInfoGGP; + +typedef VkResult (VKAPI_PTR *PFN_vkCreateStreamDescriptorSurfaceGGP)(VkInstance instance, const VkStreamDescriptorSurfaceCreateInfoGGP* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateStreamDescriptorSurfaceGGP( + VkInstance instance, + const VkStreamDescriptorSurfaceCreateInfoGGP* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); +#endif + + +#define VK_GGP_frame_token 1 +#define VK_GGP_FRAME_TOKEN_SPEC_VERSION 1 +#define VK_GGP_FRAME_TOKEN_EXTENSION_NAME "VK_GGP_frame_token" +typedef struct VkPresentFrameTokenGGP { + VkStructureType sType; + const void* pNext; + GgpFrameToken frameToken; +} VkPresentFrameTokenGGP; + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/vulkan/vulkan_ios.h b/third_party/vulkan/vulkan_ios.h index a0924816d..651945cc7 100644 --- a/third_party/vulkan/vulkan_ios.h +++ b/third_party/vulkan/vulkan_ios.h @@ -1,24 +1,10 @@ #ifndef VULKAN_IOS_H_ #define VULKAN_IOS_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2020 The Khronos Group Inc. ** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. 
-** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. +** SPDX-License-Identifier: Apache-2.0 */ /* @@ -27,12 +13,16 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + + #define VK_MVK_ios_surface 1 #define VK_MVK_IOS_SURFACE_SPEC_VERSION 2 #define VK_MVK_IOS_SURFACE_EXTENSION_NAME "VK_MVK_ios_surface" - typedef VkFlags VkIOSSurfaceCreateFlagsMVK; - typedef struct VkIOSSurfaceCreateInfoMVK { VkStructureType sType; const void* pNext; @@ -40,7 +30,6 @@ typedef struct VkIOSSurfaceCreateInfoMVK { const void* pView; } VkIOSSurfaceCreateInfoMVK; - typedef VkResult (VKAPI_PTR *PFN_vkCreateIOSSurfaceMVK)(VkInstance instance, const VkIOSSurfaceCreateInfoMVK* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); #ifndef VK_NO_PROTOTYPES diff --git a/third_party/vulkan/vulkan_macos.h b/third_party/vulkan/vulkan_macos.h index ff0b70180..3208b728e 100644 --- a/third_party/vulkan/vulkan_macos.h +++ b/third_party/vulkan/vulkan_macos.h @@ -1,24 +1,10 @@ #ifndef VULKAN_MACOS_H_ #define VULKAN_MACOS_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2020 The Khronos Group Inc. ** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. 
-** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. +** SPDX-License-Identifier: Apache-2.0 */ /* @@ -27,12 +13,16 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + + #define VK_MVK_macos_surface 1 #define VK_MVK_MACOS_SURFACE_SPEC_VERSION 2 #define VK_MVK_MACOS_SURFACE_EXTENSION_NAME "VK_MVK_macos_surface" - typedef VkFlags VkMacOSSurfaceCreateFlagsMVK; - typedef struct VkMacOSSurfaceCreateInfoMVK { VkStructureType sType; const void* pNext; @@ -40,7 +30,6 @@ typedef struct VkMacOSSurfaceCreateInfoMVK { const void* pView; } VkMacOSSurfaceCreateInfoMVK; - typedef VkResult (VKAPI_PTR *PFN_vkCreateMacOSSurfaceMVK)(VkInstance instance, const VkMacOSSurfaceCreateInfoMVK* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); #ifndef VK_NO_PROTOTYPES diff --git a/third_party/vulkan/vulkan_metal.h b/third_party/vulkan/vulkan_metal.h new file mode 100644 index 000000000..99f097d95 --- /dev/null +++ b/third_party/vulkan/vulkan_metal.h @@ -0,0 +1,54 @@ +#ifndef VULKAN_METAL_H_ +#define VULKAN_METAL_H_ 1 + +/* +** Copyright (c) 2015-2020 The Khronos Group Inc. +** +** SPDX-License-Identifier: Apache-2.0 +*/ + +/* +** This header is generated from the Khronos Vulkan XML API Registry. 
+** +*/ + + +#ifdef __cplusplus +extern "C" { +#endif + + + +#define VK_EXT_metal_surface 1 + +#ifdef __OBJC__ +@class CAMetalLayer; +#else +typedef void CAMetalLayer; +#endif + +#define VK_EXT_METAL_SURFACE_SPEC_VERSION 1 +#define VK_EXT_METAL_SURFACE_EXTENSION_NAME "VK_EXT_metal_surface" +typedef VkFlags VkMetalSurfaceCreateFlagsEXT; +typedef struct VkMetalSurfaceCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkMetalSurfaceCreateFlagsEXT flags; + const CAMetalLayer* pLayer; +} VkMetalSurfaceCreateInfoEXT; + +typedef VkResult (VKAPI_PTR *PFN_vkCreateMetalSurfaceEXT)(VkInstance instance, const VkMetalSurfaceCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateMetalSurfaceEXT( + VkInstance instance, + const VkMetalSurfaceCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface); +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/vulkan/vulkan_mir.h b/third_party/vulkan/vulkan_mir.h deleted file mode 100644 index 7d24ed27a..000000000 --- a/third_party/vulkan/vulkan_mir.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef VULKAN_MIR_H_ -#define VULKAN_MIR_H_ 1 - -#ifdef __cplusplus -extern "C" { -#endif - -/* -** Copyright (c) 2015-2018 The Khronos Group Inc. -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. 
-*/ - -/* -** This header is generated from the Khronos Vulkan XML API Registry. -** -*/ - - -#define VK_KHR_mir_surface 1 -#define VK_KHR_MIR_SURFACE_SPEC_VERSION 4 -#define VK_KHR_MIR_SURFACE_EXTENSION_NAME "VK_KHR_mir_surface" - -typedef VkFlags VkMirSurfaceCreateFlagsKHR; - -typedef struct VkMirSurfaceCreateInfoKHR { - VkStructureType sType; - const void* pNext; - VkMirSurfaceCreateFlagsKHR flags; - MirConnection* connection; - MirSurface* mirSurface; -} VkMirSurfaceCreateInfoKHR; - - -typedef VkResult (VKAPI_PTR *PFN_vkCreateMirSurfaceKHR)(VkInstance instance, const VkMirSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); -typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceMirPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, MirConnection* connection); - -#ifndef VK_NO_PROTOTYPES -VKAPI_ATTR VkResult VKAPI_CALL vkCreateMirSurfaceKHR( - VkInstance instance, - const VkMirSurfaceCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSurfaceKHR* pSurface); - -VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceMirPresentationSupportKHR( - VkPhysicalDevice physicalDevice, - uint32_t queueFamilyIndex, - MirConnection* connection); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/third_party/vulkan/vulkan_vi.h b/third_party/vulkan/vulkan_vi.h index 015166bfc..2e62d7d3a 100644 --- a/third_party/vulkan/vulkan_vi.h +++ b/third_party/vulkan/vulkan_vi.h @@ -1,24 +1,10 @@ #ifndef VULKAN_VI_H_ #define VULKAN_VI_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2020 The Khronos Group Inc. ** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. 
-** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. +** SPDX-License-Identifier: Apache-2.0 */ /* @@ -27,12 +13,16 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + + #define VK_NN_vi_surface 1 #define VK_NN_VI_SURFACE_SPEC_VERSION 1 #define VK_NN_VI_SURFACE_EXTENSION_NAME "VK_NN_vi_surface" - typedef VkFlags VkViSurfaceCreateFlagsNN; - typedef struct VkViSurfaceCreateInfoNN { VkStructureType sType; const void* pNext; @@ -40,7 +30,6 @@ typedef struct VkViSurfaceCreateInfoNN { void* window; } VkViSurfaceCreateInfoNN; - typedef VkResult (VKAPI_PTR *PFN_vkCreateViSurfaceNN)(VkInstance instance, const VkViSurfaceCreateInfoNN* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); #ifndef VK_NO_PROTOTYPES diff --git a/third_party/vulkan/vulkan_wayland.h b/third_party/vulkan/vulkan_wayland.h index 5ba0827aa..f7b307e51 100644 --- a/third_party/vulkan/vulkan_wayland.h +++ b/third_party/vulkan/vulkan_wayland.h @@ -1,24 +1,10 @@ #ifndef VULKAN_WAYLAND_H_ #define VULKAN_WAYLAND_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2020 The Khronos Group Inc. ** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. 
-** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. +** SPDX-License-Identifier: Apache-2.0 */ /* @@ -27,12 +13,16 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + + #define VK_KHR_wayland_surface 1 #define VK_KHR_WAYLAND_SURFACE_SPEC_VERSION 6 #define VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME "VK_KHR_wayland_surface" - typedef VkFlags VkWaylandSurfaceCreateFlagsKHR; - typedef struct VkWaylandSurfaceCreateInfoKHR { VkStructureType sType; const void* pNext; @@ -41,7 +31,6 @@ typedef struct VkWaylandSurfaceCreateInfoKHR { struct wl_surface* surface; } VkWaylandSurfaceCreateInfoKHR; - typedef VkResult (VKAPI_PTR *PFN_vkCreateWaylandSurfaceKHR)(VkInstance instance, const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceWaylandPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, struct wl_display* display); diff --git a/third_party/vulkan/vulkan_win32.h b/third_party/vulkan/vulkan_win32.h index 6a85409eb..4b561ea10 100644 --- a/third_party/vulkan/vulkan_win32.h +++ b/third_party/vulkan/vulkan_win32.h @@ -1,24 +1,10 @@ #ifndef VULKAN_WIN32_H_ #define VULKAN_WIN32_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2020 The Khronos Group Inc. ** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. 
-** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. +** SPDX-License-Identifier: Apache-2.0 */ /* @@ -27,12 +13,16 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + + #define VK_KHR_win32_surface 1 #define VK_KHR_WIN32_SURFACE_SPEC_VERSION 6 #define VK_KHR_WIN32_SURFACE_EXTENSION_NAME "VK_KHR_win32_surface" - typedef VkFlags VkWin32SurfaceCreateFlagsKHR; - typedef struct VkWin32SurfaceCreateInfoKHR { VkStructureType sType; const void* pNext; @@ -41,7 +31,6 @@ typedef struct VkWin32SurfaceCreateInfoKHR { HWND hwnd; } VkWin32SurfaceCreateInfoKHR; - typedef VkResult (VKAPI_PTR *PFN_vkCreateWin32SurfaceKHR)(VkInstance instance, const VkWin32SurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceWin32PresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex); @@ -57,10 +46,10 @@ VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceWin32PresentationSupportKHR( uint32_t queueFamilyIndex); #endif + #define VK_KHR_external_memory_win32 1 #define VK_KHR_EXTERNAL_MEMORY_WIN32_SPEC_VERSION 1 #define VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME "VK_KHR_external_memory_win32" - typedef struct VkImportMemoryWin32HandleInfoKHR { VkStructureType sType; const void* pNext; @@ -90,7 +79,6 @@ typedef struct VkMemoryGetWin32HandleInfoKHR { VkExternalMemoryHandleTypeFlagBits handleType; } VkMemoryGetWin32HandleInfoKHR; - typedef VkResult (VKAPI_PTR *PFN_vkGetMemoryWin32HandleKHR)(VkDevice device, const VkMemoryGetWin32HandleInfoKHR* pGetWin32HandleInfo, HANDLE* pHandle); typedef VkResult 
(VKAPI_PTR *PFN_vkGetMemoryWin32HandlePropertiesKHR)(VkDevice device, VkExternalMemoryHandleTypeFlagBits handleType, HANDLE handle, VkMemoryWin32HandlePropertiesKHR* pMemoryWin32HandleProperties); @@ -107,10 +95,10 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetMemoryWin32HandlePropertiesKHR( VkMemoryWin32HandlePropertiesKHR* pMemoryWin32HandleProperties); #endif + #define VK_KHR_win32_keyed_mutex 1 #define VK_KHR_WIN32_KEYED_MUTEX_SPEC_VERSION 1 #define VK_KHR_WIN32_KEYED_MUTEX_EXTENSION_NAME "VK_KHR_win32_keyed_mutex" - typedef struct VkWin32KeyedMutexAcquireReleaseInfoKHR { VkStructureType sType; const void* pNext; @@ -128,7 +116,6 @@ typedef struct VkWin32KeyedMutexAcquireReleaseInfoKHR { #define VK_KHR_external_semaphore_win32 1 #define VK_KHR_EXTERNAL_SEMAPHORE_WIN32_SPEC_VERSION 1 #define VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME "VK_KHR_external_semaphore_win32" - typedef struct VkImportSemaphoreWin32HandleInfoKHR { VkStructureType sType; const void* pNext; @@ -163,7 +150,6 @@ typedef struct VkSemaphoreGetWin32HandleInfoKHR { VkExternalSemaphoreHandleTypeFlagBits handleType; } VkSemaphoreGetWin32HandleInfoKHR; - typedef VkResult (VKAPI_PTR *PFN_vkImportSemaphoreWin32HandleKHR)(VkDevice device, const VkImportSemaphoreWin32HandleInfoKHR* pImportSemaphoreWin32HandleInfo); typedef VkResult (VKAPI_PTR *PFN_vkGetSemaphoreWin32HandleKHR)(VkDevice device, const VkSemaphoreGetWin32HandleInfoKHR* pGetWin32HandleInfo, HANDLE* pHandle); @@ -178,10 +164,10 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetSemaphoreWin32HandleKHR( HANDLE* pHandle); #endif + #define VK_KHR_external_fence_win32 1 #define VK_KHR_EXTERNAL_FENCE_WIN32_SPEC_VERSION 1 #define VK_KHR_EXTERNAL_FENCE_WIN32_EXTENSION_NAME "VK_KHR_external_fence_win32" - typedef struct VkImportFenceWin32HandleInfoKHR { VkStructureType sType; const void* pNext; @@ -207,7 +193,6 @@ typedef struct VkFenceGetWin32HandleInfoKHR { VkExternalFenceHandleTypeFlagBits handleType; } VkFenceGetWin32HandleInfoKHR; - typedef VkResult 
(VKAPI_PTR *PFN_vkImportFenceWin32HandleKHR)(VkDevice device, const VkImportFenceWin32HandleInfoKHR* pImportFenceWin32HandleInfo); typedef VkResult (VKAPI_PTR *PFN_vkGetFenceWin32HandleKHR)(VkDevice device, const VkFenceGetWin32HandleInfoKHR* pGetWin32HandleInfo, HANDLE* pHandle); @@ -222,10 +207,10 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetFenceWin32HandleKHR( HANDLE* pHandle); #endif + #define VK_NV_external_memory_win32 1 #define VK_NV_EXTERNAL_MEMORY_WIN32_SPEC_VERSION 1 #define VK_NV_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME "VK_NV_external_memory_win32" - typedef struct VkImportMemoryWin32HandleInfoNV { VkStructureType sType; const void* pNext; @@ -240,7 +225,6 @@ typedef struct VkExportMemoryWin32HandleInfoNV { DWORD dwAccess; } VkExportMemoryWin32HandleInfoNV; - typedef VkResult (VKAPI_PTR *PFN_vkGetMemoryWin32HandleNV)(VkDevice device, VkDeviceMemory memory, VkExternalMemoryHandleTypeFlagsNV handleType, HANDLE* pHandle); #ifndef VK_NO_PROTOTYPES @@ -251,10 +235,10 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetMemoryWin32HandleNV( HANDLE* pHandle); #endif -#define VK_NV_win32_keyed_mutex 1 -#define VK_NV_WIN32_KEYED_MUTEX_SPEC_VERSION 1 -#define VK_NV_WIN32_KEYED_MUTEX_EXTENSION_NAME "VK_NV_win32_keyed_mutex" +#define VK_NV_win32_keyed_mutex 1 +#define VK_NV_WIN32_KEYED_MUTEX_SPEC_VERSION 2 +#define VK_NV_WIN32_KEYED_MUTEX_EXTENSION_NAME "VK_NV_win32_keyed_mutex" typedef struct VkWin32KeyedMutexAcquireReleaseInfoNV { VkStructureType sType; const void* pNext; @@ -269,6 +253,61 @@ typedef struct VkWin32KeyedMutexAcquireReleaseInfoNV { +#define VK_EXT_full_screen_exclusive 1 +#define VK_EXT_FULL_SCREEN_EXCLUSIVE_SPEC_VERSION 4 +#define VK_EXT_FULL_SCREEN_EXCLUSIVE_EXTENSION_NAME "VK_EXT_full_screen_exclusive" + +typedef enum VkFullScreenExclusiveEXT { + VK_FULL_SCREEN_EXCLUSIVE_DEFAULT_EXT = 0, + VK_FULL_SCREEN_EXCLUSIVE_ALLOWED_EXT = 1, + VK_FULL_SCREEN_EXCLUSIVE_DISALLOWED_EXT = 2, + VK_FULL_SCREEN_EXCLUSIVE_APPLICATION_CONTROLLED_EXT = 3, + 
VK_FULL_SCREEN_EXCLUSIVE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkFullScreenExclusiveEXT; +typedef struct VkSurfaceFullScreenExclusiveInfoEXT { + VkStructureType sType; + void* pNext; + VkFullScreenExclusiveEXT fullScreenExclusive; +} VkSurfaceFullScreenExclusiveInfoEXT; + +typedef struct VkSurfaceCapabilitiesFullScreenExclusiveEXT { + VkStructureType sType; + void* pNext; + VkBool32 fullScreenExclusiveSupported; +} VkSurfaceCapabilitiesFullScreenExclusiveEXT; + +typedef struct VkSurfaceFullScreenExclusiveWin32InfoEXT { + VkStructureType sType; + const void* pNext; + HMONITOR hmonitor; +} VkSurfaceFullScreenExclusiveWin32InfoEXT; + +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfacePresentModes2EXT)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo, uint32_t* pPresentModeCount, VkPresentModeKHR* pPresentModes); +typedef VkResult (VKAPI_PTR *PFN_vkAcquireFullScreenExclusiveModeEXT)(VkDevice device, VkSwapchainKHR swapchain); +typedef VkResult (VKAPI_PTR *PFN_vkReleaseFullScreenExclusiveModeEXT)(VkDevice device, VkSwapchainKHR swapchain); +typedef VkResult (VKAPI_PTR *PFN_vkGetDeviceGroupSurfacePresentModes2EXT)(VkDevice device, const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo, VkDeviceGroupPresentModeFlagsKHR* pModes); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfacePresentModes2EXT( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes); + +VKAPI_ATTR VkResult VKAPI_CALL vkAcquireFullScreenExclusiveModeEXT( + VkDevice device, + VkSwapchainKHR swapchain); + +VKAPI_ATTR VkResult VKAPI_CALL vkReleaseFullScreenExclusiveModeEXT( + VkDevice device, + VkSwapchainKHR swapchain); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetDeviceGroupSurfacePresentModes2EXT( + VkDevice device, + const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo, + VkDeviceGroupPresentModeFlagsKHR* pModes); +#endif + #ifdef __cplusplus } 
#endif diff --git a/third_party/vulkan/vulkan_xcb.h b/third_party/vulkan/vulkan_xcb.h index ba0360060..c5441b239 100644 --- a/third_party/vulkan/vulkan_xcb.h +++ b/third_party/vulkan/vulkan_xcb.h @@ -1,24 +1,10 @@ #ifndef VULKAN_XCB_H_ #define VULKAN_XCB_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2020 The Khronos Group Inc. ** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. +** SPDX-License-Identifier: Apache-2.0 */ /* @@ -27,12 +13,16 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + + #define VK_KHR_xcb_surface 1 #define VK_KHR_XCB_SURFACE_SPEC_VERSION 6 #define VK_KHR_XCB_SURFACE_EXTENSION_NAME "VK_KHR_xcb_surface" - typedef VkFlags VkXcbSurfaceCreateFlagsKHR; - typedef struct VkXcbSurfaceCreateInfoKHR { VkStructureType sType; const void* pNext; @@ -41,7 +31,6 @@ typedef struct VkXcbSurfaceCreateInfoKHR { xcb_window_t window; } VkXcbSurfaceCreateInfoKHR; - typedef VkResult (VKAPI_PTR *PFN_vkCreateXcbSurfaceKHR)(VkInstance instance, const VkXcbSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceXcbPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, xcb_connection_t* connection, xcb_visualid_t visual_id); diff --git a/third_party/vulkan/vulkan_xlib.h b/third_party/vulkan/vulkan_xlib.h index e1d967e01..c54628a7e 100644 --- 
a/third_party/vulkan/vulkan_xlib.h +++ b/third_party/vulkan/vulkan_xlib.h @@ -1,24 +1,10 @@ #ifndef VULKAN_XLIB_H_ #define VULKAN_XLIB_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2020 The Khronos Group Inc. ** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. +** SPDX-License-Identifier: Apache-2.0 */ /* @@ -27,12 +13,16 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + + #define VK_KHR_xlib_surface 1 #define VK_KHR_XLIB_SURFACE_SPEC_VERSION 6 #define VK_KHR_XLIB_SURFACE_EXTENSION_NAME "VK_KHR_xlib_surface" - typedef VkFlags VkXlibSurfaceCreateFlagsKHR; - typedef struct VkXlibSurfaceCreateInfoKHR { VkStructureType sType; const void* pNext; @@ -41,7 +31,6 @@ typedef struct VkXlibSurfaceCreateInfoKHR { Window window; } VkXlibSurfaceCreateInfoKHR; - typedef VkResult (VKAPI_PTR *PFN_vkCreateXlibSurfaceKHR)(VkInstance instance, const VkXlibSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceXlibPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, Display* dpy, VisualID visualID); diff --git a/third_party/vulkan/vulkan_xlib_xrandr.h b/third_party/vulkan/vulkan_xlib_xrandr.h index 117d01799..436432f84 100644 --- a/third_party/vulkan/vulkan_xlib_xrandr.h +++ b/third_party/vulkan/vulkan_xlib_xrandr.h @@ -1,24 +1,10 @@ #ifndef VULKAN_XLIB_XRANDR_H_ 
#define VULKAN_XLIB_XRANDR_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2020 The Khronos Group Inc. ** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. +** SPDX-License-Identifier: Apache-2.0 */ /* @@ -27,10 +13,15 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + + #define VK_EXT_acquire_xlib_display 1 #define VK_EXT_ACQUIRE_XLIB_DISPLAY_SPEC_VERSION 1 #define VK_EXT_ACQUIRE_XLIB_DISPLAY_EXTENSION_NAME "VK_EXT_acquire_xlib_display" - typedef VkResult (VKAPI_PTR *PFN_vkAcquireXlibDisplayEXT)(VkPhysicalDevice physicalDevice, Display* dpy, VkDisplayKHR display); typedef VkResult (VKAPI_PTR *PFN_vkGetRandROutputDisplayEXT)(VkPhysicalDevice physicalDevice, Display* dpy, RROutput rrOutput, VkDisplayKHR* pDisplay); From b3c2e2aee67eefa217c85e30eb4c1d59de7c13d5 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 1 Sep 2020 12:44:37 +0300 Subject: [PATCH 003/123] [Memory] Fix Protect range calculation --- src/xenia/memory.cc | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc index 834091a07..7e60797f5 100644 --- a/src/xenia/memory.cc +++ b/src/xenia/memory.cc @@ -1145,12 +1145,38 @@ bool BaseHeap::Release(uint32_t base_address, uint32_t* out_region_size) { bool BaseHeap::Protect(uint32_t address, uint32_t size, uint32_t protect, uint32_t* old_protect) { - uint32_t page_count = 
xe::round_up(size, page_size_) / page_size_; + if (!size) { + XELOGE("BaseHeap::Protect failed due to zero size"); + return false; + } + + // From the VirtualProtect MSDN page: + // + // "The region of affected pages includes all pages containing one or more + // bytes in the range from the lpAddress parameter to (lpAddress+dwSize). + // This means that a 2-byte range straddling a page boundary causes the + // protection attributes of both pages to be changed." + // + // "The access protection value can be set only on committed pages. If the + // state of any page in the specified region is not committed, the function + // fails and returns without modifying the access protection of any pages in + // the specified region." + uint32_t start_page_number = (address - heap_base_) / page_size_; - uint32_t end_page_number = start_page_number + page_count - 1; - start_page_number = - std::min(uint32_t(page_table_.size()) - 1, start_page_number); - end_page_number = std::min(uint32_t(page_table_.size()) - 1, end_page_number); + if (start_page_number >= page_table_.size()) { + XELOGE("BaseHeap::Protect failed due to out-of-bounds base address {:08X}", + address); + return false; + } + uint32_t end_page_number = + uint32_t((uint64_t(address) + size - 1 - heap_base_) / page_size_); + if (end_page_number >= page_table_.size()) { + XELOGE( + "BaseHeap::Protect failed due to out-of-bounds range ({:08X} bytes " + "from {:08x})", + size, address); + return false; + } auto global_lock = global_critical_region_.Acquire(); @@ -1173,6 +1199,7 @@ bool BaseHeap::Protect(uint32_t address, uint32_t size, uint32_t protect, // Attempt host change (hopefully won't fail). // We can only do this if our size matches system page granularity. 
+ uint32_t page_count = end_page_number - start_page_number + 1; if (page_size_ == xe::memory::page_size() || (((page_count * page_size_) % xe::memory::page_size() == 0) && ((start_page_number * page_size_) % xe::memory::page_size() == 0))) { From 524201eca4f8f7d658858bdc70114606f7d25145 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 5 Sep 2020 17:47:32 +0300 Subject: [PATCH 004/123] [GPU/D3D12] Letterboxing cropping to action-safe area --- src/xenia/gpu/d3d12/d3d12_graphics_system.cc | 35 ++++-- src/xenia/gpu/draw_util.cc | 112 +++++++++++++++++++ src/xenia/gpu/draw_util.h | 8 ++ src/xenia/ui/d3d12/d3d12_context.cc | 6 +- src/xenia/ui/window_win.cc | 27 +---- 5 files changed, 154 insertions(+), 34 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc index ee4656eec..8eaefd15f 100644 --- a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc +++ b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc @@ -10,7 +10,9 @@ #include "xenia/gpu/d3d12/d3d12_graphics_system.h" #include "xenia/base/logging.h" +#include "xenia/base/math.h" #include "xenia/gpu/d3d12/d3d12_command_processor.h" +#include "xenia/gpu/draw_util.h" #include "xenia/ui/d3d12/d3d12_util.h" #include "xenia/xbox.h" @@ -265,22 +267,39 @@ void D3D12GraphicsSystem::Swap(xe::ui::UIEvent* e) { return; } + uint32_t window_width, window_height; + display_context_->GetSwapChainSize(window_width, window_height); + + int32_t target_x, target_y; + uint32_t target_width, target_height; + draw_util::GetPresentArea(swap_state.width, swap_state.height, window_width, + window_height, target_x, target_y, target_width, + target_height); + // For safety. 
+ target_x = clamp(target_x, int32_t(D3D12_VIEWPORT_BOUNDS_MIN), + int32_t(D3D12_VIEWPORT_BOUNDS_MAX)); + target_y = clamp(target_y, int32_t(D3D12_VIEWPORT_BOUNDS_MIN), + int32_t(D3D12_VIEWPORT_BOUNDS_MAX)); + target_width = std::min( + target_width, uint32_t(int32_t(D3D12_VIEWPORT_BOUNDS_MAX) - target_x)); + target_height = std::min( + target_height, uint32_t(int32_t(D3D12_VIEWPORT_BOUNDS_MAX) - target_y)); + auto command_list = display_context_->GetSwapCommandList(); - uint32_t swap_width, swap_height; - display_context_->GetSwapChainSize(swap_width, swap_height); + // Assuming the window has already been cleared to the needed letterbox color. D3D12_VIEWPORT viewport; - viewport.TopLeftX = 0.0f; - viewport.TopLeftY = 0.0f; - viewport.Width = float(swap_width); - viewport.Height = float(swap_height); + viewport.TopLeftX = float(target_x); + viewport.TopLeftY = float(target_y); + viewport.Width = float(target_width); + viewport.Height = float(target_height); viewport.MinDepth = 0.0f; viewport.MaxDepth = 0.0f; command_list->RSSetViewports(1, &viewport); D3D12_RECT scissor; scissor.left = 0; scissor.top = 0; - scissor.right = swap_width; - scissor.bottom = swap_height; + scissor.right = window_width; + scissor.bottom = window_height; command_list->RSSetScissorRects(1, &scissor); command_list->SetDescriptorHeaps(1, &swap_srv_heap); StretchTextureToFrontBuffer( diff --git a/src/xenia/gpu/draw_util.cc b/src/xenia/gpu/draw_util.cc index 6c9ba1e73..d28df6d0e 100644 --- a/src/xenia/gpu/draw_util.cc +++ b/src/xenia/gpu/draw_util.cc @@ -9,6 +9,7 @@ #include "xenia/gpu/draw_util.h" +#include #include #include @@ -31,6 +32,36 @@ DEFINE_bool( "for certain games like GTA IV to work).", "GPU"); +DEFINE_bool( + present_stretch, true, + "Whether to rescale the image, instead of maintaining the original pixel " + "size, when presenting to the window. 
When this is disabled, other " + "positioning options are ignored.", + "GPU"); +DEFINE_bool( + present_letterbox, true, + "Maintain aspect ratio when stretching by displaying bars around the image " + "when there's no more overscan area to crop out.", + "GPU"); +// https://github.com/MonoGame/MonoGame/issues/4697#issuecomment-217779403 +// Using the value from DirectXTK (5% cropped out from each side, thus 90%), +// which is not exactly the Xbox One title-safe area, but close, and within the +// action-safe area: +// https://github.com/microsoft/DirectXTK/blob/1e80a465c6960b457ef9ab6716672c1443a45024/Src/SimpleMath.cpp#L144 +// XNA TitleSafeArea is 80%, but it's very conservative, designed for CRT, and +// is the title-safe area rather than the action-safe area. +// 90% is also exactly the fraction of 16:9 height in 16:10. +DEFINE_int32( + present_safe_area_x, 90, + "Percentage of the image width that can be kept when presenting to " + "maintain aspect ratio without letterboxing or stretching.", + "GPU"); +DEFINE_int32( + present_safe_area_y, 90, + "Percentage of the image height that can be kept when presenting to " + "maintain aspect ratio without letterboxing or stretching.", + "GPU"); + namespace xe { namespace gpu { namespace draw_util { @@ -589,6 +620,87 @@ ResolveCopyShaderIndex ResolveInfo::GetCopyShader( return shader; } +void GetPresentArea(uint32_t source_width, uint32_t source_height, + uint32_t window_width, uint32_t window_height, + int32_t& target_x_out, int32_t& target_y_out, + uint32_t& target_width_out, uint32_t& target_height_out) { + if (!cvars::present_stretch) { + target_x_out = (int32_t(window_width) - int32_t(source_width)) / 2; + target_y_out = (int32_t(window_height) - int32_t(source_height)) / 2; + target_width_out = source_width; + target_height_out = source_height; + return; + } + // Prevent division by zero. 
+ if (!source_width || !source_height) { + target_x_out = 0; + target_y_out = 0; + target_width_out = 0; + target_height_out = 0; + return; + } + if (uint64_t(window_width) * source_height > + uint64_t(source_width) * window_height) { + // The window is wider that the source - crop along Y, then letterbox or + // stretch along X. + uint32_t present_safe_area; + if (cvars::present_safe_area_y > 0 && cvars::present_safe_area_y < 100) { + present_safe_area = uint32_t(cvars::present_safe_area_y); + } else { + present_safe_area = 100; + } + uint32_t target_height = + uint32_t(uint64_t(window_width) * source_height / source_width); + bool letterbox = false; + if (target_height * present_safe_area > window_height * 100) { + // Don't crop out more than the safe area margin - letterbox or stretch. + target_height = window_height * 100 / present_safe_area; + letterbox = true; + } + if (letterbox && cvars::present_letterbox) { + uint32_t target_width = + uint32_t(uint64_t(source_width) * window_height * 100 / + (source_height * present_safe_area)); + target_x_out = (int32_t(window_width) - int32_t(target_width)) / 2; + target_width_out = target_width; + } else { + target_x_out = 0; + target_width_out = window_width; + } + target_y_out = (int32_t(window_height) - int32_t(target_height)) / 2; + target_height_out = target_height; + } else { + // The window is taller than the source - crop along X, then letterbox or + // stretch along Y. + uint32_t present_safe_area; + if (cvars::present_safe_area_x > 0 && cvars::present_safe_area_x < 100) { + present_safe_area = uint32_t(cvars::present_safe_area_x); + } else { + present_safe_area = 100; + } + uint32_t target_width = + uint32_t(uint64_t(window_height) * source_width / source_height); + bool letterbox = false; + if (target_width * present_safe_area > window_width * 100) { + // Don't crop out more than the safe area margin - letterbox or stretch. 
+ target_width = window_width * 100 / present_safe_area; + letterbox = true; + } + if (letterbox && cvars::present_letterbox) { + uint32_t target_height = + uint32_t(uint64_t(source_height) * window_width * 100 / + (source_width * present_safe_area)); + target_y_out = (int32_t(window_height) - int32_t(target_height)) / 2; + target_height_out = target_height; + } else { + target_y_out = 0; + target_height_out = window_height; + } + target_x_out = (int32_t(window_width) - int32_t(target_width)) / 2; + target_width_out = target_width; + } +} + } // namespace draw_util } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/draw_util.h b/src/xenia/gpu/draw_util.h index 76827c093..edb880ab0 100644 --- a/src/xenia/gpu/draw_util.h +++ b/src/xenia/gpu/draw_util.h @@ -272,6 +272,14 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, TraceWriter& trace_writer, uint32_t resolution_scale, bool edram_16_as_minus_1_to_1, ResolveInfo& info_out); +// Taking user configuration - stretching or letterboxing, overscan region to +// crop to fill while maintaining the aspect ratio - into account, returns the +// area where the frame should be presented in the host window. 
+void GetPresentArea(uint32_t source_width, uint32_t source_height, + uint32_t window_width, uint32_t window_height, + int32_t& target_x_out, int32_t& target_y_out, + uint32_t& target_width_out, uint32_t& target_height_out); + } // namespace draw_util } // namespace gpu } // namespace xe diff --git a/src/xenia/ui/d3d12/d3d12_context.cc b/src/xenia/ui/d3d12/d3d12_context.cc index 506ca6141..f897a5516 100644 --- a/src/xenia/ui/d3d12/d3d12_context.cc +++ b/src/xenia/ui/d3d12/d3d12_context.cc @@ -300,9 +300,9 @@ void D3D12Context::BeginSwap() { clear_color[1] = 1.0f; clear_color[2] = 0.0f; } else { - clear_color[0] = 238.0f / 255.0f; - clear_color[1] = 238.0f / 255.0f; - clear_color[2] = 238.0f / 255.0f; + clear_color[0] = 0.0f; + clear_color[1] = 0.0f; + clear_color[2] = 0.0f; } clear_color[3] = 1.0f; swap_command_list_->ClearRenderTargetView(back_buffer_rtv, clear_color, 0, diff --git a/src/xenia/ui/window_win.cc b/src/xenia/ui/window_win.cc index 2e60d2e42..c86a5cec8 100644 --- a/src/xenia/ui/window_win.cc +++ b/src/xenia/ui/window_win.cc @@ -253,20 +253,6 @@ bool Win32Window::ReleaseMouse() { bool Win32Window::is_fullscreen() const { return fullscreen_; } -// https://blogs.msdn.microsoft.com/oldnewthing/20131017-00/?p=2903 -BOOL UnadjustWindowRect(LPRECT prc, DWORD dwStyle, BOOL fMenu) { - RECT rc; - SetRectEmpty(&rc); - BOOL fRc = AdjustWindowRect(&rc, dwStyle, fMenu); - if (fRc) { - prc->left -= rc.left; - prc->top -= rc.top; - prc->right -= rc.right; - prc->bottom -= rc.bottom; - } - return fRc; -} - void Win32Window::ToggleFullscreen(bool fullscreen) { if (fullscreen == is_fullscreen()) { return; @@ -288,9 +274,6 @@ void Win32Window::ToggleFullscreen(bool fullscreen) { AdjustWindowRect(&rc, GetWindowLong(hwnd_, GWL_STYLE), false); MoveWindow(hwnd_, rc.left, rc.top, rc.right - rc.left, rc.bottom - rc.top, TRUE); - - width_ = rc.right - rc.left; - height_ = rc.bottom - rc.top; } } else { // Reinstate borders, resize to 1280x720 @@ -301,15 +284,13 @@ void 
Win32Window::ToggleFullscreen(bool fullscreen) { if (main_menu) { ::SetMenu(hwnd_, main_menu->handle()); } - - auto& rc = windowed_pos_.rcNormalPosition; - bool has_menu = main_menu_ ? true : false; - UnadjustWindowRect(&rc, GetWindowLong(hwnd_, GWL_STYLE), has_menu); - width_ = rc.right - rc.left; - height_ = rc.bottom - rc.top; } fullscreen_ = fullscreen; + + // width_ and height_ will be updated by the WM_SIZE handler - + // windowed_pos_.rcNormalPosition is also not the correct source for them when + // switching from fullscreen to maximized. } bool Win32Window::is_bordered() const { From 3daa899feb4ecf6841571009865559f99c8e78ef Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 5 Sep 2020 19:23:45 +0300 Subject: [PATCH 005/123] [DXBC] ROV: Fix depth being overwritten if stencil failed --- src/xenia/gpu/dxbc_shader_translator_om.cc | 25 ++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/xenia/gpu/dxbc_shader_translator_om.cc b/src/xenia/gpu/dxbc_shader_translator_om.cc index bb83200ee..24963008f 100644 --- a/src/xenia/gpu/dxbc_shader_translator_om.cc +++ b/src/xenia/gpu/dxbc_shader_translator_om.cc @@ -733,12 +733,6 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { DxbcOpAnd(DxbcDest::R(system_temp_rov_params_, 0b0001), DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), DxbcSrc::LU(~uint32_t(1 << i))); - // temp.x? = resulting sample depth after the depth test - // temp.y = polygon offset if not writing to oDepth - // temp.z = viewport maximum depth if not writing to oDepth - // temp.w = old depth/stencil - // sample_temp.x = free - DxbcOpMov(sample_depth_stencil_dest, sample_temp_x_src); } DxbcOpEndIf(); // Create packed depth/stencil, with the stencil value unchanged at this @@ -977,6 +971,25 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // Close the stencil test check. DxbcOpEndIf(); + // Check if the depth/stencil has failed not to modify the depth if it has. 
+ // sample_temp.x = whether depth/stencil has passed for this sample + DxbcOpAnd(sample_temp_x_dest, + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), + DxbcSrc::LU(1 << i)); + // If the depth/stencil test has failed, don't change the depth. + // sample_temp.x = free + DxbcOpIf(false, sample_temp_x_src); + { + // Copy the new stencil over the old depth. + // temp.x? = resulting sample depth/stencil + // temp.y = polygon offset if not writing to oDepth + // temp.z = viewport maximum depth if not writing to oDepth + // temp.w = old depth/stencil + DxbcOpBFI(sample_depth_stencil_dest, DxbcSrc::LU(8), DxbcSrc::LU(0), + sample_depth_stencil_src, temp_w_src); + } + // Close the depth/stencil passing check. + DxbcOpEndIf(); // Check if the new depth/stencil is different, and thus needs to be // written, to temp.w. // temp.x? = resulting sample depth/stencil From 46cd281cd3e9782343df5d94124ad669b1512d84 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 5 Sep 2020 19:34:36 +0300 Subject: [PATCH 006/123] [GPU] Rename present_stretch to present_rescale --- src/xenia/gpu/draw_util.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/xenia/gpu/draw_util.cc b/src/xenia/gpu/draw_util.cc index d28df6d0e..6aaa1b856 100644 --- a/src/xenia/gpu/draw_util.cc +++ b/src/xenia/gpu/draw_util.cc @@ -33,7 +33,7 @@ DEFINE_bool( "GPU"); DEFINE_bool( - present_stretch, true, + present_rescale, true, "Whether to rescale the image, instead of maintaining the original pixel " "size, when presenting to the window. 
When this is disabled, other " "positioning options are ignored.", @@ -624,7 +624,7 @@ void GetPresentArea(uint32_t source_width, uint32_t source_height, uint32_t window_width, uint32_t window_height, int32_t& target_x_out, int32_t& target_y_out, uint32_t& target_width_out, uint32_t& target_height_out) { - if (!cvars::present_stretch) { + if (!cvars::present_rescale) { target_x_out = (int32_t(window_width) - int32_t(source_width)) / 2; target_y_out = (int32_t(window_height) - int32_t(source_height)) / 2; target_width_out = source_width; From fed33be62b839ba4140160f78634facd9f15ec67 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 6 Sep 2020 00:52:30 +0300 Subject: [PATCH 007/123] [D3D12] Non-zeroed heaps on 2004 --- docs/building.md | 2 +- premake5.lua | 6 ++-- .../gpu/d3d12/d3d12_command_processor.cc | 28 +++++++++------- src/xenia/gpu/d3d12/primitive_converter.cc | 12 ++++--- src/xenia/gpu/d3d12/render_target_cache.cc | 33 +++++++++++-------- src/xenia/gpu/d3d12/shared_memory.cc | 19 ++++++----- src/xenia/gpu/d3d12/texture_cache.cc | 12 ++++--- src/xenia/ui/d3d12/d3d12_immediate_drawer.cc | 22 +++++++------ src/xenia/ui/d3d12/d3d12_provider.cc | 26 +++++++++++---- src/xenia/ui/d3d12/d3d12_provider.h | 4 +++ src/xenia/ui/d3d12/pools.cc | 11 ++++--- src/xenia/ui/d3d12/pools.h | 6 ++-- 12 files changed, 110 insertions(+), 71 deletions(-) diff --git a/docs/building.md b/docs/building.md index d8334793f..6aafc521e 100644 --- a/docs/building.md +++ b/docs/building.md @@ -12,7 +12,7 @@ drivers. * [Visual Studio 2019 or Visual Studio 2017](https://www.visualstudio.com/downloads/) * [Python 3.6+](https://www.python.org/downloads/) * Ensure Python is in PATH. 
-* Windows 10 SDK +* Windows 10 SDK version 10.0.19041.0 (for Visual Studio 2019, this or any newer version) ``` git clone https://github.com/xenia-project/xenia.git diff --git a/premake5.lua b/premake5.lua index e0c8f8d92..622a4da2c 100644 --- a/premake5.lua +++ b/premake5.lua @@ -202,10 +202,10 @@ solution("xenia") platforms({"Linux"}) elseif os.istarget("windows") then platforms({"Windows"}) - -- Minimum version to support ID3D12GraphicsCommandList1 (for - -- SetSamplePositions). + -- 10.0.15063.0: ID3D12GraphicsCommandList1::SetSamplePositions. + -- 10.0.19041.0: D3D12_HEAP_FLAG_CREATE_NOT_ZEROED. filter("action:vs2017") - systemversion("10.0.15063.0") + systemversion("10.0.19041.0") filter("action:vs2019") systemversion("10.0") filter({}) diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index e9831347d..23163a609 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -646,14 +646,16 @@ ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer( size = xe::align(size, kScratchBufferSizeIncrement); - auto device = GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); D3D12_RESOURCE_DESC buffer_desc; ui::d3d12::util::FillBufferResourceDesc( buffer_desc, size, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); ID3D12Resource* buffer; if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, - &buffer_desc, state, nullptr, IID_PPV_ARGS(&buffer)))) { + &ui::d3d12::util::kHeapPropertiesDefault, + provider.GetHeapFlagCreateNotZeroed(), &buffer_desc, state, nullptr, + IID_PPV_ARGS(&buffer)))) { XELOGE("Failed to create a {} MB scratch GPU buffer", size >> 20); return nullptr; } @@ -889,7 +891,7 @@ bool D3D12CommandProcessor::SetupContext() { // Initialize resource binding. 
constant_buffer_pool_ = - std::make_unique(device, 1024 * 1024); + std::make_unique(provider, 1024 * 1024); if (bindless_resources_used_) { D3D12_DESCRIPTOR_HEAP_DESC view_bindless_heap_desc; view_bindless_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; @@ -1181,6 +1183,9 @@ bool D3D12CommandProcessor::SetupContext() { return false; } + D3D12_HEAP_FLAGS heap_flag_create_not_zeroed = + provider.GetHeapFlagCreateNotZeroed(); + // Create gamma ramp resources. The PWL gamma ramp is 16-bit, but 6 bits are // hardwired to zero, so DXGI_FORMAT_R10G10B10A2_UNORM can be used for it too. // https://www.x.org/docs/AMD/old/42590_m76_rrg_1.01o.pdf @@ -1202,7 +1207,7 @@ bool D3D12CommandProcessor::SetupContext() { // The first action will be uploading. gamma_ramp_texture_state_ = D3D12_RESOURCE_STATE_COPY_DEST; if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesDefault, heap_flag_create_not_zeroed, &gamma_ramp_desc, gamma_ramp_texture_state_, nullptr, IID_PPV_ARGS(&gamma_ramp_texture_)))) { XELOGE("Failed to create the gamma ramp texture"); @@ -1218,7 +1223,7 @@ bool D3D12CommandProcessor::SetupContext() { ui::d3d12::util::FillBufferResourceDesc( gamma_ramp_desc, gamma_ramp_upload_size, D3D12_RESOURCE_FLAG_NONE); if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesUpload, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesUpload, heap_flag_create_not_zeroed, &gamma_ramp_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&gamma_ramp_upload_)))) { XELOGE("Failed to create the gamma ramp upload buffer"); @@ -1246,7 +1251,7 @@ bool D3D12CommandProcessor::SetupContext() { swap_texture_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; // Can be sampled at any time, switch to render target when needed, then back. 
if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesDefault, heap_flag_create_not_zeroed, &swap_texture_desc, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, nullptr, IID_PPV_ARGS(&swap_texture_)))) { XELOGE("Failed to create the command processor front buffer"); @@ -4286,15 +4291,16 @@ ID3D12Resource* D3D12CommandProcessor::RequestReadbackBuffer(uint32_t size) { } size = xe::align(size, kReadbackBufferSizeIncrement); if (size > readback_buffer_size_) { - auto device = GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); D3D12_RESOURCE_DESC buffer_desc; ui::d3d12::util::FillBufferResourceDesc(buffer_desc, size, D3D12_RESOURCE_FLAG_NONE); ID3D12Resource* buffer; if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE, - &buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, - IID_PPV_ARGS(&buffer)))) { + &ui::d3d12::util::kHeapPropertiesReadback, + provider.GetHeapFlagCreateNotZeroed(), &buffer_desc, + D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&buffer)))) { XELOGE("Failed to create a {} MB readback buffer", size >> 20); return nullptr; } diff --git a/src/xenia/gpu/d3d12/primitive_converter.cc b/src/xenia/gpu/d3d12/primitive_converter.cc index 4884865d0..ab2138b47 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.cc +++ b/src/xenia/gpu/d3d12/primitive_converter.cc @@ -47,14 +47,16 @@ PrimitiveConverter::PrimitiveConverter(D3D12CommandProcessor& command_processor, PrimitiveConverter::~PrimitiveConverter() { Shutdown(); } bool PrimitiveConverter::Initialize() { - auto device = - command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); + D3D12_HEAP_FLAGS heap_flag_create_not_zeroed = + 
provider.GetHeapFlagCreateNotZeroed(); // There can be at most 65535 indices in a Xenos draw call, but they can be up // to 4 bytes large, and conversion can add more indices (almost triple the // count for triangle strips, for instance). buffer_pool_ = - std::make_unique(device, 4 * 1024 * 1024); + std::make_unique(provider, 4 * 1024 * 1024); // Create the static index buffer for non-indexed drawing. D3D12_RESOURCE_DESC static_ib_desc; @@ -62,7 +64,7 @@ bool PrimitiveConverter::Initialize() { static_ib_desc, kStaticIBTotalCount * sizeof(uint16_t), D3D12_RESOURCE_FLAG_NONE); if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesUpload, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesUpload, heap_flag_create_not_zeroed, &static_ib_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&static_ib_upload_)))) { XELOGE( @@ -108,7 +110,7 @@ bool PrimitiveConverter::Initialize() { // Not uploaded yet. static_ib_upload_submission_ = UINT64_MAX; if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesDefault, heap_flag_create_not_zeroed, &static_ib_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&static_ib_)))) { XELOGE("Failed to create the primitive conversion static index buffer"); diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index 0c09b6864..3530c8f5a 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -137,8 +137,6 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) { uint32_t edram_buffer_size = GetEdramBufferSize(); // Create the buffer for reinterpreting EDRAM contents. - // No need to clear it in the first frame, memory is zeroed out when allocated - // on Windows. 
D3D12_RESOURCE_DESC edram_buffer_desc; ui::d3d12::util::FillBufferResourceDesc( edram_buffer_desc, edram_buffer_size, @@ -147,8 +145,15 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) { edram_buffer_state_ = edram_rov_used_ ? D3D12_RESOURCE_STATE_UNORDERED_ACCESS : D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + // Request zeroed (though no guarantee) when not using ROV so the host 32-bit + // depth buffer will be initialized to deterministic values (because it's + // involved in comparison with converted 24-bit values - whether the 32-bit + // value is up to date is determined by whether it's equal to the 24-bit + // value in the main EDRAM buffer when converted to 24-bit). if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesDefault, + edram_rov_used_ ? provider.GetHeapFlagCreateNotZeroed() + : D3D12_HEAP_FLAG_NONE, &edram_buffer_desc, edram_buffer_state_, nullptr, IID_PPV_ARGS(&edram_buffer_)))) { XELOGE("Failed to create the EDRAM buffer"); @@ -1451,10 +1456,11 @@ bool RenderTargetCache::InitializeTraceSubmitDownloads() { ui::d3d12::util::FillBufferResourceDesc(edram_snapshot_download_buffer_desc, xenos::kEdramSizeBytes, D3D12_RESOURCE_FLAG_NONE); - auto device = - command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesReadback, + provider.GetHeapFlagCreateNotZeroed(), &edram_snapshot_download_buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&edram_snapshot_download_buffer_)))) { @@ -1493,10 +1499,9 @@ void RenderTargetCache::RestoreEdramSnapshot(const void* snapshot) { return; } auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); - 
auto device = provider.GetDevice(); if (!edram_snapshot_restore_pool_) { edram_snapshot_restore_pool_ = - std::make_unique(device, + std::make_unique(provider, xenos::kEdramSizeBytes); } ID3D12Resource* upload_buffer; @@ -1603,14 +1608,15 @@ bool RenderTargetCache::MakeHeapResident(uint32_t heap_index) { if (heaps_[heap_index] != nullptr) { return true; } - auto device = - command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); D3D12_HEAP_DESC heap_desc = {}; heap_desc.SizeInBytes = kHeap4MBPages << 22; heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; // TODO(Triang3l): If real MSAA is added, alignment must be 4 MB. heap_desc.Alignment = 0; - heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES; + heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES | + provider.GetHeapFlagCreateNotZeroed(); if (FAILED( device->CreateHeap(&heap_desc, IID_PPV_ARGS(&heaps_[heap_index])))) { XELOGE("Failed to create a {} MB heap for render targets", @@ -1756,8 +1762,9 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget( } #else if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, - &resource_desc, state, nullptr, IID_PPV_ARGS(&resource)))) { + &ui::d3d12::util::kHeapPropertiesDefault, + provider.GetHeapFlagCreateNotZeroed(), &resource_desc, state, nullptr, + IID_PPV_ARGS(&resource)))) { XELOGE( "Failed to create a committed resource for {}x{} {} render target with " "format {}", diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index f2d2e6296..c24336664 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -73,8 +73,9 @@ bool SharedMemory::Initialize() { "resources yet."); } if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, 
D3D12_HEAP_FLAG_NONE, - &buffer_desc, buffer_state_, nullptr, IID_PPV_ARGS(&buffer_)))) { + &ui::d3d12::util::kHeapPropertiesDefault, + provider.GetHeapFlagCreateNotZeroed(), &buffer_desc, buffer_state_, + nullptr, IID_PPV_ARGS(&buffer_)))) { XELOGE("Shared memory: Failed to create the 512 MB buffer"); Shutdown(); return false; @@ -153,7 +154,7 @@ bool SharedMemory::Initialize() { system_page_flags_.resize((page_count_ + 63) / 64); upload_buffer_pool_ = std::make_unique( - device, + provider, xe::align(uint32_t(4 * 1024 * 1024), uint32_t(1) << page_size_log2_)); memory_invalidation_callback_handle_ = @@ -370,7 +371,8 @@ bool SharedMemory::EnsureTilesResident(uint32_t start, uint32_t length) { D3D12_HEAP_DESC heap_desc = {}; heap_desc.SizeInBytes = kHeapSize; heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; - heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | + provider.GetHeapFlagCreateNotZeroed(); if (FAILED(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&heaps_[i])))) { XELOGE("Shared memory: Failed to create a tile heap"); return false; @@ -890,11 +892,12 @@ bool SharedMemory::InitializeTraceSubmitDownloads() { ui::d3d12::util::FillBufferResourceDesc( gpu_written_buffer_desc, gpu_written_page_count << page_size_log2_, D3D12_RESOURCE_FLAG_NONE); - auto device = - command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE, - &gpu_written_buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + &ui::d3d12::util::kHeapPropertiesReadback, + provider.GetHeapFlagCreateNotZeroed(), &gpu_written_buffer_desc, + D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&trace_gpu_written_buffer_)))) { XELOGE( "Shared memory: Failed to create a {} KB GPU-written memory download " diff 
--git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 01c7812c9..0ec94c132 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -1633,7 +1633,8 @@ bool TextureCache::EnsureScaledResolveBufferResident(uint32_t start_unscaled, D3D12_HEAP_DESC heap_desc = {}; heap_desc.SizeInBytes = kScaledResolveHeapSize; heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; - heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | + provider.GetHeapFlagCreateNotZeroed(); if (FAILED(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&scaled_resolve_heaps_[i])))) { XELOGE("Texture cache: Failed to create a scaled resolve tile heap"); @@ -1953,14 +1954,15 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) { // Untiling through a buffer instead of using unordered access because copying // is not done that often. desc.Flags = D3D12_RESOURCE_FLAG_NONE; - auto device = - command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); // Assuming untiling will be the next operation. 
D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_COPY_DEST; ID3D12Resource* resource; if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, &desc, - state, nullptr, IID_PPV_ARGS(&resource)))) { + &ui::d3d12::util::kHeapPropertiesDefault, + provider.GetHeapFlagCreateNotZeroed(), &desc, state, nullptr, + IID_PPV_ARGS(&resource)))) { LogTextureKeyAction(key, "Failed to create"); return nullptr; } diff --git a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc index 6bc92e8c0..0c958ebd3 100644 --- a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc +++ b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc @@ -33,7 +33,7 @@ class D3D12ImmediateTexture : public ImmediateTexture { ImmediateTextureFilter filter, bool repeat); ~D3D12ImmediateTexture() override; - bool Initialize(ID3D12Device* device); + bool Initialize(D3D12Provider& provider); void Shutdown(); ID3D12Resource* GetResource() const { return resource_; } @@ -59,7 +59,7 @@ D3D12ImmediateTexture::D3D12ImmediateTexture(uint32_t width, uint32_t height, D3D12ImmediateTexture::~D3D12ImmediateTexture() { Shutdown(); } -bool D3D12ImmediateTexture::Initialize(ID3D12Device* device) { +bool D3D12ImmediateTexture::Initialize(D3D12Provider& provider) { // The first operation will likely be copying the contents. 
state_ = D3D12_RESOURCE_STATE_COPY_DEST; @@ -75,9 +75,9 @@ bool D3D12ImmediateTexture::Initialize(ID3D12Device* device) { resource_desc.SampleDesc.Quality = 0; resource_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; resource_desc.Flags = D3D12_RESOURCE_FLAG_NONE; - if (FAILED(device->CreateCommittedResource( - &util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, &resource_desc, - state_, nullptr, IID_PPV_ARGS(&resource_)))) { + if (FAILED(provider.GetDevice()->CreateCommittedResource( + &util::kHeapPropertiesDefault, provider.GetHeapFlagCreateNotZeroed(), + &resource_desc, state_, nullptr, IID_PPV_ARGS(&resource_)))) { XELOGE("Failed to create a {}x{} texture for immediate drawing", width, height); return false; @@ -288,7 +288,7 @@ bool D3D12ImmediateDrawer::Initialize() { // Create pools for draws. vertex_buffer_pool_ = - std::make_unique(device, 2 * 1024 * 1024); + std::make_unique(provider, 2 * 1024 * 1024); texture_descriptor_pool_ = std::make_unique( device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 2048); texture_descriptor_pool_heap_index_ = DescriptorHeapPool::kHeapIndexInvalid; @@ -326,7 +326,7 @@ std::unique_ptr D3D12ImmediateDrawer::CreateTexture( const uint8_t* data) { auto texture = std::make_unique(width, height, filter, repeat); - texture->Initialize(context_.GetD3D12Provider().GetDevice()); + texture->Initialize(context_.GetD3D12Provider()); if (data != nullptr) { UpdateTexture(texture.get(), data); } @@ -343,7 +343,8 @@ void D3D12ImmediateDrawer::UpdateTexture(ImmediateTexture* texture, } uint32_t width = d3d_texture->width, height = d3d_texture->height; - auto device = context_.GetD3D12Provider().GetDevice(); + auto& provider = context_.GetD3D12Provider(); + auto device = provider.GetDevice(); // Create and fill the upload buffer. 
D3D12_RESOURCE_DESC texture_desc = texture_resource->GetDesc(); @@ -356,8 +357,9 @@ void D3D12ImmediateDrawer::UpdateTexture(ImmediateTexture* texture, D3D12_RESOURCE_FLAG_NONE); ID3D12Resource* buffer; if (FAILED(device->CreateCommittedResource( - &util::kHeapPropertiesUpload, D3D12_HEAP_FLAG_NONE, &buffer_desc, - D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&buffer)))) { + &util::kHeapPropertiesUpload, provider.GetHeapFlagCreateNotZeroed(), + &buffer_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + IID_PPV_ARGS(&buffer)))) { XELOGE( "Failed to create an upload buffer for a {}x{} texture for " "immediate drawing", diff --git a/src/xenia/ui/d3d12/d3d12_provider.cc b/src/xenia/ui/d3d12/d3d12_provider.cc index 77e4e70aa..0231a5bec 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.cc +++ b/src/xenia/ui/d3d12/d3d12_provider.cc @@ -409,17 +409,29 @@ bool D3D12Provider::Initialize() { virtual_address_bits_per_resource_ = virtual_address_support.MaxGPUVirtualAddressBitsPerResource; } + // D3D12_HEAP_FLAG_CREATE_NOT_ZEROED requires Windows 10 2004 (indicated by + // the availability of ID3D12Device8 or D3D12_FEATURE_D3D12_OPTIONS7). 
+ heap_flag_create_not_zeroed_ = D3D12_HEAP_FLAG_NONE; + D3D12_FEATURE_DATA_D3D12_OPTIONS7 options7; + if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS7, + &options7, sizeof(options7)))) { + heap_flag_create_not_zeroed_ = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + } XELOGD3D( - "Direct3D 12 device features:\n" - "Max GPU virtual address bits per resource: {}\n" - "Programmable sample positions: tier {}\n" - "Rasterizer-ordered views: {}\n" - "Resource binding: tier {}\n" - "Tiled resources: tier {}\n", + "Direct3D 12 device and OS features:\n" + "* Max GPU virtual address bits per resource: {}\n" + "* Programmable sample positions: tier {}\n" + "* Rasterizer-ordered views: {}\n" + "* Resource binding: tier {}\n" + "* Tiled resources: tier {}\n" + "* Non-zeroed heap creation: {}\n", virtual_address_bits_per_resource_, uint32_t(programmable_sample_positions_tier_), rasterizer_ordered_views_supported_ ? "yes" : "no", - uint32_t(resource_binding_tier_), uint32_t(tiled_resources_tier_)); + uint32_t(resource_binding_tier_), uint32_t(tiled_resources_tier_), + (heap_flag_create_not_zeroed_ & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) + ? "yes" + : "no"); // Get the graphics analysis interface, will silently fail if PIX is not // attached. diff --git a/src/xenia/ui/d3d12/d3d12_provider.h b/src/xenia/ui/d3d12/d3d12_provider.h index 122f16e2f..1c8694fd0 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.h +++ b/src/xenia/ui/d3d12/d3d12_provider.h @@ -84,6 +84,9 @@ class D3D12Provider : public GraphicsProvider { uint32_t GetVirtualAddressBitsPerResource() const { return virtual_address_bits_per_resource_; } + D3D12_HEAP_FLAGS GetHeapFlagCreateNotZeroed() const { + return heap_flag_create_not_zeroed_; + } // Proxies for Direct3D 12 functions since they are loaded dynamically. 
inline HRESULT SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc, @@ -164,6 +167,7 @@ class D3D12Provider : public GraphicsProvider { D3D12_RESOURCE_BINDING_TIER resource_binding_tier_; D3D12_TILED_RESOURCES_TIER tiled_resources_tier_; uint32_t virtual_address_bits_per_resource_; + D3D12_HEAP_FLAGS heap_flag_create_not_zeroed_; }; } // namespace d3d12 diff --git a/src/xenia/ui/d3d12/pools.cc b/src/xenia/ui/d3d12/pools.cc index deff80b12..7b892caa9 100644 --- a/src/xenia/ui/d3d12/pools.cc +++ b/src/xenia/ui/d3d12/pools.cc @@ -19,8 +19,8 @@ namespace xe { namespace ui { namespace d3d12 { -UploadBufferPool::UploadBufferPool(ID3D12Device* device, uint32_t page_size) - : device_(device), page_size_(page_size) {} +UploadBufferPool::UploadBufferPool(D3D12Provider& provider, uint32_t page_size) + : provider_(provider), page_size_(page_size) {} UploadBufferPool::~UploadBufferPool() { ClearCache(); } @@ -101,9 +101,10 @@ uint8_t* UploadBufferPool::Request(uint64_t submission_index, uint32_t size, util::FillBufferResourceDesc(new_buffer_desc, page_size_, D3D12_RESOURCE_FLAG_NONE); ID3D12Resource* new_buffer; - if (FAILED(device_->CreateCommittedResource( - &util::kHeapPropertiesUpload, D3D12_HEAP_FLAG_NONE, - &new_buffer_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + if (FAILED(provider_.GetDevice()->CreateCommittedResource( + &util::kHeapPropertiesUpload, + provider_.GetHeapFlagCreateNotZeroed(), &new_buffer_desc, + D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&new_buffer)))) { XELOGE("Failed to create a D3D upload buffer with {} bytes", page_size_); diff --git a/src/xenia/ui/d3d12/pools.h b/src/xenia/ui/d3d12/pools.h index 4499bd08f..21606cc42 100644 --- a/src/xenia/ui/d3d12/pools.h +++ b/src/xenia/ui/d3d12/pools.h @@ -12,7 +12,7 @@ #include -#include "xenia/ui/d3d12/d3d12_api.h" +#include "xenia/ui/d3d12/d3d12_provider.h" namespace xe { namespace ui { @@ -23,7 +23,7 @@ namespace d3d12 { class UploadBufferPool { public: - 
UploadBufferPool(ID3D12Device* device, uint32_t page_size); + UploadBufferPool(D3D12Provider& provider, uint32_t page_size); ~UploadBufferPool(); void Reclaim(uint64_t completed_submission_index); @@ -41,7 +41,7 @@ class UploadBufferPool { D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out); private: - ID3D12Device* device_; + D3D12Provider& provider_; uint32_t page_size_; struct Page { From df1db5c62786092280d678829e90f2d58688061d Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 6 Sep 2020 00:57:00 +0300 Subject: [PATCH 008/123] [D3D12] Make the feature list in the log sorted again --- src/xenia/ui/d3d12/d3d12_provider.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/xenia/ui/d3d12/d3d12_provider.cc b/src/xenia/ui/d3d12/d3d12_provider.cc index 0231a5bec..cb5287e14 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.cc +++ b/src/xenia/ui/d3d12/d3d12_provider.cc @@ -420,18 +420,17 @@ bool D3D12Provider::Initialize() { XELOGD3D( "Direct3D 12 device and OS features:\n" "* Max GPU virtual address bits per resource: {}\n" + "* Non-zeroed heap creation: {}\n" "* Programmable sample positions: tier {}\n" "* Rasterizer-ordered views: {}\n" "* Resource binding: tier {}\n" - "* Tiled resources: tier {}\n" - "* Non-zeroed heap creation: {}\n", + "* Tiled resources: tier {}\n", virtual_address_bits_per_resource_, + (heap_flag_create_not_zeroed_ & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) ? "yes" + : "no", uint32_t(programmable_sample_positions_tier_), rasterizer_ordered_views_supported_ ? "yes" : "no", - uint32_t(resource_binding_tier_), uint32_t(tiled_resources_tier_), - (heap_flag_create_not_zeroed_ & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) - ? "yes" - : "no"); + uint32_t(resource_binding_tier_), uint32_t(tiled_resources_tier_)); // Get the graphics analysis interface, will silently fail if PIX is not // attached. 
From dfa181a529fdff5a245174100353326b4078f73c Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 6 Sep 2020 22:08:17 +0300 Subject: [PATCH 009/123] [Vulkan] Provider init, Android platform defines --- src/xenia/app/xenia_main.cc | 2 +- src/xenia/base/platform.h | 15 +- src/xenia/gpu/vulkan/vulkan_graphics_system.h | 2 +- src/xenia/ui/vulkan/vulkan_provider.cc | 461 +++++++++++++++++- src/xenia/ui/vulkan/vulkan_provider.h | 112 +++++ src/xenia/ui/vulkan/vulkan_util.h | 45 ++ 6 files changed, 630 insertions(+), 7 deletions(-) create mode 100644 src/xenia/ui/vulkan/vulkan_util.h diff --git a/src/xenia/app/xenia_main.cc b/src/xenia/app/xenia_main.cc index a52656814..f13f0de32 100644 --- a/src/xenia/app/xenia_main.cc +++ b/src/xenia/app/xenia_main.cc @@ -217,7 +217,7 @@ int xenia_main(const std::vector& args) { if (!cvars::portable && !std::filesystem::exists(storage_root / "portable.txt")) { storage_root = xe::filesystem::GetUserFolder(); -#if defined(XE_PLATFORM_WIN32) || defined(XE_PLATFORM_LINUX) +#if defined(XE_PLATFORM_WIN32) || defined(XE_PLATFORM_GNU_LINUX) storage_root = storage_root / "Xenia"; #else #warning Unhandled platform for the data root. diff --git a/src/xenia/base/platform.h b/src/xenia/base/platform.h index 33083a831..c1f983a8c 100644 --- a/src/xenia/base/platform.h +++ b/src/xenia/base/platform.h @@ -31,8 +31,14 @@ #define XE_PLATFORM_MAC 1 #elif defined(WIN32) || defined(_WIN32) #define XE_PLATFORM_WIN32 1 -#else +#elif defined(__ANDROID__) +#define XE_PLATFORM_ANDROID 1 #define XE_PLATFORM_LINUX 1 +#elif defined(__gnu_linux__) +#define XE_PLATFORM_GNU_LINUX 1 +#define XE_PLATFORM_LINUX 1 +#else +#error Unsupported target OS. #endif #if defined(__clang__) @@ -51,8 +57,11 @@ #if defined(_M_AMD64) || defined(__amd64__) #define XE_ARCH_AMD64 1 -#elif defined(_M_IX86) -#error "Xenia is not supported on 32-bit platforms." 
+#elif defined(_M_ARM64) || defined(__aarch64__) +#define XE_ARCH_ARM64 1 +#elif defined(_M_IX86) || defined(__i386__) || defined(_M_ARM) || \ + defined(__arm__) +#error Xenia is not supported on 32-bit platforms. #elif defined(_M_PPC) || defined(__powerpc__) #define XE_ARCH_PPC 1 #endif diff --git a/src/xenia/gpu/vulkan/vulkan_graphics_system.h b/src/xenia/gpu/vulkan/vulkan_graphics_system.h index eb04d2b71..76ba8903f 100644 --- a/src/xenia/gpu/vulkan/vulkan_graphics_system.h +++ b/src/xenia/gpu/vulkan/vulkan_graphics_system.h @@ -26,7 +26,7 @@ class VulkanGraphicsSystem : public GraphicsSystem { static bool IsAvailable() { return true; } - std::string name() const override { return "Vulkan Prototype"; } + std::string name() const override { return "Vulkan Prototype - DO NOT USE"; } X_STATUS Setup(cpu::Processor* processor, kernel::KernelState* kernel_state, ui::Window* target_window) override; diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index 669c04d31..0e8930eb2 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -9,9 +9,26 @@ #include "xenia/ui/vulkan/vulkan_provider.h" +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/base/cvar.h" #include "xenia/base/logging.h" +#include "xenia/base/platform.h" #include "xenia/ui/vulkan/vulkan_context.h" +#if XE_PLATFORM_LINUX +#include +#elif XE_PLATFORM_WIN32 +#include "xenia/base/platform_win.h" +#endif + +DEFINE_int32( + vulkan_device, -1, + "Index of the physical device to use, or -1 for any compatible device.", + "Vulkan"); + namespace xe { namespace ui { namespace vulkan { @@ -20,7 +37,7 @@ std::unique_ptr VulkanProvider::Create(Window* main_window) { std::unique_ptr provider(new VulkanProvider(main_window)); if (!provider->Initialize()) { xe::FatalError( - "Unable to initialize Direct3D 12 graphics subsystem.\n" + "Unable to initialize Vulkan graphics subsystem.\n" "\n" "Ensure that you have the latest 
drivers for your GPU and it supports " "Vulkan, and that you have the latest Vulkan runtime installed, which " @@ -36,7 +53,447 @@ std::unique_ptr VulkanProvider::Create(Window* main_window) { VulkanProvider::VulkanProvider(Window* main_window) : GraphicsProvider(main_window) {} -bool VulkanProvider::Initialize() { return false; } +VulkanProvider::~VulkanProvider() { + if (device_ != VK_NULL_HANDLE) { + ifn_.destroyDevice(device_, nullptr); + } + if (instance_ != VK_NULL_HANDLE) { + destroyInstance_(instance_, nullptr); + } + +#if XE_PLATFORM_LINUX + if (library_) { + dlclose(library_); + } +#elif XE_PLATFORM_WIN32 + if (library_) { + FreeLibrary(library_); + } +#endif +} + +bool VulkanProvider::Initialize() { + // Load the library. +#if XE_PLATFORM_LINUX +#if XE_PLATFORM_ANDROID + const char* libvulkan_name = "libvulkan.so"; +#else + const char* libvulkan_name = "libvulkan.so.1"; +#endif + // http://developer.download.nvidia.com/mobile/shield/assets/Vulkan/UsingtheVulkanAPI.pdf + library_ = dlopen(libvulkan_name, RTLD_NOW | RTLD_LOCAL); + if (!library_) { + XELOGE("Failed to load {}", libvulkan_name); + return false; + } + getInstanceProcAddr_ = + PFN_vkGetInstanceProcAddr(dlsym(library_, "vkGetInstanceProcAddr")); + destroyInstance_ = + PFN_vkDestroyInstance(dlsym(library_, "vkDestroyInstance")); + if (!getInstanceProcAddr_ || !destroyInstance_) { + XELOGE("Failed to get vkGetInstanceProcAddr and vkDestroyInstance from {}", + libvulkan_name); + return false; + } +#elif XE_PLATFORM_WIN32 + library_ = LoadLibraryA("vulkan-1.dll"); + if (!library_) { + XELOGE("Failed to load vulkan-1.dll"); + return false; + } + getInstanceProcAddr_ = PFN_vkGetInstanceProcAddr( + GetProcAddress(library_, "vkGetInstanceProcAddr")); + destroyInstance_ = + PFN_vkDestroyInstance(GetProcAddress(library_, "vkDestroyInstance")); + if (!getInstanceProcAddr_ || !destroyInstance_) { + XELOGE( + "Failed to get vkGetInstanceProcAddr and vkDestroyInstance from " + "vulkan-1.dll"); + return 
false; + } +#else +#error No Vulkan library loading provided for the target platform. +#endif + assert_not_null(getInstanceProcAddr_); + assert_not_null(destroyInstance_); + bool library_functions_loaded = true; + library_functions_loaded &= + (library_functions_.createInstance = PFN_vkCreateInstance( + getInstanceProcAddr_(VK_NULL_HANDLE, "vkCreateInstance"))) != + nullptr; + library_functions_loaded &= + (library_functions_.enumerateInstanceExtensionProperties = + PFN_vkEnumerateInstanceExtensionProperties(getInstanceProcAddr_( + VK_NULL_HANDLE, "vkEnumerateInstanceExtensionProperties"))) != + nullptr; + if (!library_functions_loaded) { + XELOGE("Failed to get Vulkan library function pointers"); + return false; + } + library_functions_.enumerateInstanceVersion_1_1 = + PFN_vkEnumerateInstanceVersion( + getInstanceProcAddr_(VK_NULL_HANDLE, "vkEnumerateInstanceVersion")); + + // Get the API version. + const uint32_t api_version_target = VK_MAKE_VERSION(1, 2, 148); + static_assert(VK_HEADER_VERSION_COMPLETE >= api_version_target, + "Vulkan header files must be up to date"); + if (!library_functions_.enumerateInstanceVersion_1_1 || + library_functions_.enumerateInstanceVersion_1_1(&api_version_) != + VK_SUCCESS) { + api_version_ = VK_API_VERSION_1_0; + } + XELOGVK("Vulkan instance version {}.{}.{}", VK_VERSION_MAJOR(api_version_), + VK_VERSION_MINOR(api_version_), VK_VERSION_PATCH(api_version_)); + + // Create the instance. 
+ std::vector instance_extensions_enabled; + instance_extensions_enabled.push_back("VK_KHR_surface"); +#if XE_PLATFORM_ANDROID + instance_extensions_enabled.push_back("VK_KHR_android_surface"); +#elif XE_PLATFORM_WIN32 + instance_extensions_enabled.push_back("VK_KHR_win32_surface"); +#endif + VkApplicationInfo application_info; + application_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + application_info.pNext = nullptr; + application_info.pApplicationName = "Xenia"; + application_info.applicationVersion = 1; + application_info.pEngineName = nullptr; + application_info.engineVersion = 0; + // "apiVersion must be the highest version of Vulkan that the application is + // designed to use" + // "Vulkan 1.0 implementations were required to return + // VK_ERROR_INCOMPATIBLE_DRIVER if apiVersion was larger than 1.0" + application_info.apiVersion = api_version_ >= VK_MAKE_VERSION(1, 1, 0) + ? api_version_target + : api_version_; + VkInstanceCreateInfo instance_create_info; + instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + instance_create_info.pNext = nullptr; + instance_create_info.flags = 0; + instance_create_info.pApplicationInfo = &application_info; + // TODO(Triang3l): Enable the validation layer. + instance_create_info.enabledLayerCount = 0; + instance_create_info.ppEnabledLayerNames = nullptr; + instance_create_info.enabledExtensionCount = + uint32_t(instance_extensions_enabled.size()); + instance_create_info.ppEnabledExtensionNames = + instance_extensions_enabled.data(); + if (library_functions_.createInstance(&instance_create_info, nullptr, + &instance_) != VK_SUCCESS) { + XELOGE("Failed to create a Vulkan instance with surface support"); + return false; + } + + // Get instance functions. 
+ bool instance_functions_loaded = true; + instance_functions_loaded &= + (ifn_.createDevice = PFN_vkCreateDevice( + getInstanceProcAddr_(instance_, "vkCreateDevice"))) != nullptr; + instance_functions_loaded &= + (ifn_.destroyDevice = PFN_vkDestroyDevice( + getInstanceProcAddr_(instance_, "vkDestroyDevice"))) != nullptr; + instance_functions_loaded &= + (ifn_.enumerateDeviceExtensionProperties = + PFN_vkEnumerateDeviceExtensionProperties(getInstanceProcAddr_( + instance_, "vkEnumerateDeviceExtensionProperties"))) != nullptr; + instance_functions_loaded &= + (ifn_.enumeratePhysicalDevices = PFN_vkEnumeratePhysicalDevices( + getInstanceProcAddr_(instance_, "vkEnumeratePhysicalDevices"))) != + nullptr; + instance_functions_loaded &= + (ifn_.getDeviceProcAddr = PFN_vkGetDeviceProcAddr( + getInstanceProcAddr_(instance_, "vkGetDeviceProcAddr"))) != nullptr; + instance_functions_loaded &= + (ifn_.getPhysicalDeviceFeatures = PFN_vkGetPhysicalDeviceFeatures( + getInstanceProcAddr_(instance_, "vkGetPhysicalDeviceFeatures"))) != + nullptr; + instance_functions_loaded &= + (ifn_.getPhysicalDeviceProperties = PFN_vkGetPhysicalDeviceProperties( + getInstanceProcAddr_(instance_, "vkGetPhysicalDeviceProperties"))) != + nullptr; + instance_functions_loaded &= + (ifn_.getPhysicalDeviceQueueFamilyProperties = + PFN_vkGetPhysicalDeviceQueueFamilyProperties(getInstanceProcAddr_( + instance_, "vkGetPhysicalDeviceQueueFamilyProperties"))) != + nullptr; + instance_functions_loaded &= + (ifn_.getPhysicalDeviceSurfaceSupportKHR = + PFN_vkGetPhysicalDeviceSurfaceSupportKHR(getInstanceProcAddr_( + instance_, "vkGetPhysicalDeviceSurfaceSupportKHR"))) != nullptr; +#if XE_PLATFORM_ANDROID + instance_functions_loaded &= + (ifn_.createAndroidSurfaceKHR = PFN_vkCreateAndroidSurfaceKHR( + getInstanceProcAddr_(instance_, "vkCreateAndroidSurfaceKHR"))) != + nullptr; +#elif XE_PLATFORM_WIN32 + instance_functions_loaded &= + (ifn_.createWin32SurfaceKHR = PFN_vkCreateWin32SurfaceKHR( + 
getInstanceProcAddr_(instance_, "vkCreateWin32SurfaceKHR"))) != + nullptr; +#endif + if (!instance_functions_loaded) { + XELOGE("Failed to get Vulkan instance function pointers"); + return false; + } + + // Get the compatible physical device. + std::vector physical_devices; + for (;;) { + uint32_t physical_device_count = uint32_t(physical_devices.size()); + bool physical_devices_was_empty = physical_devices.empty(); + VkResult physical_device_enumerate_result = ifn_.enumeratePhysicalDevices( + instance_, &physical_device_count, + physical_devices_was_empty ? nullptr : physical_devices.data()); + // If the original device count was 0 (first call), SUCCESS is returned, not + // INCOMPLETE. + if (physical_device_enumerate_result == VK_SUCCESS || + physical_device_enumerate_result == VK_INCOMPLETE) { + physical_devices.resize(physical_device_count); + if (physical_device_enumerate_result == VK_SUCCESS && + (!physical_devices_was_empty || !physical_device_count)) { + break; + } + } else { + XELOGE("Failed to enumerate Vulkan physical devices"); + return false; + } + } + if (physical_devices.empty()) { + XELOGE("No Vulkan physical devices are available"); + return false; + } + size_t physical_device_index_first, physical_device_index_last; + if (cvars::vulkan_device >= 0) { + physical_device_index_first = uint32_t(cvars::vulkan_device); + physical_device_index_last = physical_device_index_first; + if (physical_device_index_first >= physical_devices.size()) { + XELOGE( + "vulkan_device config variable is out of range, {} devices are " + "available", + physical_devices.size()); + return false; + } + } else { + physical_device_index_first = 0; + physical_device_index_last = physical_devices.size() - 1; + } + physical_device_ = VK_NULL_HANDLE; + std::vector queue_families; + uint32_t queue_family_sparse_binding = UINT32_MAX; + std::vector device_extension_properties; + for (size_t i = physical_device_index_first; i <= physical_device_index_last; + ++i) { + VkPhysicalDevice 
physical_device_current = physical_devices[i]; + + // Get physical device features and check if the needed ones are supported. + ifn_.getPhysicalDeviceFeatures(physical_device_current, &device_features_); + // TODO(Triang3l): Make geometry shaders optional by providing compute + // shader fallback (though that would require vertex shader stores). + if (!device_features_.geometryShader) { + continue; + } + + // Get the graphics and compute queue, and also a sparse binding queue + // (preferably the same for the least latency between the two, as Xenia + // submits sparse binding commands right before graphics commands anyway). + uint32_t queue_family_count = 0; + ifn_.getPhysicalDeviceQueueFamilyProperties(physical_device_current, + &queue_family_count, nullptr); + queue_families.resize(queue_family_count); + ifn_.getPhysicalDeviceQueueFamilyProperties( + physical_device_current, &queue_family_count, queue_families.data()); + assert_true(queue_family_count == queue_families.size()); + queue_family_graphics_compute_ = UINT32_MAX; + queue_family_sparse_binding = UINT32_MAX; + constexpr VkQueueFlags flags_graphics_compute = + VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT; + constexpr VkQueueFlags flags_graphics_compute_sparse = + flags_graphics_compute | VK_QUEUE_SPARSE_BINDING_BIT; + for (uint32_t j = 0; j < queue_family_count; ++j) { + VkQueueFlags queue_flags = queue_families[j].queueFlags; + if (device_features_.sparseBinding) { + // First, check if the queue family supports both graphics/compute and + // sparse binding. This would be the best for Xenia. + if ((queue_flags & flags_graphics_compute_sparse) == + flags_graphics_compute_sparse) { + queue_family_graphics_compute_ = j; + queue_family_sparse_binding = j; + break; + } + // If not supporting both, for sparse binding, for now (until a queue + // supporting all three is found), pick the first queue supporting it. 
+ if ((queue_flags & VK_QUEUE_SPARSE_BINDING_BIT) && + queue_family_sparse_binding == UINT32_MAX) { + queue_family_sparse_binding = j; + } + } + // If the device supports sparse binding, for now (until a queue + // supporting all three is found), pick the first queue supporting + // graphics/compute for graphics. + // If it doesn't, just pick the first queue supporting graphics/compute. + if ((queue_flags & flags_graphics_compute) == flags_graphics_compute && + queue_family_graphics_compute_ == UINT32_MAX) { + queue_family_graphics_compute_ = j; + if (!device_features_.sparseBinding) { + break; + } + } + } + // FIXME(Triang3l): Here we're assuming that the graphics/compute queue + // family supports presentation to the surface. It is probably true for most + // target Vulkan implementations, however, there are no guarantees in the + // specification. + // To check if the queue supports presentation, the target surface must + // exist at this point. However, the actual window that is created in + // GraphicsContext, not in GraphicsProvider. + // While we do have main_window here, it's not necessarily the window that + // presentation will actually happen to. Also, while on Windows the HWND is + // persistent, on Android, ANativeWindow is destroyed whenever the activity + // goes to background, and the application may even be started in background + // (programmatically, or using ADB, while the device is locked), thus the + // window doesn't necessarily exist at this point. + if (queue_family_graphics_compute_ == UINT32_MAX) { + continue; + } + + // Get the extensions, check if swapchain is supported. 
+ device_extension_properties.clear(); + VkResult device_extensions_enumerate_result; + for (;;) { + uint32_t device_extension_count = + uint32_t(device_extension_properties.size()); + bool device_extensions_was_empty = device_extension_properties.empty(); + device_extensions_enumerate_result = + ifn_.enumerateDeviceExtensionProperties( + physical_device_current, nullptr, &device_extension_count, + device_extensions_was_empty ? nullptr + : device_extension_properties.data()); + // If the original extension count was 0 (first call), SUCCESS is + // returned, not INCOMPLETE. + if (device_extensions_enumerate_result == VK_SUCCESS || + device_extensions_enumerate_result == VK_INCOMPLETE) { + device_extension_properties.resize(device_extension_count); + if (device_extensions_enumerate_result == VK_SUCCESS && + (!device_extensions_was_empty || !device_extension_count)) { + break; + } + } else { + break; + } + } + if (device_extensions_enumerate_result != VK_SUCCESS) { + continue; + } + std::memset(&device_extensions_, 0, sizeof(device_extensions_)); + bool device_supports_swapchain = false; + for (const VkExtensionProperties& device_extension : + device_extension_properties) { + const char* device_extension_name = device_extension.extensionName; + if (!std::strcmp(device_extension_name, + "VK_EXT_fragment_shader_interlock")) { + device_extensions_.ext_fragment_shader_interlock = true; + } else if (!std::strcmp(device_extension_name, "VK_KHR_swapchain")) { + device_supports_swapchain = true; + } + } + if (!device_supports_swapchain) { + continue; + } + + physical_device_ = physical_device_current; + break; + } + if (physical_device_ == VK_NULL_HANDLE) { + XELOGE( + "Failed to get a compatible Vulkan physical device with swapchain " + "support"); + return false; + } + ifn_.getPhysicalDeviceProperties(physical_device_, &device_properties_); + XELOGVK( + "Vulkan device: {} (vendor {:04X}, device {:04X}, driver {:08X}, API " + "{}.{}.{})", + device_properties_.deviceName, 
device_properties_.vendorID,
+      device_properties_.deviceID, device_properties_.driverVersion,
+      VK_VERSION_MAJOR(device_properties_.apiVersion),
+      VK_VERSION_MINOR(device_properties_.apiVersion),
+      VK_VERSION_PATCH(device_properties_.apiVersion));
+  // TODO(Triang3l): Report properties, features, extensions.
+
+  // Create the device.
+  float queue_priority_high = 1.0f;
+  VkDeviceQueueCreateInfo queue_create_infos[2];
+  queue_create_infos[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+  queue_create_infos[0].pNext = nullptr;
+  queue_create_infos[0].flags = 0;
+  queue_create_infos[0].queueFamilyIndex = queue_family_graphics_compute_;
+  queue_create_infos[0].queueCount = 1;
+  queue_create_infos[0].pQueuePriorities = &queue_priority_high;
+  bool separate_sparse_binding_queue =
+      queue_family_sparse_binding != UINT32_MAX &&
+      queue_family_sparse_binding != queue_family_graphics_compute_;
+  if (separate_sparse_binding_queue) {
+    queue_create_infos[1].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+    queue_create_infos[1].pNext = nullptr;
+    queue_create_infos[1].flags = 0;
+    queue_create_infos[1].queueFamilyIndex = queue_family_sparse_binding;
+    queue_create_infos[1].queueCount = 1;
+    queue_create_infos[1].pQueuePriorities = &queue_priority_high;
+  }
+  std::vector<const char*> device_extensions_enabled;
+  device_extensions_enabled.push_back("VK_KHR_swapchain");
+  if (device_extensions_.ext_fragment_shader_interlock) {
+    device_extensions_enabled.push_back("VK_EXT_fragment_shader_interlock");
+  }
+  VkDeviceCreateInfo device_create_info;
+  device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
+  device_create_info.pNext = nullptr;
+  device_create_info.flags = 0;
+  device_create_info.queueCreateInfoCount =
+      separate_sparse_binding_queue ? 2 : 1;
+  device_create_info.pQueueCreateInfos = queue_create_infos;
+  // TODO(Triang3l): Enable the validation layer.
+  device_create_info.enabledLayerCount = 0;
+  device_create_info.ppEnabledLayerNames = nullptr;
+  device_create_info.enabledExtensionCount =
+      uint32_t(device_extensions_enabled.size());
+  device_create_info.ppEnabledExtensionNames = device_extensions_enabled.data();
+  // TODO(Triang3l): Enable only needed features.
+  device_create_info.pEnabledFeatures = &device_features_;
+  if (ifn_.createDevice(physical_device_, &device_create_info, nullptr,
+                        &device_) != VK_SUCCESS) {
+    XELOGE("Failed to create a Vulkan device");
+    return false;
+  }
+
+  // Get device functions.
+  bool device_functions_loaded = true;
+  device_functions_loaded &=
+      (dfn_.getDeviceQueue = PFN_vkGetDeviceQueue(
+           ifn_.getDeviceProcAddr(device_, "vkGetDeviceQueue"))) != nullptr;
+  if (!device_functions_loaded) {
+    XELOGE("Failed to get Vulkan device function pointers");
+    return false;
+  }
+
+  // Get the queues.
+  dfn_.getDeviceQueue(device_, queue_family_graphics_compute_, 0,
+                      &queue_graphics_compute_);
+  if (queue_family_sparse_binding != UINT32_MAX) {
+    if (separate_sparse_binding_queue) {
+      dfn_.getDeviceQueue(device_, queue_family_sparse_binding, 0,
+                          &queue_sparse_binding_);
+    } else {
+      queue_sparse_binding_ = queue_graphics_compute_;
+    }
+  } else {
+    queue_sparse_binding_ = VK_NULL_HANDLE;
+  }
+
+  return true;
+}

 std::unique_ptr<GraphicsContext> VulkanProvider::CreateContext(
     Window* target_window) {
diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h
index 3313b8d92..f03e6d390 100644
--- a/src/xenia/ui/vulkan/vulkan_provider.h
+++ b/src/xenia/ui/vulkan/vulkan_provider.h
@@ -10,26 +10,138 @@
 #ifndef XENIA_UI_VULKAN_VULKAN_PROVIDER_H_
 #define XENIA_UI_VULKAN_VULKAN_PROVIDER_H_

+#include <cstdint>
 #include <memory>
+#include <vector>

+#include "xenia/base/platform.h"
 #include "xenia/ui/graphics_provider.h"

+#if XE_PLATFORM_WIN32
+// Must be included before vulkan.h with VK_USE_PLATFORM_WIN32_KHR because it
+// includes Windows.h too.
+#include "xenia/base/platform_win.h" +#ifndef VK_USE_PLATFORM_WIN32_KHR +#define VK_USE_PLATFORM_WIN32_KHR 1 +#endif +#endif // XE_PLATFORM_WIN32 + +#ifndef VK_NO_PROTOTYPES +#define VK_NO_PROTOTYPES 1 +#endif +#include "third_party/vulkan/vulkan.h" + +#define XELOGVK XELOGI + namespace xe { namespace ui { namespace vulkan { class VulkanProvider : public GraphicsProvider { public: + ~VulkanProvider() override; + static std::unique_ptr Create(Window* main_window); std::unique_ptr CreateContext( Window* target_window) override; std::unique_ptr CreateOffscreenContext() override; + // Functions with a version suffix (like _1_1) are null when api_version() is + // below this version. + + struct LibraryFunctions { + PFN_vkCreateInstance createInstance; + PFN_vkEnumerateInstanceExtensionProperties + enumerateInstanceExtensionProperties; + PFN_vkEnumerateInstanceVersion enumerateInstanceVersion_1_1; + }; + const LibraryFunctions& library_functions() const { + return library_functions_; + } + + uint32_t api_version() const { return api_version_; } + + VkInstance instance() const { return instance_; } + struct InstanceFunctions { + PFN_vkCreateDevice createDevice; + PFN_vkDestroyDevice destroyDevice; + PFN_vkEnumerateDeviceExtensionProperties enumerateDeviceExtensionProperties; + PFN_vkEnumeratePhysicalDevices enumeratePhysicalDevices; + PFN_vkGetDeviceProcAddr getDeviceProcAddr; + PFN_vkGetPhysicalDeviceFeatures getPhysicalDeviceFeatures; + PFN_vkGetPhysicalDeviceProperties getPhysicalDeviceProperties; + PFN_vkGetPhysicalDeviceQueueFamilyProperties + getPhysicalDeviceQueueFamilyProperties; + PFN_vkGetPhysicalDeviceSurfaceSupportKHR getPhysicalDeviceSurfaceSupportKHR; +#if XE_PLATFORM_ANDROID + PFN_vkCreateAndroidSurfaceKHR createAndroidSurfaceKHR; +#elif XE_PLATFORM_WIN32 + PFN_vkCreateWin32SurfaceKHR createWin32SurfaceKHR; +#endif + }; + const InstanceFunctions& ifn() const { return ifn_; } + + VkPhysicalDevice physical_device() const { return physical_device_; } + const 
VkPhysicalDeviceProperties& device_properties() const { + return device_properties_; + } + const VkPhysicalDeviceFeatures& device_features() const { + return device_features_; + } + struct DeviceExtensions { + bool ext_fragment_shader_interlock; + }; + const DeviceExtensions& device_extensions() const { + return device_extensions_; + } + // FIXME(Triang3l): Allow a separate queue for present - see + // vulkan_provider.cc for details. + uint32_t queue_family_graphics_compute() const { + return queue_family_graphics_compute_; + } + + VkDevice device() const { return device_; } + struct DeviceFunctions { + PFN_vkGetDeviceQueue getDeviceQueue; + }; + const DeviceFunctions& dfn() const { return dfn_; } + + VkQueue queue_graphics_compute() const { return queue_graphics_compute_; } + // May be VK_NULL_HANDLE if not available. + VkQueue queue_sparse_binding() const { return queue_sparse_binding_; } + private: explicit VulkanProvider(Window* main_window); bool Initialize(); + +#if XE_PLATFORM_LINUX + void* library_ = nullptr; +#elif XE_PLATFORM_WIN32 + HMODULE library_ = nullptr; +#endif + + PFN_vkGetInstanceProcAddr getInstanceProcAddr_ = nullptr; + PFN_vkDestroyInstance destroyInstance_ = nullptr; + LibraryFunctions library_functions_ = {}; + + uint32_t api_version_ = VK_API_VERSION_1_0; + + VkInstance instance_ = VK_NULL_HANDLE; + InstanceFunctions ifn_ = {}; + + VkPhysicalDevice physical_device_ = VK_NULL_HANDLE; + VkPhysicalDeviceProperties device_properties_; + VkPhysicalDeviceFeatures device_features_; + DeviceExtensions device_extensions_; + uint32_t queue_family_graphics_compute_; + + VkDevice device_ = VK_NULL_HANDLE; + DeviceFunctions dfn_ = {}; + VkQueue queue_graphics_compute_; + // May be VK_NULL_HANDLE if not available. 
+  VkQueue queue_sparse_binding_;
 };

 }  // namespace vulkan
diff --git a/src/xenia/ui/vulkan/vulkan_util.h b/src/xenia/ui/vulkan/vulkan_util.h
new file mode 100644
index 000000000..c0702ba99
--- /dev/null
+++ b/src/xenia/ui/vulkan/vulkan_util.h
@@ -0,0 +1,45 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2019 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_UI_VULKAN_VULKAN_UTIL_H_
+#define XENIA_UI_VULKAN_VULKAN_UTIL_H_
+
+#include "xenia/ui/vulkan/vulkan_provider.h"
+
+namespace xe {
+namespace ui {
+namespace vulkan {
+namespace util {
+
+template <typename F, typename T>
+inline bool DestroyAndNullHandle(F* destroy_function, T& handle) {
+  if (handle != VK_NULL_HANDLE) {
+    destroy_function(handle, nullptr);
+    handle = VK_NULL_HANDLE;
+    return true;
+  }
+  return false;
+}
+
+template <typename F, typename P, typename T>
+inline bool DestroyAndNullHandle(F* destroy_function, P parent, T& handle) {
+  if (handle != VK_NULL_HANDLE) {
+    destroy_function(parent, handle, nullptr);
+    handle = VK_NULL_HANDLE;
+    return true;
+  }
+  return false;
+}
+
+}  // namespace util
+}  // namespace vulkan
+}  // namespace ui
+}  // namespace xe
+
+#endif  // XENIA_UI_VULKAN_VULKAN_UTIL_H_
From 203bf64d8893259d0ffdb1740a0e36daf6992687 Mon Sep 17 00:00:00 2001
From: Triang3l
Date: Sun, 13 Sep 2020 17:51:00 +0300
Subject: [PATCH 010/123] [Vulkan] Context, remove Volk

---
 premake5.lua                                  |   1 -
 src/xenia/app/premake5.lua                    |   1 -
 src/xenia/hid/premake5.lua                    |   1 -
 src/xenia/ui/d3d12/d3d12_context.cc           | 215 +++--
 src/xenia/ui/d3d12/d3d12_context.h            |   4 +-
 src/xenia/ui/graphics_context.cc              |  18 +
 src/xenia/ui/graphics_context.h               |   5 +-
 src/xenia/ui/vulkan/vulkan_context.cc         | 843 +++++++++++++++++-
src/xenia/ui/vulkan/vulkan_context.h | 77 +- .../ui/vulkan/vulkan_immediate_drawer.cc | 14 +- src/xenia/ui/vulkan/vulkan_immediate_drawer.h | 7 + src/xenia/ui/vulkan/vulkan_provider.cc | 229 ++--- src/xenia/ui/vulkan/vulkan_provider.h | 84 +- src/xenia/ui/window.cc | 6 +- third_party/volk | 1 - third_party/volk.lua | 30 - 16 files changed, 1241 insertions(+), 295 deletions(-) delete mode 160000 third_party/volk delete mode 100644 third_party/volk.lua diff --git a/premake5.lua b/premake5.lua index 622a4da2c..2b8042334 100644 --- a/premake5.lua +++ b/premake5.lua @@ -226,7 +226,6 @@ solution("xenia") include("third_party/SDL2.lua") include("third_party/snappy.lua") include("third_party/spirv-tools.lua") - include("third_party/volk.lua") include("third_party/xxhash.lua") include("src/xenia") diff --git a/src/xenia/app/premake5.lua b/src/xenia/app/premake5.lua index 520da24e4..3a0f6bb2a 100644 --- a/src/xenia/app/premake5.lua +++ b/src/xenia/app/premake5.lua @@ -20,7 +20,6 @@ project("xenia-app") "mspack", "snappy", "spirv-tools", - "volk", "xenia-app-discord", "xenia-apu", "xenia-apu-nop", diff --git a/src/xenia/hid/premake5.lua b/src/xenia/hid/premake5.lua index 152887e2b..b137f5178 100644 --- a/src/xenia/hid/premake5.lua +++ b/src/xenia/hid/premake5.lua @@ -22,7 +22,6 @@ project("xenia-hid-demo") links({ "fmt", "imgui", - "volk", "xenia-base", "xenia-helper-sdl", "xenia-hid", diff --git a/src/xenia/ui/d3d12/d3d12_context.cc b/src/xenia/ui/d3d12/d3d12_context.cc index f897a5516..7764afa44 100644 --- a/src/xenia/ui/d3d12/d3d12_context.cc +++ b/src/xenia/ui/d3d12/d3d12_context.cc @@ -9,9 +9,6 @@ #include "xenia/ui/d3d12/d3d12_context.h" -#include - -#include "xenia/base/cvar.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/ui/d3d12/d3d12_immediate_drawer.h" @@ -19,9 +16,6 @@ #include "xenia/ui/d3d12/d3d12_util.h" #include "xenia/ui/window.h" -DEFINE_bool(d3d12_random_clear_color, false, - "Randomize presentation back buffer clear color.", 
"D3D12"); - namespace xe { namespace ui { namespace d3d12 { @@ -32,110 +26,112 @@ D3D12Context::D3D12Context(D3D12Provider* provider, Window* target_window) D3D12Context::~D3D12Context() { Shutdown(); } bool D3D12Context::Initialize() { + context_lost_ = false; + + if (!target_window_) { + return true; + } + auto& provider = GetD3D12Provider(); auto dxgi_factory = provider.GetDXGIFactory(); auto device = provider.GetDevice(); auto direct_queue = provider.GetDirectQueue(); - context_lost_ = false; + swap_fence_current_value_ = 1; + swap_fence_completed_value_ = 0; + swap_fence_completion_event_ = CreateEvent(nullptr, false, false, nullptr); + if (swap_fence_completion_event_ == nullptr) { + XELOGE("Failed to create the composition fence completion event"); + Shutdown(); + return false; + } + // Create a fence for transient resources of compositing. + if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, + IID_PPV_ARGS(&swap_fence_)))) { + XELOGE("Failed to create the composition fence"); + Shutdown(); + return false; + } - if (target_window_) { - swap_fence_current_value_ = 1; - swap_fence_completed_value_ = 0; - swap_fence_completion_event_ = CreateEvent(nullptr, false, false, nullptr); - if (swap_fence_completion_event_ == nullptr) { - XELOGE("Failed to create the composition fence completion event"); - Shutdown(); - return false; - } - // Create a fence for transient resources of compositing. - if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, - IID_PPV_ARGS(&swap_fence_)))) { - XELOGE("Failed to create the composition fence"); - Shutdown(); - return false; - } - - // Create the swap chain. 
- swap_chain_width_ = target_window_->scaled_width(); - swap_chain_height_ = target_window_->scaled_height(); - DXGI_SWAP_CHAIN_DESC1 swap_chain_desc; - swap_chain_desc.Width = swap_chain_width_; - swap_chain_desc.Height = swap_chain_height_; - swap_chain_desc.Format = kSwapChainFormat; - swap_chain_desc.Stereo = FALSE; - swap_chain_desc.SampleDesc.Count = 1; - swap_chain_desc.SampleDesc.Quality = 0; - swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; - swap_chain_desc.BufferCount = kSwapChainBufferCount; - swap_chain_desc.Scaling = DXGI_SCALING_STRETCH; - swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; - swap_chain_desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; - swap_chain_desc.Flags = 0; - IDXGISwapChain1* swap_chain_1; - if (FAILED(dxgi_factory->CreateSwapChainForHwnd( - provider.GetDirectQueue(), - static_cast(target_window_->native_handle()), - &swap_chain_desc, nullptr, nullptr, &swap_chain_1))) { - XELOGE("Failed to create a DXGI swap chain"); - Shutdown(); - return false; - } - if (FAILED(swap_chain_1->QueryInterface(IID_PPV_ARGS(&swap_chain_)))) { - XELOGE("Failed to get version 3 of the DXGI swap chain interface"); - swap_chain_1->Release(); - Shutdown(); - return false; - } + // Create the swap chain. 
+ swap_chain_width_ = target_window_->scaled_width(); + swap_chain_height_ = target_window_->scaled_height(); + DXGI_SWAP_CHAIN_DESC1 swap_chain_desc; + swap_chain_desc.Width = swap_chain_width_; + swap_chain_desc.Height = swap_chain_height_; + swap_chain_desc.Format = kSwapChainFormat; + swap_chain_desc.Stereo = FALSE; + swap_chain_desc.SampleDesc.Count = 1; + swap_chain_desc.SampleDesc.Quality = 0; + swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swap_chain_desc.BufferCount = kSwapChainBufferCount; + swap_chain_desc.Scaling = DXGI_SCALING_STRETCH; + swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + swap_chain_desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; + swap_chain_desc.Flags = 0; + IDXGISwapChain1* swap_chain_1; + if (FAILED(dxgi_factory->CreateSwapChainForHwnd( + provider.GetDirectQueue(), + reinterpret_cast(target_window_->native_handle()), + &swap_chain_desc, nullptr, nullptr, &swap_chain_1))) { + XELOGE("Failed to create a DXGI swap chain"); + Shutdown(); + return false; + } + if (FAILED(swap_chain_1->QueryInterface(IID_PPV_ARGS(&swap_chain_)))) { + XELOGE("Failed to get version 3 of the DXGI swap chain interface"); swap_chain_1->Release(); + Shutdown(); + return false; + } + swap_chain_1->Release(); - // Create a heap for RTV descriptors of swap chain buffers. - D3D12_DESCRIPTOR_HEAP_DESC rtv_heap_desc; - rtv_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; - rtv_heap_desc.NumDescriptors = kSwapChainBufferCount; - rtv_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; - rtv_heap_desc.NodeMask = 0; - if (FAILED(device->CreateDescriptorHeap( - &rtv_heap_desc, IID_PPV_ARGS(&swap_chain_rtv_heap_)))) { - XELOGE("Failed to create swap chain RTV descriptor heap"); + // Create a heap for RTV descriptors of swap chain buffers. 
+ D3D12_DESCRIPTOR_HEAP_DESC rtv_heap_desc; + rtv_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + rtv_heap_desc.NumDescriptors = kSwapChainBufferCount; + rtv_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + rtv_heap_desc.NodeMask = 0; + if (FAILED(device->CreateDescriptorHeap( + &rtv_heap_desc, IID_PPV_ARGS(&swap_chain_rtv_heap_)))) { + XELOGE("Failed to create swap chain RTV descriptor heap"); + Shutdown(); + return false; + } + swap_chain_rtv_heap_start_ = + swap_chain_rtv_heap_->GetCPUDescriptorHandleForHeapStart(); + + // Get the buffers and create their RTV descriptors. + if (!InitializeSwapChainBuffers()) { + Shutdown(); + return false; + } + + // Create the command list for compositing. + for (uint32_t i = 0; i < kSwapCommandAllocatorCount; ++i) { + if (FAILED(device->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_DIRECT, + IID_PPV_ARGS(&swap_command_allocators_[i])))) { + XELOGE("Failed to create a composition command allocator"); Shutdown(); return false; } - swap_chain_rtv_heap_start_ = - swap_chain_rtv_heap_->GetCPUDescriptorHandleForHeapStart(); + } + if (FAILED(device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, + swap_command_allocators_[0], nullptr, + IID_PPV_ARGS(&swap_command_list_)))) { + XELOGE("Failed to create the composition graphics command list"); + Shutdown(); + return false; + } + // Initially in open state, wait until BeginSwap. + swap_command_list_->Close(); - // Get the buffers and create their RTV descriptors. - if (!InitializeSwapChainBuffers()) { - Shutdown(); - return false; - } - - // Create the command list for compositing. 
- for (uint32_t i = 0; i < kSwapCommandAllocatorCount; ++i) { - if (FAILED(device->CreateCommandAllocator( - D3D12_COMMAND_LIST_TYPE_DIRECT, - IID_PPV_ARGS(&swap_command_allocators_[i])))) { - XELOGE("Failed to create a composition command allocator"); - Shutdown(); - return false; - } - } - if (FAILED(device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, - swap_command_allocators_[0], nullptr, - IID_PPV_ARGS(&swap_command_list_)))) { - XELOGE("Failed to create the composition graphics command list"); - Shutdown(); - return false; - } - // Initially in open state, wait until BeginSwap. - swap_command_list_->Close(); - - // Initialize the immediate mode drawer if not offscreen. - immediate_drawer_ = std::make_unique(*this); - if (!immediate_drawer_->Initialize()) { - Shutdown(); - return false; - } + // Initialize the immediate mode drawer if not offscreen. + immediate_drawer_ = std::make_unique(*this); + if (!immediate_drawer_->Initialize()) { + Shutdown(); + return false; } return true; @@ -223,9 +219,11 @@ ImmediateDrawer* D3D12Context::immediate_drawer() { return immediate_drawer_.get(); } -void D3D12Context::BeginSwap() { +bool D3D12Context::WasLost() { return context_lost_; } + +bool D3D12Context::BeginSwap() { if (!target_window_ || context_lost_) { - return; + return false; } // Resize the swap chain if the window is resized. 
@@ -252,13 +250,13 @@ void D3D12Context::BeginSwap() { kSwapChainBufferCount, target_window_width, target_window_height, kSwapChainFormat, 0))) { context_lost_ = true; - return; + return false; } swap_chain_width_ = target_window_width; swap_chain_height_ = target_window_height; if (!InitializeSwapChainBuffers()) { context_lost_ = true; - return; + return false; } } @@ -295,18 +293,11 @@ void D3D12Context::BeginSwap() { D3D12_CPU_DESCRIPTOR_HANDLE back_buffer_rtv = GetSwapChainBackBufferRTV(); swap_command_list_->OMSetRenderTargets(1, &back_buffer_rtv, TRUE, nullptr); float clear_color[4]; - if (cvars::d3d12_random_clear_color) { - clear_color[0] = rand() / float(RAND_MAX); // NOLINT(runtime/threadsafe_fn) - clear_color[1] = 1.0f; - clear_color[2] = 0.0f; - } else { - clear_color[0] = 0.0f; - clear_color[1] = 0.0f; - clear_color[2] = 0.0f; - } - clear_color[3] = 1.0f; + GetClearColor(clear_color); swap_command_list_->ClearRenderTargetView(back_buffer_rtv, clear_color, 0, nullptr); + + return true; } void D3D12Context::EndSwap() { diff --git a/src/xenia/ui/d3d12/d3d12_context.h b/src/xenia/ui/d3d12/d3d12_context.h index 2651adae9..c9f235b97 100644 --- a/src/xenia/ui/d3d12/d3d12_context.h +++ b/src/xenia/ui/d3d12/d3d12_context.h @@ -28,9 +28,9 @@ class D3D12Context : public GraphicsContext { ImmediateDrawer* immediate_drawer() override; - bool WasLost() override { return context_lost_; } + bool WasLost() override; - void BeginSwap() override; + bool BeginSwap() override; void EndSwap() override; std::unique_ptr Capture() override; diff --git a/src/xenia/ui/graphics_context.cc b/src/xenia/ui/graphics_context.cc index 73980cd37..7f5ab07b6 100644 --- a/src/xenia/ui/graphics_context.cc +++ b/src/xenia/ui/graphics_context.cc @@ -9,8 +9,13 @@ #include "xenia/ui/graphics_context.h" +#include + +#include "xenia/base/cvar.h" #include "xenia/ui/graphics_provider.h" +DEFINE_bool(random_clear_color, false, "Randomize window clear color.", "UI"); + namespace xe { namespace ui { 
@@ -26,5 +31,18 @@ bool GraphicsContext::MakeCurrent() { return true; } void GraphicsContext::ClearCurrent() {} +void GraphicsContext::GetClearColor(float* rgba) { + if (cvars::random_clear_color) { + rgba[0] = rand() / float(RAND_MAX); // NOLINT(runtime/threadsafe_fn) + rgba[1] = 1.0f; + rgba[2] = 0.0f; + } else { + rgba[0] = 0.0f; + rgba[1] = 0.0f; + rgba[2] = 0.0f; + } + rgba[3] = 1.0f; +} + } // namespace ui } // namespace xe diff --git a/src/xenia/ui/graphics_context.h b/src/xenia/ui/graphics_context.h index 383338770..0ed5bd881 100644 --- a/src/xenia/ui/graphics_context.h +++ b/src/xenia/ui/graphics_context.h @@ -51,7 +51,8 @@ class GraphicsContext { // This context must be made current in order for this call to work properly. virtual bool WasLost() = 0; - virtual void BeginSwap() = 0; + // Returns true if able to draw now (the target surface is available). + virtual bool BeginSwap() = 0; virtual void EndSwap() = 0; virtual std::unique_ptr Capture() = 0; @@ -59,6 +60,8 @@ class GraphicsContext { protected: explicit GraphicsContext(GraphicsProvider* provider, Window* target_window); + static void GetClearColor(float* rgba); + GraphicsProvider* provider_ = nullptr; Window* target_window_ = nullptr; }; diff --git a/src/xenia/ui/vulkan/vulkan_context.cc b/src/xenia/ui/vulkan/vulkan_context.cc index 2503c105d..28c68bcd5 100644 --- a/src/xenia/ui/vulkan/vulkan_context.cc +++ b/src/xenia/ui/vulkan/vulkan_context.cc @@ -9,8 +9,24 @@ #include "xenia/ui/vulkan/vulkan_context.h" +#include +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/platform.h" #include "xenia/ui/vulkan/vulkan_immediate_drawer.h" #include "xenia/ui/vulkan/vulkan_provider.h" +#include "xenia/ui/vulkan/vulkan_util.h" +#include "xenia/ui/window.h" + +#if XE_PLATFORM_ANDROID +#include +#elif XE_PLATFORM_WIN32 +#include "xenia/base/platform_win.h" +#endif namespace xe { namespace ui { @@ -19,21 +35,842 @@ namespace 
vulkan { VulkanContext::VulkanContext(VulkanProvider* provider, Window* target_window) : GraphicsContext(provider, target_window) {} -bool VulkanContext::Initialize() { return false; } +bool VulkanContext::Initialize() { + context_lost_ = false; + + if (!target_window_) { + return true; + } + + const VulkanProvider& provider = GetVulkanProvider(); + const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + VkFenceCreateInfo fence_create_info; + fence_create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fence_create_info.pNext = nullptr; + fence_create_info.flags = VK_FENCE_CREATE_SIGNALED_BIT; + + VkCommandPoolCreateInfo command_pool_create_info; + command_pool_create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + command_pool_create_info.pNext = nullptr; + command_pool_create_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT; + command_pool_create_info.queueFamilyIndex = + provider.queue_family_graphics_compute(); + + VkCommandBufferAllocateInfo command_buffer_allocate_info; + command_buffer_allocate_info.sType = + VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + command_buffer_allocate_info.pNext = nullptr; + command_buffer_allocate_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + command_buffer_allocate_info.commandBufferCount = 1; + + for (uint32_t i = 0; i < kSwapchainMaxImageCount; ++i) { + SwapSubmission& submission = swap_submissions_[i]; + if (dfn.vkCreateFence(device, &fence_create_info, nullptr, + &submission.fence) != VK_SUCCESS) { + XELOGE("Failed to create the Vulkan composition fences"); + Shutdown(); + return false; + } + if (dfn.vkCreateCommandPool(device, &command_pool_create_info, nullptr, + &submission.command_pool) != VK_SUCCESS) { + XELOGE("Failed to create the Vulkan composition command pools"); + Shutdown(); + return false; + } + command_buffer_allocate_info.commandPool = submission.command_pool; + if (dfn.vkAllocateCommandBuffers(device, &command_buffer_allocate_info, + 
&submission.command_buffer) != + VK_SUCCESS) { + XELOGE("Failed to allocate the Vulkan composition command buffers"); + Shutdown(); + return false; + } + } + + VkSemaphoreCreateInfo semaphore_create_info; + semaphore_create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + semaphore_create_info.pNext = nullptr; + semaphore_create_info.flags = 0; + if (dfn.vkCreateSemaphore(device, &semaphore_create_info, nullptr, + &swap_image_acquisition_semaphore_) != VK_SUCCESS) { + XELOGE( + "Failed to create the Vulkan swap chain image acquisition semaphore"); + Shutdown(); + return false; + } + if (dfn.vkCreateSemaphore(device, &semaphore_create_info, nullptr, + &swap_render_completion_semaphore_) != VK_SUCCESS) { + XELOGE( + "Failed to create the Vulkan swap chain rendering completion " + "semaphore"); + Shutdown(); + return false; + } + + immediate_drawer_ = std::make_unique(*this); + // TODO(Triang3l): Initialize the immediate drawer. + + swap_swapchain_or_surface_recreation_needed_ = true; + + return true; +} + +void VulkanContext::Shutdown() { + if (!target_window_) { + return; + } + + AwaitAllSwapSubmissionsCompletion(); + + const VulkanProvider& provider = GetVulkanProvider(); + const VulkanProvider::InstanceFunctions& ifn = provider.ifn(); + VkInstance instance = provider.instance(); + const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + swap_swapchain_image_current_ = UINT32_MAX; + DestroySwapchainFramebuffers(); + util::DestroyAndNullHandle(dfn.vkDestroySwapchainKHR, device, + swap_swapchain_); + util::DestroyAndNullHandle(dfn.vkDestroyRenderPass, device, + swap_render_pass_); + util::DestroyAndNullHandle(ifn.vkDestroySurfaceKHR, instance, swap_surface_); + swap_swapchain_or_surface_recreation_needed_ = false; + + util::DestroyAndNullHandle(dfn.vkDestroySemaphore, device, + swap_render_completion_semaphore_); + util::DestroyAndNullHandle(dfn.vkDestroySemaphore, device, + swap_image_acquisition_semaphore_); + + 
for (uint32_t i = 0; i < kSwapchainMaxImageCount; ++i) { + SwapSubmission& submission = swap_submissions_[i]; + util::DestroyAndNullHandle(dfn.vkDestroyCommandPool, device, + submission.command_pool); + util::DestroyAndNullHandle(dfn.vkDestroyFence, device, submission.fence); + } + swap_submission_current_ = 1; + swap_submission_completed_ = 0; +} ImmediateDrawer* VulkanContext::immediate_drawer() { return immediate_drawer_.get(); } -void VulkanContext::BeginSwap() {} +bool VulkanContext::WasLost() { return context_lost_; } -void VulkanContext::EndSwap() {} +bool VulkanContext::BeginSwap() { + if (!target_window_ || context_lost_) { + return false; + } + + const VulkanProvider& provider = GetVulkanProvider(); + const VulkanProvider::InstanceFunctions& ifn = provider.ifn(); + VkPhysicalDevice physical_device = provider.physical_device(); + const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + uint32_t window_width = uint32_t(target_window_->scaled_width()); + uint32_t window_height = uint32_t(target_window_->scaled_height()); + if (swap_swapchain_ != VK_NULL_HANDLE) { + // Check if need to resize. + assert_true(swap_surface_ != VK_NULL_HANDLE); + // Win32 has minImageExtent == maxImageExtent == currentExtent, so the + // capabilities need to be requested every time they are needed. 
+ VkSurfaceCapabilitiesKHR surface_capabilities; + if (ifn.vkGetPhysicalDeviceSurfaceCapabilitiesKHR( + physical_device, swap_surface_, &surface_capabilities) == + VK_SUCCESS) { + if (swap_swapchain_extent_.width != + xe::clamp(window_width, surface_capabilities.minImageExtent.width, + surface_capabilities.maxImageExtent.width) || + swap_swapchain_extent_.height != + xe::clamp(window_height, + surface_capabilities.minImageExtent.height, + surface_capabilities.maxImageExtent.height)) { + swap_swapchain_or_surface_recreation_needed_ = true; + } + } + } + + // If the swap chain turns out to be out of date, try to recreate it on the + // second attempt (to avoid skipping the frame entirely in this case). + for (uint32_t attempt = 0; attempt < 2; ++attempt) { + if (swap_swapchain_or_surface_recreation_needed_) { + // If recreation fails, don't retry until some change happens. + swap_swapchain_or_surface_recreation_needed_ = false; + + AwaitAllSwapSubmissionsCompletion(); + + uint32_t queue_family_graphics_compute = + provider.queue_family_graphics_compute(); + + if (swap_surface_ == VK_NULL_HANDLE) { + assert_true(swap_swapchain_ == VK_NULL_HANDLE); + assert_true(swap_swapchain_image_views_.empty()); + assert_true(swap_swapchain_framebuffers_.empty()); + + VkInstance instance = provider.instance(); + + VkResult surface_create_result; +#if XE_PLATFORM_ANDROID + VkAndroidSurfaceCreateInfoKHR surface_create_info; + surface_create_info.window = + reinterpret_cast(target_window_->native_handle()); + if (!surface_create_info.window) { + // The activity is in background - try again when the window is + // created. 
+ return false; + } + surface_create_info.sType = + VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR; + surface_create_info.pNext = nullptr; + surface_create_info.flags = 0; + surface_create_result = ifn.vkCreateAndroidSurfaceKHR( + instance, &surface_create_info, nullptr, &swap_surface_); +#elif XE_PLATFORM_WIN32 + VkWin32SurfaceCreateInfoKHR surface_create_info; + surface_create_info.sType = + VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR; + surface_create_info.pNext = nullptr; + surface_create_info.flags = 0; + surface_create_info.hinstance = reinterpret_cast( + target_window_->native_platform_handle()); + surface_create_info.hwnd = + reinterpret_cast(target_window_->native_handle()); + surface_create_result = ifn.vkCreateWin32SurfaceKHR( + instance, &surface_create_info, nullptr, &swap_surface_); +#else +#error No Vulkan surface creation for the target platform. +#endif + if (surface_create_result != VK_SUCCESS) { + XELOGE("Failed to create a Vulkan surface"); + return false; + } + + // FIXME(Triang3l): Allow a separate queue for present - see + // vulkan_provider.cc for details. + VkBool32 surface_supported; + if (ifn.vkGetPhysicalDeviceSurfaceSupportKHR( + physical_device, queue_family_graphics_compute, swap_surface_, + &surface_supported) != VK_SUCCESS || + !surface_supported) { + XELOGE( + "The Vulkan graphics and compute queue doesn't support " + "presentation"); + ifn.vkDestroySurfaceKHR(instance, swap_surface_, nullptr); + swap_surface_ = VK_NULL_HANDLE; + return false; + } + + // Choose an SDR format, 8.8.8.8 preferred, or if not available, any + // supported. Windows and GNU/Linux use B8G8R8A8, Android uses R8G8B8A8. 
+ std::vector surface_formats; + VkResult surface_formats_get_result; + for (;;) { + uint32_t surface_format_count = uint32_t(surface_formats.size()); + bool surface_formats_was_empty = !surface_format_count; + surface_formats_get_result = ifn.vkGetPhysicalDeviceSurfaceFormatsKHR( + physical_device, swap_surface_, &surface_format_count, + surface_formats_was_empty ? nullptr : surface_formats.data()); + // If the original surface format count was 0 (first call), SUCCESS is + // returned, not INCOMPLETE. + if (surface_formats_get_result == VK_SUCCESS || + surface_formats_get_result == VK_INCOMPLETE) { + surface_formats.resize(surface_format_count); + if (surface_formats_get_result == VK_SUCCESS && + (!surface_formats_was_empty || !surface_format_count)) { + break; + } + } else { + break; + } + } + if (surface_formats_get_result != VK_SUCCESS || + surface_formats.empty()) { + XELOGE("Failed to get Vulkan surface formats"); + ifn.vkDestroySurfaceKHR(instance, swap_surface_, nullptr); + swap_surface_ = VK_NULL_HANDLE; + return false; + } + VkSurfaceFormatKHR surface_format; + if (surface_formats.size() == 1 && + surface_formats[0].format == VK_FORMAT_UNDEFINED) { +#if XE_PLATFORM_ANDROID + surface_format.format = VK_FORMAT_R8G8B8A8_UNORM; +#else + surface_format.format = VK_FORMAT_B8G8R8A8_UNORM; +#endif + surface_format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; + } else { + surface_format = surface_formats.front(); + for (const VkSurfaceFormatKHR& surface_format_current : + surface_formats) { + if (surface_format_current.format == VK_FORMAT_B8G8R8A8_UNORM || + surface_format_current.format == VK_FORMAT_R8G8B8A8_UNORM || + surface_format_current.format == + VK_FORMAT_A8B8G8R8_UNORM_PACK32) { + surface_format = surface_format_current; + break; + } + } + } + if (swap_surface_format_.format != surface_format.format) { + util::DestroyAndNullHandle(dfn.vkDestroyRenderPass, device, + swap_render_pass_); + } + swap_surface_format_ = surface_format; + + // Prefer a 
low-latency present mode because emulation is done on the + // same queue, ordered by the decreasing amount of tearing, fall back to + // FIFO if no other options. + swap_surface_present_mode_ = VK_PRESENT_MODE_FIFO_KHR; + std::vector surface_present_modes; + VkResult surface_present_modes_get_result; + for (;;) { + uint32_t surface_present_mode_count = + uint32_t(surface_present_modes.size()); + bool surface_present_modes_was_empty = !surface_present_mode_count; + surface_present_modes_get_result = + ifn.vkGetPhysicalDeviceSurfacePresentModesKHR( + physical_device, swap_surface_, &surface_present_mode_count, + surface_present_modes_was_empty + ? nullptr + : surface_present_modes.data()); + // If the original surface present mode count was 0 (first call), + // SUCCESS is returned, not INCOMPLETE. + if (surface_present_modes_get_result == VK_SUCCESS || + surface_present_modes_get_result == VK_INCOMPLETE) { + surface_present_modes.resize(surface_present_mode_count); + if (surface_present_modes_get_result == VK_SUCCESS && + (!surface_present_modes_was_empty || + !surface_present_mode_count)) { + break; + } + } else { + break; + } + } + if (surface_present_modes_get_result == VK_SUCCESS) { + static const VkPresentModeKHR present_modes_preferred[] = { + VK_PRESENT_MODE_MAILBOX_KHR, + VK_PRESENT_MODE_FIFO_RELAXED_KHR, + VK_PRESENT_MODE_IMMEDIATE_KHR, + }; + for (size_t i = 0; i < xe::countof(present_modes_preferred); ++i) { + VkPresentModeKHR present_mode_preferred = + present_modes_preferred[i]; + if (std::find(surface_present_modes.cbegin(), + surface_present_modes.cend(), + present_mode_preferred) != + surface_present_modes.cend()) { + swap_surface_present_mode_ = present_mode_preferred; + break; + } + } + } + } + + // Recreate the swap chain unconditionally because a request was made. + // The old swapchain will be retired even if vkCreateSwapchainKHR fails, + // so destroy the framebuffers and the image views unconditionally. 
+ // If anything fails before the vkCreateSwapchainKHR call, also destroy + // the swapchain to fulfill the request. + // It was safe to handle errors while creating the surface without caring + // about destroying the swapchain, because there can't be swapchain when + // there is no surface. + DestroySwapchainFramebuffers(); + // Win32 has minImageExtent == maxImageExtent == currentExtent, so the + // capabilities need to be requested every time they are needed. + VkSurfaceCapabilitiesKHR surface_capabilities; + if (ifn.vkGetPhysicalDeviceSurfaceCapabilitiesKHR( + physical_device, swap_surface_, &surface_capabilities) != + VK_SUCCESS) { + XELOGE("Failed to get Vulkan surface capabilities"); + util::DestroyAndNullHandle(dfn.vkDestroySwapchainKHR, device, + swap_swapchain_); + return false; + } + // TODO(Triang3l): Support rotated transforms. + if (!(surface_capabilities.supportedTransforms & + VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR)) { + XELOGE("The Vulkan surface doesn't support identity transform"); + util::DestroyAndNullHandle(dfn.vkDestroySwapchainKHR, device, + swap_swapchain_); + return false; + } + VkSwapchainCreateInfoKHR swapchain_create_info; + swapchain_create_info.imageExtent.width = + xe::clamp(window_width, surface_capabilities.minImageExtent.width, + surface_capabilities.maxImageExtent.width); + swapchain_create_info.imageExtent.height = + xe::clamp(window_height, surface_capabilities.minImageExtent.height, + surface_capabilities.maxImageExtent.height); + if (!swapchain_create_info.imageExtent.width || + !swapchain_create_info.imageExtent.height) { + // Everything else is fine with the surface, but the window is too + // small, try again when the window may be resized (won't try to do some + // vkCreate* every BeginSwap, will reach this part again, so okay to set + // swap_swapchain_or_surface_recreation_needed_ back to true). 
+ swap_swapchain_or_surface_recreation_needed_ = true; + util::DestroyAndNullHandle(dfn.vkDestroySwapchainKHR, device, + swap_swapchain_); + return false; + } + swapchain_create_info.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; + swapchain_create_info.pNext = nullptr; + swapchain_create_info.flags = 0; + swapchain_create_info.surface = swap_surface_; + swapchain_create_info.minImageCount = kSwapchainMaxImageCount; + if (surface_capabilities.maxImageCount) { + swapchain_create_info.minImageCount = + std::min(swapchain_create_info.minImageCount, + surface_capabilities.maxImageCount); + } + swapchain_create_info.imageFormat = swap_surface_format_.format; + swapchain_create_info.imageColorSpace = swap_surface_format_.colorSpace; + swapchain_create_info.imageArrayLayers = 1; + swapchain_create_info.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + // FIXME(Triang3l): Allow a separate queue for present - see + // vulkan_provider.cc for details. + swapchain_create_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; + swapchain_create_info.queueFamilyIndexCount = 1; + swapchain_create_info.pQueueFamilyIndices = + &queue_family_graphics_compute; + // TODO(Triang3l): Support rotated transforms. + swapchain_create_info.preTransform = + VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + swapchain_create_info.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + if (!(surface_capabilities.supportedCompositeAlpha & + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR)) { + if (surface_capabilities.supportedCompositeAlpha & + VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR) { + swapchain_create_info.compositeAlpha = + VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR; + } else { + // Whatever. supportedCompositeAlpha must have at least one bit set, + // but if it somehow doesn't (impossible situation according to the + // specification, but who knows), just assume opaque. 
+ uint32_t composite_alpha_bit_index; + if (xe::bit_scan_forward( + uint32_t(surface_capabilities.supportedCompositeAlpha), + &composite_alpha_bit_index)) { + swapchain_create_info.compositeAlpha = VkCompositeAlphaFlagBitsKHR( + uint32_t(1) << composite_alpha_bit_index); + } + } + } + swapchain_create_info.presentMode = swap_surface_present_mode_; + swapchain_create_info.clipped = VK_TRUE; + swapchain_create_info.oldSwapchain = swap_swapchain_; + VkResult swapchain_create_result = dfn.vkCreateSwapchainKHR( + device, &swapchain_create_info, nullptr, &swap_swapchain_); + // The old swapchain is retired even if vkCreateSwapchainKHR has failed. + if (swapchain_create_info.oldSwapchain != VK_NULL_HANDLE) { + dfn.vkDestroySwapchainKHR(device, swapchain_create_info.oldSwapchain, + nullptr); + } + if (swapchain_create_result != VK_SUCCESS) { + XELOGE("Failed to create a Vulkan swapchain"); + swap_swapchain_ = VK_NULL_HANDLE; + return false; + } + swap_swapchain_extent_ = swapchain_create_info.imageExtent; + + // The render pass is needed to create framebuffers for swapchain images. + // It depends on the surface format, and thus can be reused with different + // surfaces by different swapchains, so it has separate lifetime tracking. + // It's safe to fail now (though destroying the new swapchain), because + // the request to destroy the old VkSwapchain somehow (after retiring, or + // directly) has been fulfilled. 
+ if (swap_render_pass_ == VK_NULL_HANDLE) { + VkAttachmentDescription render_pass_color_attachment; + render_pass_color_attachment.flags = 0; + render_pass_color_attachment.format = swap_surface_format_.format; + render_pass_color_attachment.samples = VK_SAMPLE_COUNT_1_BIT; + render_pass_color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + render_pass_color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + render_pass_color_attachment.stencilLoadOp = + VK_ATTACHMENT_LOAD_OP_DONT_CARE; + render_pass_color_attachment.stencilStoreOp = + VK_ATTACHMENT_STORE_OP_DONT_CARE; + render_pass_color_attachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + render_pass_color_attachment.finalLayout = + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + VkAttachmentReference render_pass_color_attachment_reference; + render_pass_color_attachment_reference.attachment = 0; + render_pass_color_attachment_reference.layout = + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + VkSubpassDescription render_pass_subpass; + render_pass_subpass.flags = 0; + render_pass_subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + render_pass_subpass.inputAttachmentCount = 0; + render_pass_subpass.pInputAttachments = nullptr; + render_pass_subpass.colorAttachmentCount = 1; + render_pass_subpass.pColorAttachments = + &render_pass_color_attachment_reference; + render_pass_subpass.pResolveAttachments = nullptr; + render_pass_subpass.pDepthStencilAttachment = nullptr; + render_pass_subpass.preserveAttachmentCount = 0; + render_pass_subpass.pPreserveAttachments = nullptr; + VkRenderPassCreateInfo render_pass_create_info; + render_pass_create_info.sType = + VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + render_pass_create_info.pNext = nullptr; + render_pass_create_info.flags = 0; + render_pass_create_info.attachmentCount = 1; + render_pass_create_info.pAttachments = &render_pass_color_attachment; + render_pass_create_info.subpassCount = 1; + render_pass_create_info.pSubpasses = &render_pass_subpass; + 
render_pass_create_info.dependencyCount = 0; + render_pass_create_info.pDependencies = nullptr; + if (dfn.vkCreateRenderPass(device, &render_pass_create_info, nullptr, + &swap_render_pass_) != VK_SUCCESS) { + XELOGE("Failed to create the Vulkan presentation render pass."); + dfn.vkDestroySwapchainKHR(device, swap_swapchain_, nullptr); + swap_swapchain_ = VK_NULL_HANDLE; + return false; + } + } + + std::vector swapchain_images; + uint32_t swapchain_image_count; + VkResult swapchain_images_get_result; + for (;;) { + swapchain_image_count = uint32_t(swapchain_images.size()); + bool swapchain_images_was_empty = !swapchain_image_count; + swapchain_images_get_result = dfn.vkGetSwapchainImagesKHR( + device, swap_swapchain_, &swapchain_image_count, + swapchain_images_was_empty ? nullptr : swapchain_images.data()); + // If the original swapchain image count was 0 (first call), SUCCESS is + // returned, not INCOMPLETE. + if (swapchain_images_get_result == VK_SUCCESS || + swapchain_images_get_result == VK_INCOMPLETE) { + swapchain_images.resize(swapchain_image_count); + if (swapchain_images_get_result == VK_SUCCESS && + (!swapchain_images_was_empty || !swapchain_image_count)) { + break; + } + } else { + break; + } + } + if (swapchain_images_get_result != VK_SUCCESS || + swapchain_images.empty()) { + XELOGE("Failed to get Vulkan swapchain images"); + dfn.vkDestroySwapchainKHR(device, swap_swapchain_, nullptr); + swap_swapchain_ = VK_NULL_HANDLE; + return false; + } + assert_true(swap_swapchain_image_views_.empty()); + swap_swapchain_image_views_.reserve(swapchain_image_count); + assert_true(swap_swapchain_framebuffers_.empty()); + swap_swapchain_framebuffers_.reserve(swapchain_image_count); + VkImageViewCreateInfo swapchain_image_view_create_info; + swapchain_image_view_create_info.sType = + VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + swapchain_image_view_create_info.pNext = nullptr; + swapchain_image_view_create_info.flags = 0; + swapchain_image_view_create_info.viewType = 
VK_IMAGE_VIEW_TYPE_2D; + swapchain_image_view_create_info.format = swap_surface_format_.format; + swapchain_image_view_create_info.components.r = + VK_COMPONENT_SWIZZLE_IDENTITY; + swapchain_image_view_create_info.components.g = + VK_COMPONENT_SWIZZLE_IDENTITY; + swapchain_image_view_create_info.components.b = + VK_COMPONENT_SWIZZLE_IDENTITY; + swapchain_image_view_create_info.components.a = + VK_COMPONENT_SWIZZLE_IDENTITY; + swapchain_image_view_create_info.subresourceRange.aspectMask = + VK_IMAGE_ASPECT_COLOR_BIT; + swapchain_image_view_create_info.subresourceRange.baseMipLevel = 0; + swapchain_image_view_create_info.subresourceRange.levelCount = 1; + swapchain_image_view_create_info.subresourceRange.baseArrayLayer = 0; + swapchain_image_view_create_info.subresourceRange.layerCount = 1; + VkFramebufferCreateInfo swapchain_framebuffer_create_info; + swapchain_framebuffer_create_info.sType = + VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + swapchain_framebuffer_create_info.pNext = nullptr; + swapchain_framebuffer_create_info.flags = 0; + swapchain_framebuffer_create_info.renderPass = swap_render_pass_; + swapchain_framebuffer_create_info.attachmentCount = 1; + swapchain_framebuffer_create_info.width = swap_swapchain_extent_.width; + swapchain_framebuffer_create_info.height = swap_swapchain_extent_.height; + swapchain_framebuffer_create_info.layers = 1; + for (uint32_t i = 0; i < swapchain_image_count; ++i) { + VkImage swapchain_image = swapchain_images[i]; + swapchain_image_view_create_info.image = swapchain_image; + VkImageView swapchain_image_view; + if (dfn.vkCreateImageView(device, &swapchain_image_view_create_info, + nullptr, + &swapchain_image_view) != VK_SUCCESS) { + XELOGE("Failed to create Vulkan swapchain image views"); + DestroySwapchainFramebuffers(); + dfn.vkDestroySwapchainKHR(device, swap_swapchain_, nullptr); + swap_swapchain_ = VK_NULL_HANDLE; + return false; + } + swap_swapchain_image_views_.push_back(swapchain_image_view); + 
swapchain_framebuffer_create_info.pAttachments = &swapchain_image_view; + VkFramebuffer swapchain_framebuffer; + if (dfn.vkCreateFramebuffer(device, &swapchain_framebuffer_create_info, + nullptr, + &swapchain_framebuffer) != VK_SUCCESS) { + XELOGE("Failed to create Vulkan swapchain framebuffers"); + DestroySwapchainFramebuffers(); + dfn.vkDestroySwapchainKHR(device, swap_swapchain_, nullptr); + swap_swapchain_ = VK_NULL_HANDLE; + return false; + } + swap_swapchain_framebuffers_.push_back(swapchain_framebuffer); + } + } + + if (swap_swapchain_ == VK_NULL_HANDLE) { + return false; + } + assert_true(swap_surface_ != VK_NULL_HANDLE); + assert_true(swap_render_pass_ != VK_NULL_HANDLE); + assert_false(swap_swapchain_image_views_.empty()); + assert_false(swap_swapchain_framebuffers_.empty()); + + // Await the frame data to be available before doing anything else. + if (swap_submission_completed_ + kSwapchainMaxImageCount < + swap_submission_current_) { + uint64_t submission_awaited = + swap_submission_current_ - kSwapchainMaxImageCount; + VkFence submission_fences[kSwapchainMaxImageCount]; + uint32_t submission_fence_count = 0; + while (swap_submission_completed_ + 1 + submission_fence_count <= + submission_awaited) { + assert_true(submission_fence_count < kSwapchainMaxImageCount); + uint32_t submission_index = + (swap_submission_completed_ + 1 + submission_fence_count) % + kSwapchainMaxImageCount; + submission_fences[submission_fence_count++] = + swap_submissions_[submission_index].fence; + } + if (submission_fence_count) { + if (dfn.vkWaitForFences(device, submission_fence_count, + submission_fences, VK_TRUE, + UINT64_MAX) != VK_SUCCESS) { + XELOGE("Failed to await the Vulkan presentation submission fences"); + return false; + } + swap_submission_completed_ += submission_fence_count; + } + } + + const SwapSubmission& submission = + swap_submissions_[swap_submission_current_ % kSwapchainMaxImageCount]; + if (dfn.vkResetCommandPool(device, submission.command_pool, 0) != + 
VK_SUCCESS) { + XELOGE("Failed to reset the Vulkan presentation command pool"); + return false; + } + + // After the image is acquired, this function must not fail before the + // semaphore has been signaled, and the image also must be returned to the + // swapchain. + uint32_t acquired_image_index; + switch (dfn.vkAcquireNextImageKHR(device, swap_swapchain_, UINT64_MAX, + swap_image_acquisition_semaphore_, + nullptr, &acquired_image_index)) { + case VK_SUCCESS: + case VK_SUBOPTIMAL_KHR: + // Not recreating in case of suboptimal, just to prevent a recreation + // loop in case the newly created swapchain is suboptimal too. + break; + case VK_ERROR_DEVICE_LOST: + context_lost_ = true; + return false; + case VK_ERROR_OUT_OF_DATE_KHR: + case VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT: + swap_swapchain_or_surface_recreation_needed_ = true; + continue; + case VK_ERROR_SURFACE_LOST_KHR: + RequestSurfaceRecreation(); + continue; + default: + return false; + } + swap_swapchain_image_current_ = acquired_image_index; + + VkCommandBufferBeginInfo command_buffer_begin_info; + command_buffer_begin_info.sType = + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + command_buffer_begin_info.pNext = nullptr; + command_buffer_begin_info.flags = + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + command_buffer_begin_info.pInheritanceInfo = nullptr; + dfn.vkBeginCommandBuffer(submission.command_buffer, + &command_buffer_begin_info); + VkClearValue clear_value; + GetClearColor(clear_value.color.float32); + VkRenderPassBeginInfo render_pass_begin_info; + render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + render_pass_begin_info.pNext = nullptr; + render_pass_begin_info.renderPass = swap_render_pass_; + render_pass_begin_info.framebuffer = + swap_swapchain_framebuffers_[acquired_image_index]; + render_pass_begin_info.renderArea.offset.x = 0; + render_pass_begin_info.renderArea.offset.y = 0; + render_pass_begin_info.renderArea.extent = swap_swapchain_extent_; + 
render_pass_begin_info.clearValueCount = 1; + render_pass_begin_info.pClearValues = &clear_value; + dfn.vkCmdBeginRenderPass(submission.command_buffer, &render_pass_begin_info, + VK_SUBPASS_CONTENTS_INLINE); + + return true; + } + + // vkAcquireNextImageKHR returned VK_ERROR_OUT_OF_DATE_KHR even after + // recreation. + return false; +} + +void VulkanContext::EndSwap() { + if (!target_window_ || context_lost_) { + return; + } + assert_true(swap_swapchain_image_current_ != UINT32_MAX); + if (swap_swapchain_image_current_ == UINT32_MAX) { + return; + } + + const VulkanProvider& provider = GetVulkanProvider(); + const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + VkQueue queue_graphics_compute = provider.queue_graphics_compute(); + + const SwapSubmission& submission = + swap_submissions_[swap_submission_current_ % kSwapchainMaxImageCount]; + dfn.vkCmdEndRenderPass(submission.command_buffer); + dfn.vkEndCommandBuffer(submission.command_buffer); + dfn.vkResetFences(device, 1, &submission.fence); + VkSubmitInfo submit_info; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.pNext = nullptr; + submit_info.waitSemaphoreCount = 1; + submit_info.pWaitSemaphores = &swap_image_acquisition_semaphore_; + VkPipelineStageFlags image_acquisition_semaphore_wait_stage = + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + submit_info.pWaitDstStageMask = &image_acquisition_semaphore_wait_stage; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &submission.command_buffer; + submit_info.signalSemaphoreCount = 1; + submit_info.pSignalSemaphores = &swap_render_completion_semaphore_; + VkResult submit_result = dfn.vkQueueSubmit(queue_graphics_compute, 1, + &submit_info, submission.fence); + if (submit_result != VK_SUCCESS) { + // If failed, can't even return the swapchain image - so treat all errors as + // context loss. 
+ context_lost_ = true; + return; + } + ++swap_submission_current_; + + VkPresentInfoKHR present_info; + present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; + present_info.pNext = nullptr; + present_info.waitSemaphoreCount = 1; + present_info.pWaitSemaphores = &swap_render_completion_semaphore_; + present_info.swapchainCount = 1; + present_info.pSwapchains = &swap_swapchain_; + present_info.pImageIndices = &swap_swapchain_image_current_; + present_info.pResults = nullptr; + // FIXME(Triang3l): Allow a separate queue for present - see + // vulkan_provider.cc for details. + VkResult present_result = + dfn.vkQueuePresentKHR(queue_graphics_compute, &present_info); + swap_swapchain_image_current_ = UINT32_MAX; + switch (present_result) { + case VK_SUCCESS: + case VK_SUBOPTIMAL_KHR: + // Not recreating in case of suboptimal, just to prevent a recreation + // loop in case the newly created swapchain is suboptimal too. + break; + case VK_ERROR_OUT_OF_DATE_KHR: + case VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT: + swap_swapchain_or_surface_recreation_needed_ = true; + return; + case VK_ERROR_SURFACE_LOST_KHR: + // Safe to await submission completion now - swap_submission_current_ has + // already been incremented to the next frame. + RequestSurfaceRecreation(); + return; + default: + // Treat any error as device loss since it would leave the semaphore + // forever signaled anyway, and the image won't be returned to the + // swapchain. + context_lost_ = true; + return; + } +} std::unique_ptr VulkanContext::Capture() { // TODO(Triang3l): Read back swap chain front buffer. return nullptr; } +void VulkanContext::RequestSurfaceRecreation() { +#if XE_PLATFORM_ANDROID + // The surface doesn't exist when the activity is in background. 
+ swap_swapchain_or_surface_recreation_needed_ = + target_window_->native_handle() != nullptr; +#else + swap_swapchain_or_surface_recreation_needed_ = true; +#endif + if (swap_surface_ == VK_NULL_HANDLE) { + return; + } + AwaitAllSwapSubmissionsCompletion(); + DestroySwapchainFramebuffers(); + const VulkanProvider& provider = GetVulkanProvider(); + const VulkanProvider::InstanceFunctions& ifn = provider.ifn(); + VkInstance instance = provider.instance(); + const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + util::DestroyAndNullHandle(dfn.vkDestroySwapchainKHR, device, + swap_swapchain_); + ifn.vkDestroySurfaceKHR(instance, swap_surface_, nullptr); + swap_surface_ = VK_NULL_HANDLE; +} + +void VulkanContext::AwaitAllSwapSubmissionsCompletion() { + assert_not_null(target_window_); + const VulkanProvider& provider = GetVulkanProvider(); + const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + VkFence fences[kSwapchainMaxImageCount]; + uint32_t fence_count = 0; + while (swap_submission_completed_ + 1 < swap_submission_current_) { + assert_true(fence_count < kSwapchainMaxImageCount); + uint32_t submission_index = + ++swap_submission_completed_ % kSwapchainMaxImageCount; + fences[fence_count++] = swap_submissions_[submission_index].fence; + } + if (fence_count && !context_lost_) { + dfn.vkWaitForFences(device, fence_count, fences, VK_TRUE, UINT64_MAX); + } +} + +void VulkanContext::DestroySwapchainFramebuffers() { + assert_not_null(target_window_); + const VulkanProvider& provider = GetVulkanProvider(); + const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + for (VkFramebuffer framebuffer : swap_swapchain_framebuffers_) { + dfn.vkDestroyFramebuffer(device, framebuffer, nullptr); + } + swap_swapchain_framebuffers_.clear(); + for (VkImageView image_view : swap_swapchain_image_views_) { + dfn.vkDestroyImageView(device, image_view, 
nullptr); + } + swap_swapchain_image_views_.clear(); +} + } // namespace vulkan } // namespace ui } // namespace xe diff --git a/src/xenia/ui/vulkan/vulkan_context.h b/src/xenia/ui/vulkan/vulkan_context.h index 880e99561..477d4de17 100644 --- a/src/xenia/ui/vulkan/vulkan_context.h +++ b/src/xenia/ui/vulkan/vulkan_context.h @@ -10,7 +10,9 @@ #ifndef XENIA_UI_VULKAN_VULKAN_CONTEXT_H_ #define XENIA_UI_VULKAN_VULKAN_CONTEXT_H_ +#include #include +#include #include "xenia/ui/graphics_context.h" #include "xenia/ui/vulkan/vulkan_immediate_drawer.h" @@ -24,19 +26,22 @@ class VulkanContext : public GraphicsContext { public: ImmediateDrawer* immediate_drawer() override; - // Returns true if the OS took away our context because we caused a TDR or - // some other outstanding error. When this happens, this context, as well as - // any other shared contexts are junk. - // This context must be made current in order for this call to work properly. - bool WasLost() override { return false; } + bool WasLost() override; - void BeginSwap() override; + bool BeginSwap() override; void EndSwap() override; std::unique_ptr Capture() override; - VulkanProvider* GetVulkanProvider() const { - return static_cast(provider_); + VulkanProvider& GetVulkanProvider() const { + return static_cast(*provider_); + } + + void RequestSurfaceRecreation(); + + VkCommandBuffer GetSwapCommandBuffer() const { + return swap_submissions_[swap_submission_current_ % kSwapchainMaxImageCount] + .command_buffer; } private: @@ -45,6 +50,62 @@ class VulkanContext : public GraphicsContext { bool Initialize(); private: + void Shutdown(); + + void AwaitAllSwapSubmissionsCompletion(); + + // AwaitAllSwapSubmissionsCompletion must be called before. As this can be + // used in swapchain creation or in shutdown, + // swap_swapchain_or_surface_recreation_needed_ won't be set by this. 
+ void DestroySwapchainFramebuffers(); + + bool context_lost_ = false; + + // Actual image count may be less, depending on what the surface can provide. + static constexpr uint32_t kSwapchainMaxImageCount = 3; + + // Because of the nature of Vulkan fences (that they belong only to their + // specific submission, not the submission and all prior submissions), ALL + // fences since the last completed submission to the needed submission should + // individually be checked, not just the last one. However, this submission + // number abstraction hides the loosely ordered design of Vulkan submissions + // (it's okay to wait first for completion of A, then of B, no matter if they + // are actually completed in AB or in BA order). + + struct SwapSubmission { + // One pool per frame, with resetting the pool itself rather than individual + // command buffers (resetting command buffers themselves is not recommended + // by Arm since it makes the pool unable to use a single big allocation), as + // recommended by Nvidia (Direct3D 12-like way): + // https://developer.nvidia.com/sites/default/files/akamai/gameworks/blog/munich/mschott_vulkan_multi_threading.pdf + VkFence fence = VK_NULL_HANDLE; + VkCommandPool command_pool = VK_NULL_HANDLE; + VkCommandBuffer command_buffer; + }; + SwapSubmission swap_submissions_[kSwapchainMaxImageCount]; + uint64_t swap_submission_current_ = 1; + uint64_t swap_submission_completed_ = 0; + + VkSemaphore swap_image_acquisition_semaphore_ = VK_NULL_HANDLE; + VkSemaphore swap_render_completion_semaphore_ = VK_NULL_HANDLE; + + VkSurfaceKHR swap_surface_ = VK_NULL_HANDLE; + VkSurfaceFormatKHR swap_surface_format_ = {VK_FORMAT_UNDEFINED, + VK_COLOR_SPACE_SRGB_NONLINEAR_KHR}; + VkPresentModeKHR swap_surface_present_mode_; + VkRenderPass swap_render_pass_ = VK_NULL_HANDLE; + VkSwapchainKHR swap_swapchain_ = VK_NULL_HANDLE; + VkExtent2D swap_swapchain_extent_; + std::vector swap_swapchain_image_views_; + std::vector swap_swapchain_framebuffers_; + + 
uint32_t swap_swapchain_image_current_ = UINT32_MAX; + + // Attempts to recreate the swapchain will only be made in BeginSwap if this + // is true (set when something relevant is changed), so if creation fails, + // there won't be attempts every frame again and again. + bool swap_swapchain_or_surface_recreation_needed_ = false; + std::unique_ptr immediate_drawer_ = nullptr; }; diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index 3fc06ebd5..abd787f12 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -9,14 +9,26 @@ #include "xenia/ui/vulkan/vulkan_immediate_drawer.h" +#include "xenia/ui/vulkan/vulkan_context.h" + namespace xe { namespace ui { namespace vulkan { +class VulkanImmediateTexture : public ImmediateTexture { + public: + VulkanImmediateTexture(uint32_t width, uint32_t height) + : ImmediateTexture(width, height) {} +}; + +VulkanImmediateDrawer::VulkanImmediateDrawer(VulkanContext& graphics_context) + : ImmediateDrawer(&graphics_context), context_(graphics_context) {} + std::unique_ptr VulkanImmediateDrawer::CreateTexture( uint32_t width, uint32_t height, ImmediateTextureFilter filter, bool repeat, const uint8_t* data) { - return nullptr; + auto texture = std::make_unique(width, height); + return std::unique_ptr(texture.release()); } void VulkanImmediateDrawer::UpdateTexture(ImmediateTexture* texture, diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h index f51ffdd97..2e437ea25 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h @@ -16,8 +16,12 @@ namespace xe { namespace ui { namespace vulkan { +class VulkanContext; + class VulkanImmediateDrawer : public ImmediateDrawer { public: + VulkanImmediateDrawer(VulkanContext& graphics_context); + std::unique_ptr CreateTexture(uint32_t width, uint32_t height, 
ImmediateTextureFilter filter, @@ -30,6 +34,9 @@ class VulkanImmediateDrawer : public ImmediateDrawer { void Draw(const ImmediateDraw& draw) override; void EndDrawBatch() override; void End() override; + + private: + VulkanContext& context_; }; } // namespace vulkan diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index 0e8930eb2..1a9a94921 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -24,6 +24,12 @@ #include "xenia/base/platform_win.h" #endif +// TODO(Triang3l): Disable Vulkan validation before releasing a stable version. +DEFINE_bool( + vulkan_validation, true, + "Enable Vulkan validation (VK_LAYER_KHRONOS_validation). Messages will be " + "written to the OS debug log.", + "GPU"); DEFINE_int32( vulkan_device, -1, "Index of the physical device to use, or -1 for any compatible device.", @@ -55,10 +61,10 @@ VulkanProvider::VulkanProvider(Window* main_window) VulkanProvider::~VulkanProvider() { if (device_ != VK_NULL_HANDLE) { - ifn_.destroyDevice(device_, nullptr); + ifn_.vkDestroyDevice(device_, nullptr); } if (instance_ != VK_NULL_HANDLE) { - destroyInstance_(instance_, nullptr); + lfn_.vkDestroyInstance(instance_, nullptr); } #if XE_PLATFORM_LINUX @@ -74,6 +80,7 @@ VulkanProvider::~VulkanProvider() { bool VulkanProvider::Initialize() { // Load the library. 
+ bool library_functions_loaded = true; #if XE_PLATFORM_LINUX #if XE_PLATFORM_ANDROID const char* libvulkan_name = "libvulkan.so"; @@ -86,61 +93,46 @@ bool VulkanProvider::Initialize() { XELOGE("Failed to load {}", libvulkan_name); return false; } - getInstanceProcAddr_ = - PFN_vkGetInstanceProcAddr(dlsym(library_, "vkGetInstanceProcAddr")); - destroyInstance_ = - PFN_vkDestroyInstance(dlsym(library_, "vkDestroyInstance")); - if (!getInstanceProcAddr_ || !destroyInstance_) { - XELOGE("Failed to get vkGetInstanceProcAddr and vkDestroyInstance from {}", - libvulkan_name); - return false; - } +#define XE_VULKAN_LOAD_MODULE_LFN(name) \ + library_functions_loaded &= \ + (lfn_.name = PFN_##name(dlsym(library_, #name))) != nullptr; #elif XE_PLATFORM_WIN32 library_ = LoadLibraryA("vulkan-1.dll"); if (!library_) { XELOGE("Failed to load vulkan-1.dll"); return false; } - getInstanceProcAddr_ = PFN_vkGetInstanceProcAddr( - GetProcAddress(library_, "vkGetInstanceProcAddr")); - destroyInstance_ = - PFN_vkDestroyInstance(GetProcAddress(library_, "vkDestroyInstance")); - if (!getInstanceProcAddr_ || !destroyInstance_) { - XELOGE( - "Failed to get vkGetInstanceProcAddr and vkDestroyInstance from " - "vulkan-1.dll"); - return false; - } +#define XE_VULKAN_LOAD_MODULE_LFN(name) \ + library_functions_loaded &= \ + (lfn_.name = PFN_##name(GetProcAddress(library_, #name))) != nullptr; #else #error No Vulkan library loading provided for the target platform. 
#endif - assert_not_null(getInstanceProcAddr_); - assert_not_null(destroyInstance_); - bool library_functions_loaded = true; - library_functions_loaded &= - (library_functions_.createInstance = PFN_vkCreateInstance( - getInstanceProcAddr_(VK_NULL_HANDLE, "vkCreateInstance"))) != - nullptr; - library_functions_loaded &= - (library_functions_.enumerateInstanceExtensionProperties = - PFN_vkEnumerateInstanceExtensionProperties(getInstanceProcAddr_( - VK_NULL_HANDLE, "vkEnumerateInstanceExtensionProperties"))) != - nullptr; + XE_VULKAN_LOAD_MODULE_LFN(vkGetInstanceProcAddr); + XE_VULKAN_LOAD_MODULE_LFN(vkDestroyInstance); +#undef XE_VULKAN_LOAD_MODULE_LFN if (!library_functions_loaded) { XELOGE("Failed to get Vulkan library function pointers"); return false; } - library_functions_.enumerateInstanceVersion_1_1 = - PFN_vkEnumerateInstanceVersion( - getInstanceProcAddr_(VK_NULL_HANDLE, "vkEnumerateInstanceVersion")); + library_functions_loaded &= + (lfn_.vkCreateInstance = PFN_vkCreateInstance(lfn_.vkGetInstanceProcAddr( + VK_NULL_HANDLE, "vkCreateInstance"))) != nullptr; + if (!library_functions_loaded) { + XELOGE( + "Failed to get Vulkan library function pointers via " + "vkGetInstanceProcAddr"); + return false; + } + lfn_.v_1_1.vkEnumerateInstanceVersion = PFN_vkEnumerateInstanceVersion( + lfn_.vkGetInstanceProcAddr(VK_NULL_HANDLE, "vkEnumerateInstanceVersion")); // Get the API version. 
const uint32_t api_version_target = VK_MAKE_VERSION(1, 2, 148); static_assert(VK_HEADER_VERSION_COMPLETE >= api_version_target, "Vulkan header files must be up to date"); - if (!library_functions_.enumerateInstanceVersion_1_1 || - library_functions_.enumerateInstanceVersion_1_1(&api_version_) != - VK_SUCCESS) { + if (!lfn_.v_1_1.vkEnumerateInstanceVersion || + lfn_.v_1_1.vkEnumerateInstanceVersion(&api_version_) != VK_SUCCESS) { api_version_ = VK_API_VERSION_1_0; } XELOGVK("Vulkan instance version {}.{}.{}", VK_VERSION_MAJOR(api_version_), @@ -173,66 +165,59 @@ bool VulkanProvider::Initialize() { instance_create_info.pNext = nullptr; instance_create_info.flags = 0; instance_create_info.pApplicationInfo = &application_info; - // TODO(Triang3l): Enable the validation layer. - instance_create_info.enabledLayerCount = 0; - instance_create_info.ppEnabledLayerNames = nullptr; + static const char* validation_layer = "VK_LAYER_KHRONOS_validation"; + if (cvars::vulkan_validation) { + instance_create_info.enabledLayerCount = 1; + instance_create_info.ppEnabledLayerNames = &validation_layer; + } else { + instance_create_info.enabledLayerCount = 0; + instance_create_info.ppEnabledLayerNames = nullptr; + } instance_create_info.enabledExtensionCount = uint32_t(instance_extensions_enabled.size()); instance_create_info.ppEnabledExtensionNames = instance_extensions_enabled.data(); - if (library_functions_.createInstance(&instance_create_info, nullptr, - &instance_) != VK_SUCCESS) { - XELOGE("Failed to create a Vulkan instance with surface support"); - return false; + VkResult instance_create_result = + lfn_.vkCreateInstance(&instance_create_info, nullptr, &instance_); + if (instance_create_result != VK_SUCCESS) { + if (instance_create_result == VK_ERROR_LAYER_NOT_PRESENT) { + XELOGE("Failed to enable the Vulkan validation layer"); + instance_create_info.enabledLayerCount = 0; + instance_create_info.ppEnabledLayerNames = nullptr; + instance_create_result = + 
lfn_.vkCreateInstance(&instance_create_info, nullptr, &instance_); + } + if (instance_create_result != VK_SUCCESS) { + XELOGE("Failed to create a Vulkan instance with surface support"); + return false; + } } // Get instance functions. bool instance_functions_loaded = true; - instance_functions_loaded &= - (ifn_.createDevice = PFN_vkCreateDevice( - getInstanceProcAddr_(instance_, "vkCreateDevice"))) != nullptr; - instance_functions_loaded &= - (ifn_.destroyDevice = PFN_vkDestroyDevice( - getInstanceProcAddr_(instance_, "vkDestroyDevice"))) != nullptr; - instance_functions_loaded &= - (ifn_.enumerateDeviceExtensionProperties = - PFN_vkEnumerateDeviceExtensionProperties(getInstanceProcAddr_( - instance_, "vkEnumerateDeviceExtensionProperties"))) != nullptr; - instance_functions_loaded &= - (ifn_.enumeratePhysicalDevices = PFN_vkEnumeratePhysicalDevices( - getInstanceProcAddr_(instance_, "vkEnumeratePhysicalDevices"))) != - nullptr; - instance_functions_loaded &= - (ifn_.getDeviceProcAddr = PFN_vkGetDeviceProcAddr( - getInstanceProcAddr_(instance_, "vkGetDeviceProcAddr"))) != nullptr; - instance_functions_loaded &= - (ifn_.getPhysicalDeviceFeatures = PFN_vkGetPhysicalDeviceFeatures( - getInstanceProcAddr_(instance_, "vkGetPhysicalDeviceFeatures"))) != - nullptr; - instance_functions_loaded &= - (ifn_.getPhysicalDeviceProperties = PFN_vkGetPhysicalDeviceProperties( - getInstanceProcAddr_(instance_, "vkGetPhysicalDeviceProperties"))) != - nullptr; - instance_functions_loaded &= - (ifn_.getPhysicalDeviceQueueFamilyProperties = - PFN_vkGetPhysicalDeviceQueueFamilyProperties(getInstanceProcAddr_( - instance_, "vkGetPhysicalDeviceQueueFamilyProperties"))) != - nullptr; - instance_functions_loaded &= - (ifn_.getPhysicalDeviceSurfaceSupportKHR = - PFN_vkGetPhysicalDeviceSurfaceSupportKHR(getInstanceProcAddr_( - instance_, "vkGetPhysicalDeviceSurfaceSupportKHR"))) != nullptr; +#define XE_VULKAN_LOAD_IFN(name) \ + instance_functions_loaded &= \ + (ifn_.name = PFN_##name( \ + 
lfn_.vkGetInstanceProcAddr(instance_, #name))) != nullptr; + XE_VULKAN_LOAD_IFN(vkCreateDevice); + XE_VULKAN_LOAD_IFN(vkDestroyDevice); + XE_VULKAN_LOAD_IFN(vkDestroySurfaceKHR); + XE_VULKAN_LOAD_IFN(vkEnumerateDeviceExtensionProperties); + XE_VULKAN_LOAD_IFN(vkEnumeratePhysicalDevices); + XE_VULKAN_LOAD_IFN(vkGetDeviceProcAddr); + XE_VULKAN_LOAD_IFN(vkGetPhysicalDeviceFeatures); + XE_VULKAN_LOAD_IFN(vkGetPhysicalDeviceProperties); + XE_VULKAN_LOAD_IFN(vkGetPhysicalDeviceQueueFamilyProperties); + XE_VULKAN_LOAD_IFN(vkGetPhysicalDeviceSurfaceCapabilitiesKHR); + XE_VULKAN_LOAD_IFN(vkGetPhysicalDeviceSurfaceFormatsKHR); + XE_VULKAN_LOAD_IFN(vkGetPhysicalDeviceSurfacePresentModesKHR); + XE_VULKAN_LOAD_IFN(vkGetPhysicalDeviceSurfaceSupportKHR); #if XE_PLATFORM_ANDROID - instance_functions_loaded &= - (ifn_.createAndroidSurfaceKHR = PFN_vkCreateAndroidSurfaceKHR( - getInstanceProcAddr_(instance_, "vkCreateAndroidSurfaceKHR"))) != - nullptr; + XE_VULKAN_LOAD_IFN(vkCreateAndroidSurfaceKHR); #elif XE_PLATFORM_WIN32 - instance_functions_loaded &= - (ifn_.createWin32SurfaceKHR = PFN_vkCreateWin32SurfaceKHR( - getInstanceProcAddr_(instance_, "vkCreateWin32SurfaceKHR"))) != - nullptr; + XE_VULKAN_LOAD_IFN(vkCreateWin32SurfaceKHR); #endif +#undef XE_VULKAN_LOAD_IFN if (!instance_functions_loaded) { XELOGE("Failed to get Vulkan instance function pointers"); return false; @@ -242,8 +227,8 @@ bool VulkanProvider::Initialize() { std::vector physical_devices; for (;;) { uint32_t physical_device_count = uint32_t(physical_devices.size()); - bool physical_devices_was_empty = physical_devices.empty(); - VkResult physical_device_enumerate_result = ifn_.enumeratePhysicalDevices( + bool physical_devices_was_empty = !physical_device_count; + VkResult physical_device_enumerate_result = ifn_.vkEnumeratePhysicalDevices( instance_, &physical_device_count, physical_devices_was_empty ? 
nullptr : physical_devices.data()); // If the original device count was 0 (first call), SUCCESS is returned, not @@ -288,7 +273,8 @@ bool VulkanProvider::Initialize() { VkPhysicalDevice physical_device_current = physical_devices[i]; // Get physical device features and check if the needed ones are supported. - ifn_.getPhysicalDeviceFeatures(physical_device_current, &device_features_); + ifn_.vkGetPhysicalDeviceFeatures(physical_device_current, + &device_features_); // TODO(Triang3l): Make geometry shaders optional by providing compute // shader fallback (though that would require vertex shader stores). if (!device_features_.geometryShader) { @@ -299,10 +285,10 @@ bool VulkanProvider::Initialize() { // (preferably the same for the least latency between the two, as Xenia // submits sparse binding commands right before graphics commands anyway). uint32_t queue_family_count = 0; - ifn_.getPhysicalDeviceQueueFamilyProperties(physical_device_current, - &queue_family_count, nullptr); + ifn_.vkGetPhysicalDeviceQueueFamilyProperties(physical_device_current, + &queue_family_count, nullptr); queue_families.resize(queue_family_count); - ifn_.getPhysicalDeviceQueueFamilyProperties( + ifn_.vkGetPhysicalDeviceQueueFamilyProperties( physical_device_current, &queue_family_count, queue_families.data()); assert_true(queue_family_count == queue_families.size()); queue_family_graphics_compute_ = UINT32_MAX; @@ -364,9 +350,9 @@ bool VulkanProvider::Initialize() { for (;;) { uint32_t device_extension_count = uint32_t(device_extension_properties.size()); - bool device_extensions_was_empty = device_extension_properties.empty(); + bool device_extensions_was_empty = !device_extension_count; device_extensions_enumerate_result = - ifn_.enumerateDeviceExtensionProperties( + ifn_.vkEnumerateDeviceExtensionProperties( physical_device_current, nullptr, &device_extension_count, device_extensions_was_empty ? 
nullptr : device_extension_properties.data()); @@ -411,7 +397,7 @@ bool VulkanProvider::Initialize() { "support"); return false; } - ifn_.getPhysicalDeviceProperties(physical_device_, &device_properties_); + ifn_.vkGetPhysicalDeviceProperties(physical_device_, &device_properties_); XELOGVK( "Vulkan device: {} (vendor {:04X}, device {:04X}, driver {:08X}, API " "{}.{}.{})", @@ -454,7 +440,7 @@ bool VulkanProvider::Initialize() { device_create_info.queueCreateInfoCount = separate_sparse_binding_queue ? 2 : 1; device_create_info.pQueueCreateInfos = queue_create_infos; - // TODO(Triang3l): Enable the validation layer. + // Device layers are deprecated - using validation layer on the instance. device_create_info.enabledLayerCount = 0; device_create_info.ppEnabledLayerNames = nullptr; device_create_info.enabledExtensionCount = @@ -462,29 +448,58 @@ bool VulkanProvider::Initialize() { device_create_info.ppEnabledExtensionNames = device_extensions_enabled.data(); // TODO(Triang3l): Enable only needed features. device_create_info.pEnabledFeatures = &device_features_; - if (ifn_.createDevice(physical_device_, &device_create_info, nullptr, - &device_) != VK_SUCCESS) { + if (ifn_.vkCreateDevice(physical_device_, &device_create_info, nullptr, + &device_) != VK_SUCCESS) { XELOGE("Failed to create a Vulkan device"); return false; } // Get device functions. 
bool device_functions_loaded = true; - device_functions_loaded &= - (dfn_.getDeviceQueue = PFN_vkGetDeviceQueue( - ifn_.getDeviceProcAddr(device_, "vkGetDeviceQueue"))) != nullptr; +#define XE_VULKAN_LOAD_DFN(name) \ + device_functions_loaded &= \ + (dfn_.name = PFN_##name(ifn_.vkGetDeviceProcAddr(device_, #name))) != \ + nullptr; + XE_VULKAN_LOAD_DFN(vkAcquireNextImageKHR); + XE_VULKAN_LOAD_DFN(vkAllocateCommandBuffers); + XE_VULKAN_LOAD_DFN(vkBeginCommandBuffer); + XE_VULKAN_LOAD_DFN(vkCmdBeginRenderPass); + XE_VULKAN_LOAD_DFN(vkCmdEndRenderPass); + XE_VULKAN_LOAD_DFN(vkCreateCommandPool); + XE_VULKAN_LOAD_DFN(vkCreateFence); + XE_VULKAN_LOAD_DFN(vkCreateFramebuffer); + XE_VULKAN_LOAD_DFN(vkCreateImageView); + XE_VULKAN_LOAD_DFN(vkCreateRenderPass); + XE_VULKAN_LOAD_DFN(vkCreateSemaphore); + XE_VULKAN_LOAD_DFN(vkCreateSwapchainKHR); + XE_VULKAN_LOAD_DFN(vkDestroyCommandPool); + XE_VULKAN_LOAD_DFN(vkDestroyFence); + XE_VULKAN_LOAD_DFN(vkDestroyFramebuffer); + XE_VULKAN_LOAD_DFN(vkDestroyImageView); + XE_VULKAN_LOAD_DFN(vkDestroyRenderPass); + XE_VULKAN_LOAD_DFN(vkDestroySemaphore); + XE_VULKAN_LOAD_DFN(vkDestroySwapchainKHR); + XE_VULKAN_LOAD_DFN(vkEndCommandBuffer); + XE_VULKAN_LOAD_DFN(vkGetDeviceQueue); + XE_VULKAN_LOAD_DFN(vkGetSwapchainImagesKHR); + XE_VULKAN_LOAD_DFN(vkResetCommandPool); + XE_VULKAN_LOAD_DFN(vkResetFences); + XE_VULKAN_LOAD_DFN(vkQueuePresentKHR); + XE_VULKAN_LOAD_DFN(vkQueueSubmit); + XE_VULKAN_LOAD_DFN(vkWaitForFences); +#undef XE_VULKAN_LOAD_DFN if (!device_functions_loaded) { XELOGE("Failed to get Vulkan device function pointers"); return false; } // Get the queues. 
- dfn_.getDeviceQueue(device_, queue_family_graphics_compute_, 0, - &queue_graphics_compute_); + dfn_.vkGetDeviceQueue(device_, queue_family_graphics_compute_, 0, + &queue_graphics_compute_); if (queue_family_sparse_binding != UINT32_MAX) { if (separate_sparse_binding_queue) { - dfn_.getDeviceQueue(device_, queue_family_sparse_binding, 0, - &queue_sparse_binding_); + dfn_.vkGetDeviceQueue(device_, queue_family_sparse_binding, 0, + &queue_sparse_binding_); } else { queue_sparse_binding_ = queue_graphics_compute_; } diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h index f03e6d390..da4d56b80 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.h +++ b/src/xenia/ui/vulkan/vulkan_provider.h @@ -24,7 +24,7 @@ #ifndef VK_USE_PLATFORM_WIN32_KHR #define VK_USE_PLATFORM_WIN32_KHR 1 #endif -#endif // XE_PLATFORM_WIN32 +#endif #ifndef VK_NO_PROTOTYPES #define VK_NO_PROTOTYPES 1 @@ -47,37 +47,45 @@ class VulkanProvider : public GraphicsProvider { Window* target_window) override; std::unique_ptr CreateOffscreenContext() override; - // Functions with a version suffix (like _1_1) are null when api_version() is - // below this version. - struct LibraryFunctions { - PFN_vkCreateInstance createInstance; - PFN_vkEnumerateInstanceExtensionProperties - enumerateInstanceExtensionProperties; - PFN_vkEnumerateInstanceVersion enumerateInstanceVersion_1_1; + // From the module. + PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr; + PFN_vkDestroyInstance vkDestroyInstance; + // From vkGetInstanceProcAddr. 
+ PFN_vkCreateInstance vkCreateInstance; + struct { + PFN_vkEnumerateInstanceVersion vkEnumerateInstanceVersion; + } v_1_1; }; - const LibraryFunctions& library_functions() const { - return library_functions_; - } + const LibraryFunctions& lfn() const { return lfn_; } uint32_t api_version() const { return api_version_; } VkInstance instance() const { return instance_; } struct InstanceFunctions { - PFN_vkCreateDevice createDevice; - PFN_vkDestroyDevice destroyDevice; - PFN_vkEnumerateDeviceExtensionProperties enumerateDeviceExtensionProperties; - PFN_vkEnumeratePhysicalDevices enumeratePhysicalDevices; - PFN_vkGetDeviceProcAddr getDeviceProcAddr; - PFN_vkGetPhysicalDeviceFeatures getPhysicalDeviceFeatures; - PFN_vkGetPhysicalDeviceProperties getPhysicalDeviceProperties; + PFN_vkCreateDevice vkCreateDevice; + PFN_vkDestroyDevice vkDestroyDevice; + PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR; + PFN_vkEnumerateDeviceExtensionProperties + vkEnumerateDeviceExtensionProperties; + PFN_vkEnumeratePhysicalDevices vkEnumeratePhysicalDevices; + PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr; + PFN_vkGetPhysicalDeviceFeatures vkGetPhysicalDeviceFeatures; + PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties; PFN_vkGetPhysicalDeviceQueueFamilyProperties - getPhysicalDeviceQueueFamilyProperties; - PFN_vkGetPhysicalDeviceSurfaceSupportKHR getPhysicalDeviceSurfaceSupportKHR; + vkGetPhysicalDeviceQueueFamilyProperties; + PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR + vkGetPhysicalDeviceSurfaceCapabilitiesKHR; + PFN_vkGetPhysicalDeviceSurfaceFormatsKHR + vkGetPhysicalDeviceSurfaceFormatsKHR; + PFN_vkGetPhysicalDeviceSurfacePresentModesKHR + vkGetPhysicalDeviceSurfacePresentModesKHR; + PFN_vkGetPhysicalDeviceSurfaceSupportKHR + vkGetPhysicalDeviceSurfaceSupportKHR; #if XE_PLATFORM_ANDROID - PFN_vkCreateAndroidSurfaceKHR createAndroidSurfaceKHR; + PFN_vkCreateAndroidSurfaceKHR vkCreateAndroidSurfaceKHR; #elif XE_PLATFORM_WIN32 - PFN_vkCreateWin32SurfaceKHR 
createWin32SurfaceKHR; + PFN_vkCreateWin32SurfaceKHR vkCreateWin32SurfaceKHR; #endif }; const InstanceFunctions& ifn() const { return ifn_; } @@ -103,7 +111,33 @@ class VulkanProvider : public GraphicsProvider { VkDevice device() const { return device_; } struct DeviceFunctions { - PFN_vkGetDeviceQueue getDeviceQueue; + PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR; + PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers; + PFN_vkBeginCommandBuffer vkBeginCommandBuffer; + PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass; + PFN_vkCmdEndRenderPass vkCmdEndRenderPass; + PFN_vkCreateCommandPool vkCreateCommandPool; + PFN_vkCreateFence vkCreateFence; + PFN_vkCreateFramebuffer vkCreateFramebuffer; + PFN_vkCreateImageView vkCreateImageView; + PFN_vkCreateRenderPass vkCreateRenderPass; + PFN_vkCreateSemaphore vkCreateSemaphore; + PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR; + PFN_vkDestroyCommandPool vkDestroyCommandPool; + PFN_vkDestroyFence vkDestroyFence; + PFN_vkDestroyFramebuffer vkDestroyFramebuffer; + PFN_vkDestroyImageView vkDestroyImageView; + PFN_vkDestroyRenderPass vkDestroyRenderPass; + PFN_vkDestroySemaphore vkDestroySemaphore; + PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR; + PFN_vkEndCommandBuffer vkEndCommandBuffer; + PFN_vkGetDeviceQueue vkGetDeviceQueue; + PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR; + PFN_vkResetCommandPool vkResetCommandPool; + PFN_vkResetFences vkResetFences; + PFN_vkQueuePresentKHR vkQueuePresentKHR; + PFN_vkQueueSubmit vkQueueSubmit; + PFN_vkWaitForFences vkWaitForFences; }; const DeviceFunctions& dfn() const { return dfn_; } @@ -122,9 +156,7 @@ class VulkanProvider : public GraphicsProvider { HMODULE library_ = nullptr; #endif - PFN_vkGetInstanceProcAddr getInstanceProcAddr_ = nullptr; - PFN_vkDestroyInstance destroyInstance_ = nullptr; - LibraryFunctions library_functions_ = {}; + LibraryFunctions lfn_ = {}; uint32_t api_version_ = VK_API_VERSION_1_0; diff --git a/src/xenia/ui/window.cc b/src/xenia/ui/window.cc index 
8be8900c8..1273b61f0 100644 --- a/src/xenia/ui/window.cc +++ b/src/xenia/ui/window.cc @@ -200,11 +200,15 @@ void Window::OnPaint(UIEvent* e) { io.DisplaySize = ImVec2(static_cast(scaled_width()), static_cast(scaled_height())); - context_->BeginSwap(); + bool can_swap = context_->BeginSwap(); if (context_->WasLost()) { on_context_lost(e); return; } + if (!can_swap) { + // Surface not available. + return; + } ImGui::NewFrame(); diff --git a/third_party/volk b/third_party/volk deleted file mode 160000 index 30a851b67..000000000 --- a/third_party/volk +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 30a851b67e129a3d91f191b2e9dcdad65ba98438 diff --git a/third_party/volk.lua b/third_party/volk.lua deleted file mode 100644 index 7ba0dd618..000000000 --- a/third_party/volk.lua +++ /dev/null @@ -1,30 +0,0 @@ -group("third_party") -project("volk") - uuid("C9781C93-2DF5-47A2-94EE-2C5EBED61239") - kind("StaticLib") - language("C") - - defines({ - "_LIB", - "API_NAME=\"vulkan\"", - }) - removedefines({ - "_UNICODE", - "UNICODE", - }) - includedirs({ - "volk", - }) - files({ - "volk/volk.c", - "volk/volk.h", - }) - - filter("platforms:Windows") - defines({ - "VK_USE_PLATFORM_WIN32_KHR", - }) - filter("platforms:Linux") - defines({ - "VK_USE_PLATFORM_XCB_KHR", - }) From 36347ffedd9061db99a1b61223d14b66ccdbb0a4 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Fri, 18 Sep 2020 21:26:24 +0300 Subject: [PATCH 011/123] [D3D12] Fix Vulkan branch merge error --- src/xenia/ui/d3d12/d3d12_provider.cc | 8 -------- src/xenia/ui/d3d12/d3d12_provider.h | 4 ---- 2 files changed, 12 deletions(-) diff --git a/src/xenia/ui/d3d12/d3d12_provider.cc b/src/xenia/ui/d3d12/d3d12_provider.cc index c2f3bf132..d1f6594ca 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.cc +++ b/src/xenia/ui/d3d12/d3d12_provider.cc @@ -417,14 +417,6 @@ bool D3D12Provider::Initialize() { virtual_address_bits_per_resource_ = virtual_address_support.MaxGPUVirtualAddressBitsPerResource; } - // D3D12_HEAP_FLAG_CREATE_NOT_ZEROED 
requires Windows 10 2004 (indicated by - // the availability of ID3D12Device8 or D3D12_FEATURE_D3D12_OPTIONS7). - heap_flag_create_not_zeroed_ = D3D12_HEAP_FLAG_NONE; - D3D12_FEATURE_DATA_D3D12_OPTIONS7 options7; - if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS7, - &options7, sizeof(options7)))) { - heap_flag_create_not_zeroed_ = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; - } XELOGD3D( "Direct3D 12 device and OS features:\n" "* Max GPU virtual address bits per resource: {}\n" diff --git a/src/xenia/ui/d3d12/d3d12_provider.h b/src/xenia/ui/d3d12/d3d12_provider.h index d89dec101..c8332801c 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.h +++ b/src/xenia/ui/d3d12/d3d12_provider.h @@ -87,9 +87,6 @@ class D3D12Provider : public GraphicsProvider { uint32_t GetVirtualAddressBitsPerResource() const { return virtual_address_bits_per_resource_; } - D3D12_HEAP_FLAGS GetHeapFlagCreateNotZeroed() const { - return heap_flag_create_not_zeroed_; - } // Proxies for Direct3D 12 functions since they are loaded dynamically. 
inline HRESULT SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc, @@ -171,7 +168,6 @@ class D3D12Provider : public GraphicsProvider { D3D12_RESOURCE_BINDING_TIER resource_binding_tier_; D3D12_TILED_RESOURCES_TIER tiled_resources_tier_; uint32_t virtual_address_bits_per_resource_; - D3D12_HEAP_FLAGS heap_flag_create_not_zeroed_; }; } // namespace d3d12 From 229eb49b54700c88b5d623e59d40b599f623533c Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 19 Sep 2020 18:05:54 +0300 Subject: [PATCH 012/123] [Vulkan] Untextured ImmediateDrawer, [D3D12] Small refactoring --- src/xenia/gpu/d3d12/d3d12_command_processor.h | 20 +- src/xenia/gpu/d3d12/d3d12_graphics_system.cc | 2 +- src/xenia/gpu/d3d12/pipeline_cache.cc | 2 +- src/xenia/gpu/d3d12/pipeline_cache.h | 4 +- src/xenia/gpu/d3d12/primitive_converter.h | 2 +- src/xenia/gpu/d3d12/render_target_cache.cc | 2 +- src/xenia/gpu/d3d12/render_target_cache.h | 2 +- src/xenia/gpu/d3d12/shared_memory.h | 3 +- src/xenia/gpu/d3d12/texture_cache.cc | 2 +- src/xenia/ui/d3d12/d3d12_context.h | 2 +- src/xenia/ui/d3d12/d3d12_immediate_drawer.cc | 44 +- src/xenia/ui/d3d12/d3d12_immediate_drawer.h | 6 +- .../bytecode/vulkan_spirv/immediate_frag.h | 47 ++ .../bytecode/vulkan_spirv/immediate_frag.spv | Bin 0 -> 516 bytes .../bytecode/vulkan_spirv/immediate_frag.txt | 38 ++ .../bytecode/vulkan_spirv/immediate_vert.h | 116 +++++ .../bytecode/vulkan_spirv/immediate_vert.spv | Bin 0 -> 1340 bytes .../bytecode/vulkan_spirv/immediate_vert.txt | 82 ++++ src/xenia/ui/shaders/immediate.frag | 11 + src/xenia/ui/shaders/immediate.vert | 20 + src/xenia/ui/shaders/immediate.vs.hlsl | 8 +- src/xenia/ui/vulkan/premake5.lua | 21 + src/xenia/ui/vulkan/vulkan_context.cc | 9 +- src/xenia/ui/vulkan/vulkan_context.h | 11 + .../ui/vulkan/vulkan_immediate_drawer.cc | 413 +++++++++++++++++- src/xenia/ui/vulkan/vulkan_immediate_drawer.h | 29 ++ src/xenia/ui/vulkan/vulkan_provider.cc | 96 +++- src/xenia/ui/vulkan/vulkan_provider.h | 41 +- 
.../ui/vulkan/vulkan_upload_buffer_pool.cc | 227 ++++++++++ .../ui/vulkan/vulkan_upload_buffer_pool.h | 67 +++ src/xenia/ui/vulkan/vulkan_util.h | 18 +- src/xenia/ui/vulkan/vulkan_window_demo.cc | 29 ++ xenia-build | 7 +- 33 files changed, 1305 insertions(+), 76 deletions(-) create mode 100644 src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.h create mode 100644 src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.spv create mode 100644 src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.txt create mode 100644 src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_vert.h create mode 100644 src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_vert.spv create mode 100644 src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_vert.txt create mode 100644 src/xenia/ui/shaders/immediate.frag create mode 100644 src/xenia/ui/shaders/immediate.vert create mode 100644 src/xenia/ui/vulkan/vulkan_upload_buffer_pool.cc create mode 100644 src/xenia/ui/vulkan/vulkan_upload_buffer_pool.h create mode 100644 src/xenia/ui/vulkan/vulkan_window_demo.cc diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index ba464d8e8..92e0f5c02 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -376,7 +376,7 @@ class D3D12CommandProcessor : public CommandProcessor { CommandAllocator* command_allocator_submitted_last_ = nullptr; ID3D12GraphicsCommandList* command_list_ = nullptr; ID3D12GraphicsCommandList1* command_list_1_ = nullptr; - std::unique_ptr deferred_command_list_ = nullptr; + std::unique_ptr deferred_command_list_; // Should bindless textures and samplers be used - many times faster // UpdateBindings than bindful (that becomes a significant bottleneck with @@ -388,14 +388,12 @@ class D3D12CommandProcessor : public CommandProcessor { // targets. 
bool edram_rov_used_ = false; - std::unique_ptr constant_buffer_pool_ = - nullptr; + std::unique_ptr constant_buffer_pool_; static constexpr uint32_t kViewBindfulHeapSize = 32768; static_assert(kViewBindfulHeapSize <= D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1); - std::unique_ptr view_bindful_heap_pool_ = - nullptr; + std::unique_ptr view_bindful_heap_pool_; // Currently bound descriptor heap - updated by RequestViewBindfulDescriptors. ID3D12DescriptorHeap* view_bindful_heap_current_; // Rationale: textures have 4 KB alignment in guest memory, and there can be @@ -426,7 +424,7 @@ class D3D12CommandProcessor : public CommandProcessor { static constexpr uint32_t kSamplerHeapSize = 2000; static_assert(kSamplerHeapSize <= D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE); std::unique_ptr - sampler_bindful_heap_pool_ = nullptr; + sampler_bindful_heap_pool_; ID3D12DescriptorHeap* sampler_bindful_heap_current_; ID3D12DescriptorHeap* sampler_bindless_heap_current_ = nullptr; D3D12_CPU_DESCRIPTOR_HANDLE sampler_bindless_heap_cpu_start_; @@ -452,15 +450,15 @@ class D3D12CommandProcessor : public CommandProcessor { ID3D12RootSignature* root_signature_bindless_vs_ = nullptr; ID3D12RootSignature* root_signature_bindless_ds_ = nullptr; - std::unique_ptr shared_memory_ = nullptr; + std::unique_ptr shared_memory_; - std::unique_ptr pipeline_cache_ = nullptr; + std::unique_ptr pipeline_cache_; - std::unique_ptr texture_cache_ = nullptr; + std::unique_ptr texture_cache_; - std::unique_ptr render_target_cache_ = nullptr; + std::unique_ptr render_target_cache_; - std::unique_ptr primitive_converter_ = nullptr; + std::unique_ptr primitive_converter_; // Mip 0 contains the normal gamma ramp (256 entries), mip 1 contains the PWL // ramp (128 entries). DXGI_FORMAT_R10G10B10A2_UNORM 1D. 
diff --git a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc index 8eaefd15f..b8438a6fe 100644 --- a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc +++ b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc @@ -20,7 +20,7 @@ namespace xe { namespace gpu { namespace d3d12 { -// Generated with `xb buildhlsl`. +// Generated with `xb gendxbc`. #include "xenia/gpu/shaders/bytecode/d3d12_5_1/fullscreen_vs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/stretch_gamma_ps.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/stretch_ps.h" diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 41bb72790..de0d43ea5 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -56,7 +56,7 @@ namespace xe { namespace gpu { namespace d3d12 { -// Generated with `xb buildhlsl`. +// Generated with `xb gendxbc`. #include "xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_quad_hs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_triangle_hs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_quad_hs.h" diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index 30fd68a4e..cdc6ed5f3 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -247,7 +247,7 @@ class PipelineCache { uint32_t resolution_scale_; // Reusable shader translator. - std::unique_ptr shader_translator_ = nullptr; + std::unique_ptr shader_translator_; // Command processor thread DXIL conversion/disassembly interfaces, if DXIL // disassembly is enabled. @@ -344,7 +344,7 @@ class PipelineCache { // Manual-reset event set when the last queued pipeline state object is // created and there are no more pipeline state objects to create. This is // triggered by the thread creating the last pipeline state object. 
- std::unique_ptr creation_completion_event_ = nullptr; + std::unique_ptr creation_completion_event_; // Whether setting the event on completion is queued. Protected with // creation_request_lock_, notify_one creation_request_cond_ when set. bool creation_completion_set_event_ = false; diff --git a/src/xenia/gpu/d3d12/primitive_converter.h b/src/xenia/gpu/d3d12/primitive_converter.h index c00d29f7c..4d5c80f2d 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.h +++ b/src/xenia/gpu/d3d12/primitive_converter.h @@ -107,7 +107,7 @@ class PrimitiveConverter { Memory& memory_; TraceWriter& trace_writer_; - std::unique_ptr buffer_pool_ = nullptr; + std::unique_ptr buffer_pool_; // Static index buffers for emulating unsupported primitive types when drawing // without an index buffer. diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index 35a0bc25d..a43ad90d3 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -36,7 +36,7 @@ namespace xe { namespace gpu { namespace d3d12 { -// Generated with `xb buildhlsl`. +// Generated with `xb gendxbc`. #include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_32bpp_cs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_64bpp_cs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_7e3_cs.h" diff --git a/src/xenia/gpu/d3d12/render_target_cache.h b/src/xenia/gpu/d3d12/render_target_cache.h index de3846635..cf575dcdf 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.h +++ b/src/xenia/gpu/d3d12/render_target_cache.h @@ -603,7 +603,7 @@ class RenderTargetCache { // For traces only. 
ID3D12Resource* edram_snapshot_download_buffer_ = nullptr; std::unique_ptr - edram_snapshot_restore_pool_ = nullptr; + edram_snapshot_restore_pool_; }; } // namespace d3d12 diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h index 676803f15..86a55b2b7 100644 --- a/src/xenia/gpu/d3d12/shared_memory.h +++ b/src/xenia/gpu/d3d12/shared_memory.h @@ -212,8 +212,7 @@ class SharedMemory { std::vector upload_ranges_; void GetRangesToUpload(uint32_t request_page_first, uint32_t request_page_last); - std::unique_ptr upload_buffer_pool_ = - nullptr; + std::unique_ptr upload_buffer_pool_; // GPU-written memory downloading for traces. // Start page, length in pages. diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 7aed8ff22..db1d30a45 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -53,7 +53,7 @@ namespace xe { namespace gpu { namespace d3d12 { -// Generated with `xb buildhlsl`. +// Generated with `xb gendxbc`. #include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_128bpb_2x_cs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_128bpb_cs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_16bpb_2x_cs.h" diff --git a/src/xenia/ui/d3d12/d3d12_context.h b/src/xenia/ui/d3d12/d3d12_context.h index c9f235b97..8ae3a0b2c 100644 --- a/src/xenia/ui/d3d12/d3d12_context.h +++ b/src/xenia/ui/d3d12/d3d12_context.h @@ -99,7 +99,7 @@ class D3D12Context : public GraphicsContext { // kSwapCommandAllocatorCount. 
ID3D12GraphicsCommandList* swap_command_list_ = nullptr; - std::unique_ptr immediate_drawer_ = nullptr; + std::unique_ptr immediate_drawer_; }; } // namespace d3d12 diff --git a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc index 72a4f8736..6cc1e4d56 100644 --- a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc +++ b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc @@ -21,7 +21,7 @@ namespace xe { namespace ui { namespace d3d12 { -// Generated with `xb buildhlsl`. +// Generated with `xb gendxbc`. #include "xenia/ui/shaders/bytecode/d3d12_5_1/immediate_ps.h" #include "xenia/ui/shaders/bytecode/d3d12_5_1/immediate_vs.h" @@ -158,7 +158,7 @@ bool D3D12ImmediateDrawer::Initialize() { } { auto& root_parameter = - root_parameters[size_t(RootParameter::kViewportInvSize)]; + root_parameters[size_t(RootParameter::kViewportSizeInv)]; root_parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; root_parameter.Constants.ShaderRegister = 0; root_parameter.Constants.RegisterSpace = 0; @@ -179,7 +179,7 @@ bool D3D12ImmediateDrawer::Initialize() { return false; } - // Create the pipelines. + // Create the pipeline states. D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_desc = {}; pipeline_desc.pRootSignature = root_signature_; pipeline_desc.VS.pShaderBytecode = immediate_vs; @@ -192,10 +192,13 @@ bool D3D12ImmediateDrawer::Initialize() { pipeline_blend_desc.SrcBlend = D3D12_BLEND_SRC_ALPHA; pipeline_blend_desc.DestBlend = D3D12_BLEND_INV_SRC_ALPHA; pipeline_blend_desc.BlendOp = D3D12_BLEND_OP_ADD; - pipeline_blend_desc.SrcBlendAlpha = D3D12_BLEND_SRC_ALPHA; - pipeline_blend_desc.DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; + // Don't change alpha (always 1). 
+ pipeline_blend_desc.SrcBlendAlpha = D3D12_BLEND_ZERO; + pipeline_blend_desc.DestBlendAlpha = D3D12_BLEND_ONE; pipeline_blend_desc.BlendOpAlpha = D3D12_BLEND_OP_ADD; - pipeline_blend_desc.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + pipeline_blend_desc.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_RED | + D3D12_COLOR_WRITE_ENABLE_GREEN | + D3D12_COLOR_WRITE_ENABLE_BLUE; pipeline_desc.SampleMask = UINT_MAX; pipeline_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; pipeline_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; @@ -295,6 +298,7 @@ bool D3D12ImmediateDrawer::Initialize() { // Reset the current state. current_command_list_ = nullptr; + batch_open_ = false; return true; } @@ -415,6 +419,9 @@ void D3D12ImmediateDrawer::UpdateTexture(ImmediateTexture* texture, void D3D12ImmediateDrawer::Begin(int render_target_width, int render_target_height) { + assert_null(current_command_list_); + assert_false(batch_open_); + auto device = context_.GetD3D12Provider().GetDevice(); // Use the compositing command list. @@ -485,7 +492,7 @@ void D3D12ImmediateDrawer::Begin(int render_target_width, viewport_inv_size[0] = 1.0f / viewport.Width; viewport_inv_size[1] = 1.0f / viewport.Height; current_command_list_->SetGraphicsRoot32BitConstants( - UINT(RootParameter::kViewportInvSize), 2, viewport_inv_size, 0); + UINT(RootParameter::kViewportSizeInv), 2, viewport_inv_size, 0); current_primitive_topology_ = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; current_texture_ = nullptr; @@ -493,21 +500,18 @@ void D3D12ImmediateDrawer::Begin(int render_target_width, } void D3D12ImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { + assert_false(batch_open_); assert_not_null(current_command_list_); - if (current_command_list_ == nullptr) { - return; - } - uint64_t current_fence_value = context_.GetSwapCurrentFenceValue(); - batch_open_ = false; + uint64_t current_fence_value = context_.GetSwapCurrentFenceValue(); // Bind the vertices. 
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view; vertex_buffer_view.StrideInBytes = UINT(sizeof(ImmediateVertex)); vertex_buffer_view.SizeInBytes = - batch.vertex_count * uint32_t(sizeof(ImmediateVertex)); + UINT(sizeof(ImmediateVertex)) * batch.vertex_count; void* vertex_buffer_mapping = vertex_buffer_pool_->Request( - current_fence_value, vertex_buffer_view.SizeInBytes, sizeof(uint32_t), + current_fence_value, vertex_buffer_view.SizeInBytes, sizeof(float), nullptr, nullptr, &vertex_buffer_view.BufferLocation); if (vertex_buffer_mapping == nullptr) { XELOGE("Failed to get a buffer for {} vertices in the immediate drawer", @@ -522,7 +526,7 @@ void D3D12ImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { batch_has_index_buffer_ = batch.indices != nullptr; if (batch_has_index_buffer_) { D3D12_INDEX_BUFFER_VIEW index_buffer_view; - index_buffer_view.SizeInBytes = batch.index_count * sizeof(uint16_t); + index_buffer_view.SizeInBytes = UINT(sizeof(uint16_t)) * batch.index_count; index_buffer_view.Format = DXGI_FORMAT_R16_UINT; void* index_buffer_mapping = vertex_buffer_pool_->Request( current_fence_value, index_buffer_view.SizeInBytes, sizeof(uint16_t), @@ -541,11 +545,6 @@ void D3D12ImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { } void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) { - assert_not_null(current_command_list_); - if (current_command_list_ == nullptr) { - return; - } - if (!batch_open_) { // Could be an error while obtaining the vertex and index buffers. 
return; @@ -678,7 +677,10 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) { void D3D12ImmediateDrawer::EndDrawBatch() { batch_open_ = false; } -void D3D12ImmediateDrawer::End() { current_command_list_ = nullptr; } +void D3D12ImmediateDrawer::End() { + assert_false(batch_open_); + current_command_list_ = nullptr; +} } // namespace d3d12 } // namespace ui diff --git a/src/xenia/ui/d3d12/d3d12_immediate_drawer.h b/src/xenia/ui/d3d12/d3d12_immediate_drawer.h index ce3fbcd84..b18d87909 100644 --- a/src/xenia/ui/d3d12/d3d12_immediate_drawer.h +++ b/src/xenia/ui/d3d12/d3d12_immediate_drawer.h @@ -54,7 +54,7 @@ class D3D12ImmediateDrawer : public ImmediateDrawer { kRestrictTextureSamples, kTexture, kSampler, - kViewportInvSize, + kViewportSizeInv, kCount }; @@ -75,8 +75,8 @@ class D3D12ImmediateDrawer : public ImmediateDrawer { D3D12_CPU_DESCRIPTOR_HANDLE sampler_heap_cpu_start_; D3D12_GPU_DESCRIPTOR_HANDLE sampler_heap_gpu_start_; - std::unique_ptr vertex_buffer_pool_ = nullptr; - std::unique_ptr texture_descriptor_pool_ = nullptr; + std::unique_ptr vertex_buffer_pool_; + std::unique_ptr texture_descriptor_pool_; uint64_t texture_descriptor_pool_heap_index_; struct PendingTextureUpload { diff --git a/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.h b/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.h new file mode 100644 index 000000000..3fc30c576 --- /dev/null +++ b/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.h @@ -0,0 +1,47 @@ +// generated from `xb genspirv` +// source: immediate.frag +const uint8_t immediate_frag[] = { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x0A, 0x00, 0x08, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 
0x08, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x01, 0x00, 0x00, + 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x78, 0x65, 0x5F, 0x66, 0x72, 0x61, 0x67, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, + 0x72, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x78, 0x65, 0x5F, 0x76, 0x61, 0x72, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x72, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x78, 0x65, 0x5F, 0x76, 0x61, 0x72, 0x5F, 0x74, 0x65, 0x78, 0x63, 0x6F, + 0x6F, 0x72, 0x64, 0x00, 0x47, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x03, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 
0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, +}; diff --git a/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.spv b/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.spv new file mode 100644 index 0000000000000000000000000000000000000000..390a723c0ecc0e557e4cbeb8657bb2f6ea76d443 GIT binary patch literal 516 zcmYk2!AiqW5Jktt)Y@uW5p*XN_r--4q3AZ7?)-of+A4w?h(-G8ZUoPHHsXWHyt((z zoj1v|tAmKW=tVb1@q11p1BvKIC*^YfIG=oMR+F3CYZZgYlTgi&zJ0RsR=vDS91?wE zNQ}_RF@#&5{H8P(p#OUcX4$u0FP@s}d9kct>xNofFYwQ*`GXU#{|?`*x68V2R#7rT zA5X~y?-!}E&XZTm=_}_Raz_#z`}A)6PWc}h^(7iN_kPB$Lmm_7=-toSoyL5-WVPb6 zUF}=M;g56E)j`dRx%u_=E;owGg8_yQ@&_n^*jH~k-AeTVfo H-FL(fq{1Ng literal 0 HcmV?d00001 diff --git a/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.txt b/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.txt new file mode 100644 index 000000000..5834004d1 --- /dev/null +++ b/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.txt @@ -0,0 +1,38 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 16 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %xe_frag_color %xe_var_color %xe_var_texcoord + OpExecutionMode %main 
OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %xe_frag_color "xe_frag_color" + OpName %xe_var_color "xe_var_color" + OpName %xe_var_texcoord "xe_var_texcoord" + OpDecorate %xe_frag_color RelaxedPrecision + OpDecorate %xe_frag_color Location 0 + OpDecorate %xe_var_color RelaxedPrecision + OpDecorate %xe_var_color Location 1 + OpDecorate %12 RelaxedPrecision + OpDecorate %xe_var_texcoord Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%xe_frag_color = OpVariable %_ptr_Output_v4float Output +%_ptr_Input_v4float = OpTypePointer Input %v4float +%xe_var_color = OpVariable %_ptr_Input_v4float Input + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float +%xe_var_texcoord = OpVariable %_ptr_Input_v2float Input + %main = OpFunction %void None %3 + %5 = OpLabel + %12 = OpLoad %v4float %xe_var_color + OpStore %xe_frag_color %12 + OpReturn + OpFunctionEnd diff --git a/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_vert.h b/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_vert.h new file mode 100644 index 000000000..1ea709c75 --- /dev/null +++ b/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_vert.h @@ -0,0 +1,116 @@ +// generated from `xb genspirv` +// source: immediate.vert +const uint8_t immediate_vert[] = { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x0A, 0x00, 0x08, 0x00, + 0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 
0x00, + 0x11, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x01, 0x00, 0x00, + 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x78, 0x65, 0x5F, 0x76, 0x61, 0x72, 0x5F, 0x74, 0x65, 0x78, 0x63, 0x6F, + 0x6F, 0x72, 0x64, 0x00, 0x05, 0x00, 0x07, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x78, 0x65, 0x5F, 0x61, 0x74, 0x74, 0x72, 0x5F, 0x74, 0x65, 0x78, 0x63, + 0x6F, 0x6F, 0x72, 0x64, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x76, 0x61, 0x72, 0x5F, 0x63, + 0x6F, 0x6C, 0x6F, 0x72, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x61, 0x74, 0x74, 0x72, 0x5F, + 0x63, 0x6F, 0x6C, 0x6F, 0x72, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, + 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x07, 0x00, 0x18, 0x00, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x61, + 0x74, 0x74, 0x72, 0x5F, 0x70, 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x58, 0x65, 0x50, 0x75, 0x73, 0x68, 0x43, 0x6F, 0x6E, 0x73, 0x74, 0x61, + 0x6E, 0x74, 0x73, 0x00, 0x06, 0x00, 0x08, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x76, 0x69, 0x65, 0x77, 0x70, 0x6F, 0x72, 0x74, + 0x5F, 0x73, 0x69, 0x7A, 0x65, 0x5F, 0x69, 0x6E, 0x76, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x03, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, + 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, + 0x1A, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x04, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 
0x00, + 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x04, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x2B, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x03, 0x00, + 0x1A, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x1B, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x1D, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, + 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x80, 0x3F, 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x05, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 
0x00, + 0x07, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x85, 0x00, 0x05, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x83, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x07, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x2A, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, + 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, +}; diff --git a/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_vert.spv b/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_vert.spv new file mode 100644 index 0000000000000000000000000000000000000000..ef72522c08b6a84eb82160ab7e2db18d625b5905 GIT binary patch literal 1340 
zcmYk5%TE+R5XPGw_8}^W2>8UojV~m`gAxmK^Q~IS18oRSO!=y)h>`h+h%^?Km6f^_RTXEyVW%kBzTt3SU&E*uKTn zv++gg_lxtKxUOblE`%PLFWz?=3z)CxZtMJ-kH0rZAFHzQlhQY;JuqG_H!boXr&aB< zM#ZzDoLXMu?&6e;v?+G8VW`FI7ff#>^}-$yVzFm^>;l1ujjv-kao2SspEL!#LE10+ zpW_a=co%VP{=Cf{`=SQ3BE$Bu-r$H|(J$wwy@Zxqlid@SO4rq&9;0F6QsIG4UK{-D zfY}8c{so=%75(7YEA_4lQ=hj+{Gv{B;i(5^U$Ndb+0=`EFgt_WP&~88n$)`?OfEe2 zz`P5%RpB+Asg(R@0k6wOh^JrPPFp{*{V@x*@91P!_D3w3HvxzLUa*O$Hgj%DCGR61 z8vOY#;zx#^{?~c6=YdH zZpmiO52{ArycuR;7cu63;D3kSQepDoF9P>Kb}D7gUx9ll8=hG{i(?OurSM4Pp2&uq ila4hFJwKHai;vx~2YTdPY)Sc+^vFzL;{U3`E9pO88(zx* literal 0 HcmV?d00001 diff --git a/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_vert.txt b/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_vert.txt new file mode 100644 index 000000000..fbc229b84 --- /dev/null +++ b/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_vert.txt @@ -0,0 +1,82 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 44 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %xe_var_texcoord %xe_attr_texcoord %xe_var_color %xe_attr_color %_ %xe_attr_position + OpSource ESSL 310 + OpName %main "main" + OpName %xe_var_texcoord "xe_var_texcoord" + OpName %xe_attr_texcoord "xe_attr_texcoord" + OpName %xe_var_color "xe_var_color" + OpName %xe_attr_color "xe_attr_color" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpName %_ "" + OpName %xe_attr_position "xe_attr_position" + OpName %XePushConstants "XePushConstants" + OpMemberName %XePushConstants 0 "viewport_size_inv" + OpName %__0 "" + OpDecorate %xe_var_texcoord Location 0 + OpDecorate %xe_attr_texcoord Location 1 + OpDecorate %xe_var_color RelaxedPrecision + OpDecorate %xe_var_color Location 1 + OpDecorate %xe_attr_color RelaxedPrecision + OpDecorate %xe_attr_color Location 2 + OpDecorate %18 RelaxedPrecision + 
OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpDecorate %gl_PerVertex Block + OpDecorate %xe_attr_position Location 0 + OpMemberDecorate %XePushConstants 0 Offset 0 + OpDecorate %XePushConstants Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%_ptr_Output_v2float = OpTypePointer Output %v2float +%xe_var_texcoord = OpVariable %_ptr_Output_v2float Output +%_ptr_Input_v2float = OpTypePointer Input %v2float +%xe_attr_texcoord = OpVariable %_ptr_Input_v2float Input + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%xe_var_color = OpVariable %_ptr_Output_v4float Output +%_ptr_Input_v4float = OpTypePointer Input %v4float +%xe_attr_color = OpVariable %_ptr_Input_v4float Input +%gl_PerVertex = OpTypeStruct %v4float %float +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%xe_attr_position = OpVariable %_ptr_Input_v2float Input +%XePushConstants = OpTypeStruct %v2float +%_ptr_PushConstant_XePushConstants = OpTypePointer PushConstant %XePushConstants + %__0 = OpVariable %_ptr_PushConstant_XePushConstants PushConstant +%_ptr_PushConstant_v2float = OpTypePointer PushConstant %v2float + %float_2 = OpConstant %float 2 + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %43 = OpConstantComposite %v2float %float_1 %float_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %12 = OpLoad %v2float %xe_attr_texcoord + OpStore %xe_var_texcoord %12 + %18 = OpLoad %v4float %xe_attr_color + OpStore %xe_var_color %18 + %25 = OpLoad %v2float %xe_attr_position + %30 = OpAccessChain %_ptr_PushConstant_v2float %__0 %int_0 + %31 = OpLoad %v2float %30 + %32 = OpFMul %v2float %25 %31 + %34 = OpVectorTimesScalar %v2float %32 %float_2 + %37 = OpFSub %v2float %34 %43 + %39 = OpCompositeExtract %float 
%37 0 + %40 = OpCompositeExtract %float %37 1 + %41 = OpCompositeConstruct %v4float %39 %40 %float_0 %float_1 + %42 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %42 %41 + OpReturn + OpFunctionEnd diff --git a/src/xenia/ui/shaders/immediate.frag b/src/xenia/ui/shaders/immediate.frag new file mode 100644 index 000000000..e1c9d63d0 --- /dev/null +++ b/src/xenia/ui/shaders/immediate.frag @@ -0,0 +1,11 @@ +#version 310 es +precision highp float; + +layout(location = 0) in vec2 xe_var_texcoord; +layout(location = 1) in lowp vec4 xe_var_color; + +layout(location = 0) out lowp vec4 xe_frag_color; + +void main() { + xe_frag_color = xe_var_color; +} diff --git a/src/xenia/ui/shaders/immediate.vert b/src/xenia/ui/shaders/immediate.vert new file mode 100644 index 000000000..15328f108 --- /dev/null +++ b/src/xenia/ui/shaders/immediate.vert @@ -0,0 +1,20 @@ +#version 310 es +precision highp float; + +layout(push_constant) uniform XePushConstants { + layout(offset = 0) vec2 viewport_size_inv; +}; + +layout(location = 0) in vec2 xe_attr_position; +layout(location = 1) in vec2 xe_attr_texcoord; +layout(location = 2) in lowp vec4 xe_attr_color; + +layout(location = 0) out vec2 xe_var_texcoord; +layout(location = 1) out lowp vec4 xe_var_color; + +void main() { + xe_var_texcoord = xe_attr_texcoord; + xe_var_color = xe_attr_color; + gl_Position = vec4(xe_attr_position * viewport_size_inv * 2.0 - 1.0, 0.0, + 1.0); +} diff --git a/src/xenia/ui/shaders/immediate.vs.hlsl b/src/xenia/ui/shaders/immediate.vs.hlsl index 7c3e22530..2391d7c3b 100644 --- a/src/xenia/ui/shaders/immediate.vs.hlsl +++ b/src/xenia/ui/shaders/immediate.vs.hlsl @@ -1,4 +1,4 @@ -float2 xe_viewport_inv_size : register(b0); +float2 xe_viewport_size_inv : register(b0); struct XeVertexShaderInput { float2 position : POSITION; @@ -14,10 +14,10 @@ struct XeVertexShaderOutput { XeVertexShaderOutput main(XeVertexShaderInput input) { XeVertexShaderOutput output; - output.position = float4( - input.position * 
xe_viewport_inv_size * float2(2.0, -2.0) + - float2(-1.0, 1.0), 0.0, 1.0); output.texcoord = input.texcoord; output.color = input.color; + output.position = float4( + input.position * xe_viewport_size_inv * float2(2.0, -2.0) + + float2(-1.0, 1.0), 0.0, 1.0); return output; } diff --git a/src/xenia/ui/vulkan/premake5.lua b/src/xenia/ui/vulkan/premake5.lua index e657b4af3..21a829d69 100644 --- a/src/xenia/ui/vulkan/premake5.lua +++ b/src/xenia/ui/vulkan/premake5.lua @@ -14,3 +14,24 @@ project("xenia-ui-vulkan") files({ "../shaders/bytecode/vulkan_spirv/*.h", }) + +group("demos") +project("xenia-ui-window-vulkan-demo") + uuid("97598f13-3177-454c-8e58-c59e2b6ede27") + kind("WindowedApp") + language("C++") + links({ + "fmt", + "imgui", + "xenia-base", + "xenia-ui", + "xenia-ui-vulkan", + }) + files({ + "../window_demo.cc", + "vulkan_window_demo.cc", + project_root.."/src/xenia/base/main_"..platform_suffix..".cc", + }) + resincludedirs({ + project_root, + }) diff --git a/src/xenia/ui/vulkan/vulkan_context.cc b/src/xenia/ui/vulkan/vulkan_context.cc index 28c68bcd5..2d2306045 100644 --- a/src/xenia/ui/vulkan/vulkan_context.cc +++ b/src/xenia/ui/vulkan/vulkan_context.cc @@ -35,6 +35,8 @@ namespace vulkan { VulkanContext::VulkanContext(VulkanProvider* provider, Window* target_window) : GraphicsContext(provider, target_window) {} +VulkanContext::~VulkanContext() { Shutdown(); } + bool VulkanContext::Initialize() { context_lost_ = false; @@ -110,7 +112,10 @@ bool VulkanContext::Initialize() { } immediate_drawer_ = std::make_unique(*this); - // TODO(Triang3l): Initialize the immediate drawer. 
+ if (!immediate_drawer_->Initialize()) { + Shutdown(); + return false; + } swap_swapchain_or_surface_recreation_needed_ = true; @@ -124,6 +129,8 @@ void VulkanContext::Shutdown() { AwaitAllSwapSubmissionsCompletion(); + immediate_drawer_.reset(); + const VulkanProvider& provider = GetVulkanProvider(); const VulkanProvider::InstanceFunctions& ifn = provider.ifn(); VkInstance instance = provider.instance(); diff --git a/src/xenia/ui/vulkan/vulkan_context.h b/src/xenia/ui/vulkan/vulkan_context.h index 477d4de17..687967057 100644 --- a/src/xenia/ui/vulkan/vulkan_context.h +++ b/src/xenia/ui/vulkan/vulkan_context.h @@ -24,6 +24,8 @@ namespace vulkan { class VulkanContext : public GraphicsContext { public: + ~VulkanContext() override; + ImmediateDrawer* immediate_drawer() override; bool WasLost() override; @@ -43,6 +45,15 @@ class VulkanContext : public GraphicsContext { return swap_submissions_[swap_submission_current_ % kSwapchainMaxImageCount] .command_buffer; } + uint64_t swap_submission_current() const { return swap_submission_current_; } + uint64_t swap_submission_completed() const { + return swap_submission_completed_; + } + + const VkSurfaceFormatKHR& swap_surface_format() const { + return swap_surface_format_; + } + VkRenderPass swap_render_pass() const { return swap_render_pass_; } private: friend class VulkanProvider; diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index abd787f12..159a13266 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -9,12 +9,22 @@ #include "xenia/ui/vulkan/vulkan_immediate_drawer.h" +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" #include "xenia/ui/vulkan/vulkan_context.h" +#include "xenia/ui/vulkan/vulkan_util.h" namespace xe { namespace ui { namespace vulkan { +// Generated with `xb genspirv`. 
+#include "xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.h" +#include "xenia/ui/shaders/bytecode/vulkan_spirv/immediate_vert.h" + class VulkanImmediateTexture : public ImmediateTexture { public: VulkanImmediateTexture(uint32_t width, uint32_t height) @@ -24,6 +34,59 @@ class VulkanImmediateTexture : public ImmediateTexture { VulkanImmediateDrawer::VulkanImmediateDrawer(VulkanContext& graphics_context) : ImmediateDrawer(&graphics_context), context_(graphics_context) {} +VulkanImmediateDrawer::~VulkanImmediateDrawer() { Shutdown(); } + +bool VulkanImmediateDrawer::Initialize() { + const VulkanProvider& provider = context_.GetVulkanProvider(); + const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + VkPushConstantRange push_constant_ranges[1]; + push_constant_ranges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + push_constant_ranges[0].offset = offsetof(PushConstants, vertex); + push_constant_ranges[0].size = sizeof(PushConstants::Vertex); + VkPipelineLayoutCreateInfo pipeline_layout_create_info; + pipeline_layout_create_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipeline_layout_create_info.pNext = nullptr; + pipeline_layout_create_info.flags = 0; + pipeline_layout_create_info.setLayoutCount = 0; + pipeline_layout_create_info.pSetLayouts = nullptr; + pipeline_layout_create_info.pushConstantRangeCount = + uint32_t(xe::countof(push_constant_ranges)); + pipeline_layout_create_info.pPushConstantRanges = push_constant_ranges; + if (dfn.vkCreatePipelineLayout(device, &pipeline_layout_create_info, nullptr, + &pipeline_layout_) != VK_SUCCESS) { + XELOGE("Failed to create the immediate drawer Vulkan pipeline layout"); + Shutdown(); + return false; + } + + vertex_buffer_pool_ = std::make_unique( + provider, + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT); + + // Reset the current state. 
+ current_command_buffer_ = VK_NULL_HANDLE; + batch_open_ = false; + + return true; +} + +void VulkanImmediateDrawer::Shutdown() { + const VulkanProvider& provider = context_.GetVulkanProvider(); + const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device, pipeline_line_); + util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device, pipeline_triangle_); + + vertex_buffer_pool_.reset(); + + util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device, + pipeline_layout_); +} + std::unique_ptr VulkanImmediateDrawer::CreateTexture( uint32_t width, uint32_t height, ImmediateTextureFilter filter, bool repeat, const uint8_t* data) { @@ -35,15 +98,355 @@ void VulkanImmediateDrawer::UpdateTexture(ImmediateTexture* texture, const uint8_t* data) {} void VulkanImmediateDrawer::Begin(int render_target_width, - int render_target_height) {} + int render_target_height) { + assert_true(current_command_buffer_ == VK_NULL_HANDLE); + assert_false(batch_open_); -void VulkanImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) {} + if (!EnsurePipelinesCreated()) { + return; + } -void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) {} + current_command_buffer_ = context_.GetSwapCommandBuffer(); -void VulkanImmediateDrawer::EndDrawBatch() {} + uint64_t submission_completed = context_.swap_submission_completed(); + vertex_buffer_pool_->Reclaim(submission_completed); -void VulkanImmediateDrawer::End() {} + const VulkanProvider::DeviceFunctions& dfn = + context_.GetVulkanProvider().dfn(); + + current_render_target_extent_.width = uint32_t(render_target_width); + current_render_target_extent_.height = uint32_t(render_target_height); + VkViewport viewport; + viewport.x = 0.0f; + viewport.y = 0.0f; + viewport.width = float(render_target_width); + viewport.height = float(render_target_height); + viewport.minDepth = 0.0f; + viewport.maxDepth = 1.0f; + 
dfn.vkCmdSetViewport(current_command_buffer_, 0, 1, &viewport); + PushConstants::Vertex push_constants_vertex; + push_constants_vertex.viewport_size_inv[0] = 1.0f / viewport.width; + push_constants_vertex.viewport_size_inv[1] = 1.0f / viewport.height; + dfn.vkCmdPushConstants(current_command_buffer_, pipeline_layout_, + VK_SHADER_STAGE_VERTEX_BIT, + offsetof(PushConstants, vertex), + sizeof(PushConstants::Vertex), &push_constants_vertex); + + current_pipeline_ = VK_NULL_HANDLE; +} + +void VulkanImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { + assert_false(batch_open_); + if (current_command_buffer_ == VK_NULL_HANDLE) { + // No surface, or failed to create the pipelines. + return; + } + + uint64_t submission_current = context_.swap_submission_current(); + const VulkanProvider::DeviceFunctions& dfn = + context_.GetVulkanProvider().dfn(); + + // Bind the vertices. + size_t vertex_buffer_size = sizeof(ImmediateVertex) * batch.vertex_count; + VkBuffer vertex_buffer; + VkDeviceSize vertex_buffer_offset; + void* vertex_buffer_mapping = vertex_buffer_pool_->Request( + submission_current, vertex_buffer_size, sizeof(float), vertex_buffer, + vertex_buffer_offset); + if (!vertex_buffer_mapping) { + XELOGE("Failed to get a buffer for {} vertices in the immediate drawer", + batch.vertex_count); + return; + } + std::memcpy(vertex_buffer_mapping, batch.vertices, vertex_buffer_size); + dfn.vkCmdBindVertexBuffers(current_command_buffer_, 0, 1, &vertex_buffer, + &vertex_buffer_offset); + + // Bind the indices. 
+ batch_has_index_buffer_ = batch.indices != nullptr; + if (batch_has_index_buffer_) { + size_t index_buffer_size = sizeof(uint16_t) * batch.index_count; + VkBuffer index_buffer; + VkDeviceSize index_buffer_offset; + void* index_buffer_mapping = vertex_buffer_pool_->Request( + submission_current, index_buffer_size, sizeof(uint16_t), index_buffer, + index_buffer_offset); + if (!index_buffer_mapping) { + XELOGE("Failed to get a buffer for {} indices in the immediate drawer", + batch.index_count); + return; + } + std::memcpy(index_buffer_mapping, batch.indices, index_buffer_size); + dfn.vkCmdBindIndexBuffer(current_command_buffer_, index_buffer, + index_buffer_offset, VK_INDEX_TYPE_UINT16); + } + + batch_open_ = true; +} + +void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) { + if (!batch_open_) { + // No surface, or failed to create the pipelines, or could be an error while + // obtaining the vertex and index buffers. + return; + } + + const VulkanProvider::DeviceFunctions& dfn = + context_.GetVulkanProvider().dfn(); + + // Bind the pipeline for the current primitive count. + VkPipeline pipeline; + switch (draw.primitive_type) { + case ImmediatePrimitiveType::kLines: + pipeline = pipeline_line_; + break; + case ImmediatePrimitiveType::kTriangles: + pipeline = pipeline_triangle_; + break; + default: + assert_unhandled_case(draw.primitive_type); + return; + } + if (current_pipeline_ != pipeline) { + current_pipeline_ = pipeline; + dfn.vkCmdBindPipeline(current_command_buffer_, + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } + + // Set the scissor rectangle if enabled. 
+ VkRect2D scissor; + if (draw.scissor) { + scissor.offset.x = draw.scissor_rect[0]; + scissor.offset.y = current_render_target_extent_.height - + (draw.scissor_rect[1] + draw.scissor_rect[3]); + scissor.extent.width = draw.scissor_rect[2]; + scissor.extent.height = draw.scissor_rect[3]; + } else { + scissor.offset.x = 0; + scissor.offset.y = 0; + scissor.extent = current_render_target_extent_; + } + dfn.vkCmdSetScissor(current_command_buffer_, 0, 1, &scissor); + + // Draw. + if (batch_has_index_buffer_) { + dfn.vkCmdDrawIndexed(current_command_buffer_, draw.count, 1, + draw.index_offset, draw.base_vertex, 0); + } else { + dfn.vkCmdDraw(current_command_buffer_, draw.count, 1, draw.base_vertex, 0); + } +} + +void VulkanImmediateDrawer::EndDrawBatch() { batch_open_ = false; } + +void VulkanImmediateDrawer::End() { + assert_false(batch_open_); + if (current_command_buffer_ == VK_NULL_HANDLE) { + // Didn't draw anything because the of some issue or surface not being + // available. + return; + } + vertex_buffer_pool_->FlushWrites(); + current_command_buffer_ = VK_NULL_HANDLE; +} + +bool VulkanImmediateDrawer::EnsurePipelinesCreated() { + VkFormat swap_surface_format = context_.swap_surface_format().format; + if (swap_surface_format == pipeline_framebuffer_format_) { + // Either created, or failed to create once (don't try to create every + // frame). + return pipeline_triangle_ != VK_NULL_HANDLE && + pipeline_line_ != VK_NULL_HANDLE; + } + VkRenderPass swap_render_pass = context_.swap_render_pass(); + if (swap_surface_format == VK_FORMAT_UNDEFINED || + swap_render_pass == VK_NULL_HANDLE) { + // Not ready yet. + return false; + } + + const VulkanProvider& provider = context_.GetVulkanProvider(); + const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + // Safe to destroy the pipelines now - if the render pass was recreated, + // completion of its usage has already been awaited. 
+ util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device, pipeline_line_); + util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device, pipeline_triangle_); + // If creation fails now, don't try to create every frame. + pipeline_framebuffer_format_ = swap_surface_format; + + // Triangle pipeline. + + VkPipelineShaderStageCreateInfo stages[2] = {}; + stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + stages[0].module = util::CreateShaderModule(provider, immediate_vert, + sizeof(immediate_vert)); + if (stages[0].module == VK_NULL_HANDLE) { + XELOGE("Failed to create the immediate drawer Vulkan vertex shader module"); + return false; + } + stages[0].pName = "main"; + stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + stages[1].module = util::CreateShaderModule(provider, immediate_frag, + sizeof(immediate_frag)); + if (stages[1].module == VK_NULL_HANDLE) { + XELOGE( + "Failed to create the immediate drawer Vulkan fragment shader module"); + dfn.vkDestroyShaderModule(device, stages[0].module, nullptr); + return false; + } + stages[1].pName = "main"; + + VkVertexInputBindingDescription vertex_input_binding; + vertex_input_binding.binding = 0; + vertex_input_binding.stride = sizeof(ImmediateVertex); + vertex_input_binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + VkVertexInputAttributeDescription vertex_input_attributes[3]; + vertex_input_attributes[0].location = 0; + vertex_input_attributes[0].binding = 0; + vertex_input_attributes[0].format = VK_FORMAT_R32G32_SFLOAT; + vertex_input_attributes[0].offset = offsetof(ImmediateVertex, x); + vertex_input_attributes[1].location = 1; + vertex_input_attributes[1].binding = 0; + vertex_input_attributes[1].format = VK_FORMAT_R32G32_SFLOAT; + vertex_input_attributes[1].offset = offsetof(ImmediateVertex, u); + vertex_input_attributes[2].location = 2; + vertex_input_attributes[2].binding = 0; 
+ vertex_input_attributes[2].format = VK_FORMAT_R8G8B8A8_UNORM; + vertex_input_attributes[2].offset = offsetof(ImmediateVertex, color); + VkPipelineVertexInputStateCreateInfo vertex_input_state; + vertex_input_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vertex_input_state.pNext = nullptr; + vertex_input_state.flags = 0; + vertex_input_state.vertexBindingDescriptionCount = 1; + vertex_input_state.pVertexBindingDescriptions = &vertex_input_binding; + vertex_input_state.vertexAttributeDescriptionCount = + uint32_t(xe::countof(vertex_input_attributes)); + vertex_input_state.pVertexAttributeDescriptions = vertex_input_attributes; + + VkPipelineInputAssemblyStateCreateInfo input_assembly_state; + input_assembly_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + input_assembly_state.pNext = nullptr; + input_assembly_state.flags = 0; + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + input_assembly_state.primitiveRestartEnable = VK_FALSE; + + VkPipelineViewportStateCreateInfo viewport_state; + viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewport_state.pNext = nullptr; + viewport_state.flags = 0; + viewport_state.viewportCount = 1; + viewport_state.pViewports = nullptr; + viewport_state.scissorCount = 1; + viewport_state.pScissors = nullptr; + + VkPipelineRasterizationStateCreateInfo rasterization_state = {}; + rasterization_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterization_state.polygonMode = VK_POLYGON_MODE_FILL; + rasterization_state.cullMode = VK_CULL_MODE_NONE; + rasterization_state.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rasterization_state.lineWidth = 1.0f; + + VkPipelineMultisampleStateCreateInfo multisample_state = {}; + multisample_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + multisample_state.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + + 
VkPipelineColorBlendAttachmentState color_blend_attachment_state; + color_blend_attachment_state.blendEnable = VK_TRUE; + color_blend_attachment_state.srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; + color_blend_attachment_state.dstColorBlendFactor = + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + color_blend_attachment_state.colorBlendOp = VK_BLEND_OP_ADD; + // Don't change alpha (always 1). + color_blend_attachment_state.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; + color_blend_attachment_state.dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE; + color_blend_attachment_state.alphaBlendOp = VK_BLEND_OP_ADD; + color_blend_attachment_state.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT; + VkPipelineColorBlendStateCreateInfo color_blend_state; + color_blend_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + color_blend_state.pNext = nullptr; + color_blend_state.flags = 0; + color_blend_state.logicOpEnable = VK_FALSE; + color_blend_state.logicOp = VK_LOGIC_OP_NO_OP; + color_blend_state.attachmentCount = 1; + color_blend_state.pAttachments = &color_blend_attachment_state; + color_blend_state.blendConstants[0] = 1.0f; + color_blend_state.blendConstants[1] = 1.0f; + color_blend_state.blendConstants[2] = 1.0f; + color_blend_state.blendConstants[3] = 1.0f; + + static const VkDynamicState dynamic_states[] = { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }; + VkPipelineDynamicStateCreateInfo dynamic_state; + dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state.pNext = nullptr; + dynamic_state.flags = 0; + dynamic_state.dynamicStateCount = uint32_t(xe::countof(dynamic_states)); + dynamic_state.pDynamicStates = dynamic_states; + + VkGraphicsPipelineCreateInfo pipeline_create_info; + pipeline_create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pipeline_create_info.pNext = nullptr; + pipeline_create_info.flags = 
VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT; + pipeline_create_info.stageCount = uint32_t(xe::countof(stages)); + pipeline_create_info.pStages = stages; + pipeline_create_info.pVertexInputState = &vertex_input_state; + pipeline_create_info.pInputAssemblyState = &input_assembly_state; + pipeline_create_info.pTessellationState = nullptr; + pipeline_create_info.pViewportState = &viewport_state; + pipeline_create_info.pRasterizationState = &rasterization_state; + pipeline_create_info.pMultisampleState = &multisample_state; + pipeline_create_info.pDepthStencilState = nullptr; + pipeline_create_info.pColorBlendState = &color_blend_state; + pipeline_create_info.pDynamicState = &dynamic_state; + pipeline_create_info.layout = pipeline_layout_; + pipeline_create_info.renderPass = swap_render_pass; + pipeline_create_info.subpass = 0; + pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE; + pipeline_create_info.basePipelineIndex = -1; + if (dfn.vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, + &pipeline_create_info, nullptr, + &pipeline_triangle_) != VK_SUCCESS) { + XELOGE( + "Failed to create the immediate drawer triangle list Vulkan pipeline"); + dfn.vkDestroyShaderModule(device, stages[1].module, nullptr); + dfn.vkDestroyShaderModule(device, stages[0].module, nullptr); + return false; + } + + // Line pipeline. 
+ + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; + pipeline_create_info.flags = + (pipeline_create_info.flags & ~VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT) | + VK_PIPELINE_CREATE_DERIVATIVE_BIT; + pipeline_create_info.basePipelineHandle = pipeline_triangle_; + VkResult pipeline_line_create_result = dfn.vkCreateGraphicsPipelines( + device, VK_NULL_HANDLE, 1, &pipeline_create_info, nullptr, + &pipeline_line_); + dfn.vkDestroyShaderModule(device, stages[1].module, nullptr); + dfn.vkDestroyShaderModule(device, stages[0].module, nullptr); + if (pipeline_line_create_result != VK_SUCCESS) { + XELOGE("Failed to create the immediate drawer line list Vulkan pipeline"); + dfn.vkDestroyPipeline(device, pipeline_triangle_, nullptr); + pipeline_triangle_ = VK_NULL_HANDLE; + return false; + } + + return true; +} } // namespace vulkan } // namespace ui diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h index 2e437ea25..eb1a4ebb4 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h @@ -10,7 +10,10 @@ #ifndef XENIA_UI_VULKAN_VULKAN_IMMEDIATE_DRAWER_H_ #define XENIA_UI_VULKAN_VULKAN_IMMEDIATE_DRAWER_H_ +#include + #include "xenia/ui/immediate_drawer.h" +#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h" namespace xe { namespace ui { @@ -21,6 +24,10 @@ class VulkanContext; class VulkanImmediateDrawer : public ImmediateDrawer { public: VulkanImmediateDrawer(VulkanContext& graphics_context); + ~VulkanImmediateDrawer() override; + + bool Initialize(); + void Shutdown(); std::unique_ptr CreateTexture(uint32_t width, uint32_t height, @@ -36,7 +43,29 @@ class VulkanImmediateDrawer : public ImmediateDrawer { void End() override; private: + struct PushConstants { + struct Vertex { + float viewport_size_inv[2]; + } vertex; + }; + + bool EnsurePipelinesCreated(); + VulkanContext& context_; + + VkPipelineLayout pipeline_layout_ = VK_NULL_HANDLE; + + 
std::unique_ptr vertex_buffer_pool_; + + VkFormat pipeline_framebuffer_format_ = VK_FORMAT_UNDEFINED; + VkPipeline pipeline_triangle_ = VK_NULL_HANDLE; + VkPipeline pipeline_line_ = VK_NULL_HANDLE; + + VkCommandBuffer current_command_buffer_ = VK_NULL_HANDLE; + VkExtent2D current_render_target_extent_; + VkPipeline current_pipeline_; + bool batch_open_ = false; + bool batch_has_index_buffer_; }; } // namespace vulkan diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index 1a9a94921..36c3b9a2e 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -131,12 +131,16 @@ bool VulkanProvider::Initialize() { const uint32_t api_version_target = VK_MAKE_VERSION(1, 2, 148); static_assert(VK_HEADER_VERSION_COMPLETE >= api_version_target, "Vulkan header files must be up to date"); + uint32_t instance_api_version; if (!lfn_.v_1_1.vkEnumerateInstanceVersion || - lfn_.v_1_1.vkEnumerateInstanceVersion(&api_version_) != VK_SUCCESS) { - api_version_ = VK_API_VERSION_1_0; + lfn_.v_1_1.vkEnumerateInstanceVersion(&instance_api_version) != + VK_SUCCESS) { + instance_api_version = VK_API_VERSION_1_0; } - XELOGVK("Vulkan instance version {}.{}.{}", VK_VERSION_MAJOR(api_version_), - VK_VERSION_MINOR(api_version_), VK_VERSION_PATCH(api_version_)); + XELOGVK("Vulkan instance version {}.{}.{}", + VK_VERSION_MAJOR(instance_api_version), + VK_VERSION_MINOR(instance_api_version), + VK_VERSION_PATCH(instance_api_version)); // Create the instance. std::vector instance_extensions_enabled; @@ -157,9 +161,9 @@ bool VulkanProvider::Initialize() { // designed to use" // "Vulkan 1.0 implementations were required to return // VK_ERROR_INCOMPATIBLE_DRIVER if apiVersion was larger than 1.0" - application_info.apiVersion = api_version_ >= VK_MAKE_VERSION(1, 1, 0) + application_info.apiVersion = instance_api_version >= VK_MAKE_VERSION(1, 1, 0) ? 
api_version_target - : api_version_; + : instance_api_version; VkInstanceCreateInfo instance_create_info; instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; instance_create_info.pNext = nullptr; @@ -207,6 +211,7 @@ bool VulkanProvider::Initialize() { XE_VULKAN_LOAD_IFN(vkGetDeviceProcAddr); XE_VULKAN_LOAD_IFN(vkGetPhysicalDeviceFeatures); XE_VULKAN_LOAD_IFN(vkGetPhysicalDeviceProperties); + XE_VULKAN_LOAD_IFN(vkGetPhysicalDeviceMemoryProperties); XE_VULKAN_LOAD_IFN(vkGetPhysicalDeviceQueueFamilyProperties); XE_VULKAN_LOAD_IFN(vkGetPhysicalDeviceSurfaceCapabilitiesKHR); XE_VULKAN_LOAD_IFN(vkGetPhysicalDeviceSurfaceFormatsKHR); @@ -344,6 +349,11 @@ bool VulkanProvider::Initialize() { continue; } + // Get device properties, will be needed to check if extensions have been + // promoted to core. + ifn_.vkGetPhysicalDeviceProperties(physical_device_current, + &device_properties_); + // Get the extensions, check if swapchain is supported. device_extension_properties.clear(); VkResult device_extensions_enumerate_result; @@ -373,14 +383,23 @@ bool VulkanProvider::Initialize() { continue; } std::memset(&device_extensions_, 0, sizeof(device_extensions_)); + if (device_properties_.apiVersion >= VK_MAKE_VERSION(1, 1, 0)) { + device_extensions_.khr_dedicated_allocation = true; + } bool device_supports_swapchain = false; for (const VkExtensionProperties& device_extension : device_extension_properties) { const char* device_extension_name = device_extension.extensionName; - if (!std::strcmp(device_extension_name, + if (!device_extensions_.ext_fragment_shader_interlock && + !std::strcmp(device_extension_name, "VK_EXT_fragment_shader_interlock")) { device_extensions_.ext_fragment_shader_interlock = true; - } else if (!std::strcmp(device_extension_name, "VK_KHR_swapchain")) { + } else if (!device_extensions_.khr_dedicated_allocation && + !std::strcmp(device_extension_name, + "VK_KHR_dedicated_allocation")) { + device_extensions_.khr_dedicated_allocation = true; + } 
else if (!device_supports_swapchain && + !std::strcmp(device_extension_name, "VK_KHR_swapchain")) { device_supports_swapchain = true; } } @@ -388,6 +407,32 @@ bool VulkanProvider::Initialize() { continue; } + // Get the memory types. + VkPhysicalDeviceMemoryProperties memory_properties; + ifn_.vkGetPhysicalDeviceMemoryProperties(physical_device_current, + &memory_properties); + memory_types_device_local_ = 0; + memory_types_host_visible_ = 0; + memory_types_host_coherent_ = 0; + for (uint32_t j = 0; j < memory_properties.memoryTypeCount; ++j) { + VkMemoryPropertyFlags memory_property_flags = + memory_properties.memoryTypes[j].propertyFlags; + uint32_t memory_type_bit = uint32_t(1) << j; + if (memory_property_flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) { + memory_types_device_local_ |= memory_type_bit; + } + if (memory_property_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + memory_types_host_visible_ |= memory_type_bit; + } + if (memory_property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) { + memory_types_host_coherent_ |= memory_type_bit; + } + } + if (!memory_types_device_local_ && !memory_types_host_visible_) { + // Shouldn't happen according to the specification. + continue; + } + physical_device_ = physical_device_current; break; } @@ -397,7 +442,6 @@ bool VulkanProvider::Initialize() { "support"); return false; } - ifn_.vkGetPhysicalDeviceProperties(physical_device_, &device_properties_); XELOGVK( "Vulkan device: {} (vendor {:04X}, device {:04X}, driver {:08X}, API " "{}.{}.{})", @@ -406,7 +450,12 @@ bool VulkanProvider::Initialize() { VK_VERSION_MAJOR(device_properties_.apiVersion), VK_VERSION_MINOR(device_properties_.apiVersion), VK_VERSION_PATCH(device_properties_.apiVersion)); - // TODO(Triang3l): Report properties, features, extensions. + XELOGVK("Vulkan device extensions:"); + XELOGVK("* VK_EXT_fragment_shader_interlock: {}", + device_extensions_.ext_fragment_shader_interlock ? 
"yes" : "no"); + XELOGVK("* VK_KHR_dedicated_allocation: {}", + device_extensions_.khr_dedicated_allocation ? "yes" : "no"); + // TODO(Triang3l): Report properties, features. // Create the device. float queue_priority_high = 1.0f; @@ -433,6 +482,11 @@ bool VulkanProvider::Initialize() { if (device_extensions_.ext_fragment_shader_interlock) { device_extensions_enabled.push_back("VK_EXT_fragment_shader_interlock"); } + if (device_properties_.apiVersion < VK_MAKE_VERSION(1, 1, 0)) { + if (device_extensions_.khr_dedicated_allocation) { + device_extensions_enabled.push_back("VK_KHR_dedicated_allocation"); + } + } VkDeviceCreateInfo device_create_info; device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; device_create_info.pNext = nullptr; @@ -462,26 +516,48 @@ bool VulkanProvider::Initialize() { nullptr; XE_VULKAN_LOAD_DFN(vkAcquireNextImageKHR); XE_VULKAN_LOAD_DFN(vkAllocateCommandBuffers); + XE_VULKAN_LOAD_DFN(vkAllocateMemory); XE_VULKAN_LOAD_DFN(vkBeginCommandBuffer); + XE_VULKAN_LOAD_DFN(vkBindBufferMemory); XE_VULKAN_LOAD_DFN(vkCmdBeginRenderPass); + XE_VULKAN_LOAD_DFN(vkCmdBindIndexBuffer); + XE_VULKAN_LOAD_DFN(vkCmdBindPipeline); + XE_VULKAN_LOAD_DFN(vkCmdBindVertexBuffers); + XE_VULKAN_LOAD_DFN(vkCmdDraw); + XE_VULKAN_LOAD_DFN(vkCmdDrawIndexed); XE_VULKAN_LOAD_DFN(vkCmdEndRenderPass); + XE_VULKAN_LOAD_DFN(vkCmdPushConstants); + XE_VULKAN_LOAD_DFN(vkCmdSetScissor); + XE_VULKAN_LOAD_DFN(vkCmdSetViewport); + XE_VULKAN_LOAD_DFN(vkCreateBuffer); XE_VULKAN_LOAD_DFN(vkCreateCommandPool); XE_VULKAN_LOAD_DFN(vkCreateFence); XE_VULKAN_LOAD_DFN(vkCreateFramebuffer); + XE_VULKAN_LOAD_DFN(vkCreateGraphicsPipelines); XE_VULKAN_LOAD_DFN(vkCreateImageView); + XE_VULKAN_LOAD_DFN(vkCreatePipelineLayout); XE_VULKAN_LOAD_DFN(vkCreateRenderPass); XE_VULKAN_LOAD_DFN(vkCreateSemaphore); + XE_VULKAN_LOAD_DFN(vkCreateShaderModule); XE_VULKAN_LOAD_DFN(vkCreateSwapchainKHR); + XE_VULKAN_LOAD_DFN(vkDestroyBuffer); XE_VULKAN_LOAD_DFN(vkDestroyCommandPool); 
XE_VULKAN_LOAD_DFN(vkDestroyFence); XE_VULKAN_LOAD_DFN(vkDestroyFramebuffer); XE_VULKAN_LOAD_DFN(vkDestroyImageView); + XE_VULKAN_LOAD_DFN(vkDestroyPipeline); + XE_VULKAN_LOAD_DFN(vkDestroyPipelineLayout); XE_VULKAN_LOAD_DFN(vkDestroyRenderPass); XE_VULKAN_LOAD_DFN(vkDestroySemaphore); + XE_VULKAN_LOAD_DFN(vkDestroyShaderModule); XE_VULKAN_LOAD_DFN(vkDestroySwapchainKHR); XE_VULKAN_LOAD_DFN(vkEndCommandBuffer); + XE_VULKAN_LOAD_DFN(vkFlushMappedMemoryRanges); + XE_VULKAN_LOAD_DFN(vkFreeMemory); + XE_VULKAN_LOAD_DFN(vkGetBufferMemoryRequirements); XE_VULKAN_LOAD_DFN(vkGetDeviceQueue); XE_VULKAN_LOAD_DFN(vkGetSwapchainImagesKHR); + XE_VULKAN_LOAD_DFN(vkMapMemory); XE_VULKAN_LOAD_DFN(vkResetCommandPool); XE_VULKAN_LOAD_DFN(vkResetFences); XE_VULKAN_LOAD_DFN(vkQueuePresentKHR); diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h index da4d56b80..4e254bbe0 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.h +++ b/src/xenia/ui/vulkan/vulkan_provider.h @@ -59,8 +59,6 @@ class VulkanProvider : public GraphicsProvider { }; const LibraryFunctions& lfn() const { return lfn_; } - uint32_t api_version() const { return api_version_; } - VkInstance instance() const { return instance_; } struct InstanceFunctions { PFN_vkCreateDevice vkCreateDevice; @@ -71,6 +69,7 @@ class VulkanProvider : public GraphicsProvider { PFN_vkEnumeratePhysicalDevices vkEnumeratePhysicalDevices; PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr; PFN_vkGetPhysicalDeviceFeatures vkGetPhysicalDeviceFeatures; + PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties; PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties; PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties; @@ -99,10 +98,21 @@ class VulkanProvider : public GraphicsProvider { } struct DeviceExtensions { bool ext_fragment_shader_interlock; + // Core since 1.1.0. 
+ bool khr_dedicated_allocation; }; const DeviceExtensions& device_extensions() const { return device_extensions_; } + uint32_t memory_types_device_local() const { + return memory_types_device_local_; + } + uint32_t memory_types_host_visible() const { + return memory_types_host_visible_; + } + uint32_t memory_types_host_coherent() const { + return memory_types_host_coherent_; + } // FIXME(Triang3l): Allow a separate queue for present - see // vulkan_provider.cc for details. uint32_t queue_family_graphics_compute() const { @@ -113,26 +123,48 @@ class VulkanProvider : public GraphicsProvider { struct DeviceFunctions { PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR; PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers; + PFN_vkAllocateMemory vkAllocateMemory; PFN_vkBeginCommandBuffer vkBeginCommandBuffer; + PFN_vkBindBufferMemory vkBindBufferMemory; PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass; + PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer; + PFN_vkCmdBindPipeline vkCmdBindPipeline; + PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers; + PFN_vkCmdDraw vkCmdDraw; + PFN_vkCmdDrawIndexed vkCmdDrawIndexed; PFN_vkCmdEndRenderPass vkCmdEndRenderPass; + PFN_vkCmdPushConstants vkCmdPushConstants; + PFN_vkCmdSetScissor vkCmdSetScissor; + PFN_vkCmdSetViewport vkCmdSetViewport; + PFN_vkCreateBuffer vkCreateBuffer; PFN_vkCreateCommandPool vkCreateCommandPool; PFN_vkCreateFence vkCreateFence; PFN_vkCreateFramebuffer vkCreateFramebuffer; + PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines; PFN_vkCreateImageView vkCreateImageView; + PFN_vkCreatePipelineLayout vkCreatePipelineLayout; PFN_vkCreateRenderPass vkCreateRenderPass; PFN_vkCreateSemaphore vkCreateSemaphore; + PFN_vkCreateShaderModule vkCreateShaderModule; PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR; + PFN_vkDestroyBuffer vkDestroyBuffer; PFN_vkDestroyCommandPool vkDestroyCommandPool; PFN_vkDestroyFence vkDestroyFence; PFN_vkDestroyFramebuffer vkDestroyFramebuffer; PFN_vkDestroyImageView vkDestroyImageView; + 
PFN_vkDestroyPipeline vkDestroyPipeline; + PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout; PFN_vkDestroyRenderPass vkDestroyRenderPass; PFN_vkDestroySemaphore vkDestroySemaphore; + PFN_vkDestroyShaderModule vkDestroyShaderModule; PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR; PFN_vkEndCommandBuffer vkEndCommandBuffer; + PFN_vkFlushMappedMemoryRanges vkFlushMappedMemoryRanges; + PFN_vkFreeMemory vkFreeMemory; + PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; PFN_vkGetDeviceQueue vkGetDeviceQueue; PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR; + PFN_vkMapMemory vkMapMemory; PFN_vkResetCommandPool vkResetCommandPool; PFN_vkResetFences vkResetFences; PFN_vkQueuePresentKHR vkQueuePresentKHR; @@ -158,8 +190,6 @@ class VulkanProvider : public GraphicsProvider { LibraryFunctions lfn_ = {}; - uint32_t api_version_ = VK_API_VERSION_1_0; - VkInstance instance_ = VK_NULL_HANDLE; InstanceFunctions ifn_ = {}; @@ -167,6 +197,9 @@ class VulkanProvider : public GraphicsProvider { VkPhysicalDeviceProperties device_properties_; VkPhysicalDeviceFeatures device_features_; DeviceExtensions device_extensions_; + uint32_t memory_types_device_local_; + uint32_t memory_types_host_visible_; + uint32_t memory_types_host_coherent_; uint32_t queue_family_graphics_compute_; VkDevice device_ = VK_NULL_HANDLE; diff --git a/src/xenia/ui/vulkan/vulkan_upload_buffer_pool.cc b/src/xenia/ui/vulkan/vulkan_upload_buffer_pool.cc new file mode 100644 index 000000000..11ef6766b --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_upload_buffer_pool.cc @@ -0,0 +1,227 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h" + +#include + +#include "xenia/base/logging.h" +#include "xenia/base/math.h" + +namespace xe { +namespace ui { +namespace vulkan { + +VulkanUploadBufferPool::VulkanUploadBufferPool(const VulkanProvider& provider, + VkBufferUsageFlags usage, + size_t page_size) + : GraphicsUploadBufferPool(page_size), provider_(provider), usage_(usage) { + VkDeviceSize non_coherent_atom_size = + provider_.device_properties().limits.nonCoherentAtomSize; + // Memory mappings are always aligned to nonCoherentAtomSize, so for + // simplicity, round the page size to it now. On some Android implementations, + // nonCoherentAtomSize is 0, not 1. + if (non_coherent_atom_size > 1) { + page_size_ = xe::round_up(page_size_, non_coherent_atom_size); + } +} + +uint8_t* VulkanUploadBufferPool::Request(uint64_t submission_index, size_t size, + size_t alignment, VkBuffer& buffer_out, + VkDeviceSize& offset_out) { + size_t offset; + const VulkanPage* page = + static_cast(GraphicsUploadBufferPool::Request( + submission_index, size, alignment, offset)); + if (!page) { + return nullptr; + } + buffer_out = page->buffer_; + offset_out = VkDeviceSize(offset); + return reinterpret_cast(page->mapping_) + offset; +} + +uint8_t* VulkanUploadBufferPool::RequestPartial(uint64_t submission_index, + size_t size, size_t alignment, + VkBuffer& buffer_out, + VkDeviceSize& offset_out, + VkDeviceSize& size_out) { + size_t offset, size_obtained; + const VulkanPage* page = + static_cast(GraphicsUploadBufferPool::RequestPartial( + submission_index, size, alignment, offset, size_obtained)); + if (!page) { + return nullptr; + } + buffer_out = page->buffer_; + offset_out = VkDeviceSize(offset); + size_out = VkDeviceSize(size_obtained); + return reinterpret_cast(page->mapping_) + offset; +} + +GraphicsUploadBufferPool::Page* +VulkanUploadBufferPool::CreatePageImplementation() { + 
if (memory_type_ == kMemoryTypeUnavailable) { + // Don't try to create everything again and again if totally broken. + return nullptr; + } + + const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + + // For the first call, the page size is already aligned to nonCoherentAtomSize + // for mapping. + VkBufferCreateInfo buffer_create_info; + buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + buffer_create_info.pNext = nullptr; + buffer_create_info.flags = 0; + buffer_create_info.size = VkDeviceSize(page_size_); + buffer_create_info.usage = usage_; + buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + buffer_create_info.queueFamilyIndexCount = 0; + buffer_create_info.pQueueFamilyIndices = nullptr; + VkBuffer buffer; + if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer) != + VK_SUCCESS) { + XELOGE("Failed to create a Vulkan upload buffer with {} bytes", page_size_); + return nullptr; + } + + if (memory_type_ == kMemoryTypeUnknown) { + VkMemoryRequirements memory_requirements; + dfn.vkGetBufferMemoryRequirements(device, buffer, &memory_requirements); + uint32_t memory_types_host_visible = provider_.memory_types_host_visible(); + if (!xe::bit_scan_forward( + memory_requirements.memoryTypeBits & memory_types_host_visible, + &memory_type_)) { + XELOGE( + "No host-visible memory types can store an Vulkan upload buffer with " + "{} bytes", + page_size_); + memory_type_ = kMemoryTypeUnavailable; + dfn.vkDestroyBuffer(device, buffer, nullptr); + return nullptr; + } + allocation_size_ = memory_requirements.size; + // On some Android implementations, nonCoherentAtomSize is 0, not 1. 
+ VkDeviceSize non_coherent_atom_size = + std::max(provider_.device_properties().limits.nonCoherentAtomSize, + VkDeviceSize(1)); + VkDeviceSize allocation_size_aligned = + allocation_size_ / non_coherent_atom_size * non_coherent_atom_size; + if (allocation_size_aligned > page_size_) { + // Try to occupy all the allocation padding. If that's going to require + // even more memory for some reason, don't. + buffer_create_info.size = allocation_size_aligned; + VkBuffer buffer_expanded; + if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, + &buffer_expanded) == VK_SUCCESS) { + VkMemoryRequirements memory_requirements_expanded; + dfn.vkGetBufferMemoryRequirements(device, buffer_expanded, + &memory_requirements_expanded); + uint32_t memory_type_expanded; + if (memory_requirements_expanded.size <= allocation_size_ && + xe::bit_scan_forward(memory_requirements_expanded.memoryTypeBits & + memory_types_host_visible, + &memory_type_expanded)) { + // page_size_ must be aligned to nonCoherentAtomSize. 
+ page_size_ = size_t(allocation_size_aligned); + allocation_size_ = memory_requirements_expanded.size; + memory_type_ = memory_type_expanded; + dfn.vkDestroyBuffer(device, buffer, nullptr); + buffer = buffer_expanded; + } else { + dfn.vkDestroyBuffer(device, buffer_expanded, nullptr); + } + } + } + } + + VkMemoryAllocateInfo memory_allocate_info; + memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + VkMemoryDedicatedAllocateInfoKHR memory_dedicated_allocate_info; + if (provider_.device_extensions().khr_dedicated_allocation) { + memory_dedicated_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; + memory_dedicated_allocate_info.pNext = nullptr; + memory_dedicated_allocate_info.image = VK_NULL_HANDLE; + memory_dedicated_allocate_info.buffer = buffer; + memory_allocate_info.pNext = &memory_dedicated_allocate_info; + } else { + memory_allocate_info.pNext = nullptr; + } + memory_allocate_info.allocationSize = allocation_size_; + memory_allocate_info.memoryTypeIndex = memory_type_; + VkDeviceMemory memory; + if (dfn.vkAllocateMemory(device, &memory_allocate_info, nullptr, &memory) != + VK_SUCCESS) { + XELOGE("Failed to allocate {} bytes of Vulkan upload buffer memory", + allocation_size_); + dfn.vkDestroyBuffer(device, buffer, nullptr); + return nullptr; + } + + if (dfn.vkBindBufferMemory(device, buffer, memory, 0) != VK_SUCCESS) { + XELOGE("Failed to bind memory to a Vulkan upload buffer with {} bytes", + page_size_); + dfn.vkDestroyBuffer(device, buffer, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + + void* mapping; + // page_size_ is aligned to nonCoherentAtomSize. 
+ if (dfn.vkMapMemory(device, memory, 0, page_size_, 0, &mapping) != + VK_SUCCESS) { + XELOGE("Failed to map {} bytes of Vulkan upload buffer memory", page_size_); + dfn.vkDestroyBuffer(device, buffer, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + + return new VulkanPage(provider_, buffer, memory, mapping); +} + +void VulkanUploadBufferPool::FlushPageWrites(Page* page, size_t offset, + size_t size) { + if (provider_.memory_types_host_coherent() & (uint32_t(1) << memory_type_)) { + return; + } + const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + VkMappedMemoryRange range; + range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + range.pNext = nullptr; + range.memory = static_cast(page)->memory_; + range.offset = VkDeviceSize(offset); + range.size = VkDeviceSize(size); + VkDeviceSize non_coherent_atom_size = + provider_.device_properties().limits.nonCoherentAtomSize; + // On some Android implementations, nonCoherentAtomSize is 0, not 1. + if (non_coherent_atom_size > 1) { + VkDeviceSize end = + xe::round_up(range.offset + range.size, non_coherent_atom_size); + range.offset = + range.offset / non_coherent_atom_size * non_coherent_atom_size; + range.size = end - range.offset; + } + dfn.vkFlushMappedMemoryRanges(device, 1, &range); +} + +VulkanUploadBufferPool::VulkanPage::~VulkanPage() { + const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + dfn.vkDestroyBuffer(device, buffer_, nullptr); + // Unmapping is done implicitly when the memory is freed. 
+ dfn.vkFreeMemory(device, memory_, nullptr); +} + +} // namespace vulkan +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/vulkan/vulkan_upload_buffer_pool.h b/src/xenia/ui/vulkan/vulkan_upload_buffer_pool.h new file mode 100644 index 000000000..309c44ff1 --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_upload_buffer_pool.h @@ -0,0 +1,67 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_UI_VULKAN_VULKAN_UPLOAD_BUFFER_POOL_H_ +#define XENIA_UI_VULKAN_VULKAN_UPLOAD_BUFFER_POOL_H_ + +#include "xenia/ui/graphics_upload_buffer_pool.h" +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace ui { +namespace vulkan { + +class VulkanUploadBufferPool : public GraphicsUploadBufferPool { + public: + VulkanUploadBufferPool(const VulkanProvider& provider, + VkBufferUsageFlags usage, + size_t page_size = kDefaultPageSize); + + uint8_t* Request(uint64_t submission_index, size_t size, size_t alignment, + VkBuffer& buffer_out, VkDeviceSize& offset_out); + uint8_t* RequestPartial(uint64_t submission_index, size_t size, + size_t alignment, VkBuffer& buffer_out, + VkDeviceSize& offset_out, VkDeviceSize& size_out); + + protected: + Page* CreatePageImplementation() override; + + void FlushPageWrites(Page* page, size_t offset, size_t size) override; + + private: + struct VulkanPage : public Page { + // Takes ownership of the buffer and its memory and mapping. 
+ VulkanPage(const VulkanProvider& provider, VkBuffer buffer, + VkDeviceMemory memory, void* mapping) + : provider_(provider), + buffer_(buffer), + memory_(memory), + mapping_(mapping) {} + ~VulkanPage() override; + const VulkanProvider& provider_; + VkBuffer buffer_; + VkDeviceMemory memory_; + void* mapping_; + }; + + const VulkanProvider& provider_; + + VkDeviceSize allocation_size_; + static constexpr uint32_t kMemoryTypeUnknown = UINT32_MAX; + static constexpr uint32_t kMemoryTypeUnavailable = kMemoryTypeUnknown - 1; + uint32_t memory_type_ = UINT32_MAX; + + VkBufferUsageFlags usage_; +}; + +} // namespace vulkan +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_VULKAN_VULKAN_UPLOAD_BUFFER_POOL_H_ diff --git a/src/xenia/ui/vulkan/vulkan_util.h b/src/xenia/ui/vulkan/vulkan_util.h index c0702ba99..6239aed76 100644 --- a/src/xenia/ui/vulkan/vulkan_util.h +++ b/src/xenia/ui/vulkan/vulkan_util.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2019 Ben Vanik. All rights reserved. * + * Copyright 2020 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ @@ -37,6 +37,22 @@ inline bool DestroyAndNullHandle(F* destroy_function, P parent, T& handle) { return false; } +inline VkShaderModule CreateShaderModule(const VulkanProvider& provider, + const void* code, size_t code_size) { + VkShaderModuleCreateInfo shader_module_create_info; + shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + shader_module_create_info.pNext = nullptr; + shader_module_create_info.flags = 0; + shader_module_create_info.codeSize = code_size; + shader_module_create_info.pCode = reinterpret_cast(code); + VkShaderModule shader_module; + return provider.dfn().vkCreateShaderModule( + provider.device(), &shader_module_create_info, nullptr, + &shader_module) == VK_SUCCESS + ? shader_module + : nullptr; +} + } // namespace util } // namespace vulkan } // namespace ui diff --git a/src/xenia/ui/vulkan/vulkan_window_demo.cc b/src/xenia/ui/vulkan/vulkan_window_demo.cc new file mode 100644 index 000000000..12965197b --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_window_demo.cc @@ -0,0 +1,29 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include +#include + +#include "xenia/base/main.h" +#include "xenia/ui/vulkan/vulkan_provider.h" +#include "xenia/ui/window.h" + +namespace xe { +namespace ui { + +int window_demo_main(const std::vector& args); + +std::unique_ptr CreateDemoGraphicsProvider(Window* window) { + return xe::ui::vulkan::VulkanProvider::Create(window); +} + +} // namespace ui +} // namespace xe + +DEFINE_ENTRY_POINT("xenia-ui-window-vulkan-demo", xe::ui::window_demo_main, ""); diff --git a/xenia-build b/xenia-build index 25675c809..82b18b833 100755 --- a/xenia-build +++ b/xenia-build @@ -689,7 +689,6 @@ class GenSpirvCommand(Command): vulkan_bin_path = os.path.join(vulkan_sdk_path, 'bin') glslang = os.path.join(vulkan_bin_path, 'glslangValidator') spirv_dis = os.path.join(vulkan_bin_path, 'spirv-dis') - spirv_remap = os.path.join(vulkan_bin_path, 'spirv-remap') # Ensure we have the tools. if not os.path.exists(vulkan_sdk_path): @@ -701,9 +700,6 @@ class GenSpirvCommand(Command): elif not has_bin(spirv_dis): print('ERROR: could not find spirv-dis') return 1 - elif not has_bin(spirv_remap): - print('ERROR: could not find spirv-remap') - return 1 src_files = [os.path.join(root, name) for root, dirs, files in os.walk('src') @@ -717,7 +713,8 @@ class GenSpirvCommand(Command): src_name = os.path.splitext(os.path.basename(src_file))[0] identifier = os.path.basename(src_file).replace('.', '_') - bin_path = os.path.join(os.path.dirname(src_file), 'bin') + bin_path = os.path.join(os.path.dirname(src_file), + 'bytecode/vulkan_spirv') spv_file = os.path.join(bin_path, identifier) + '.spv' txt_file = os.path.join(bin_path, identifier) + '.txt' h_file = os.path.join(bin_path, identifier) + '.h' From 6a903192c366a124f3202dee7b3a459f202002aa Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 20 Sep 2020 15:33:18 +0300 Subject: [PATCH 013/123] [Vulkan] Set immdraw scissor only when changed --- 
.../ui/vulkan/vulkan_immediate_drawer.cc | 45 ++++++++++++------- src/xenia/ui/vulkan/vulkan_immediate_drawer.h | 1 + 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index 159a13266..ecbac96cc 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -9,6 +9,7 @@ #include "xenia/ui/vulkan/vulkan_immediate_drawer.h" +#include #include #include "xenia/base/assert.h" @@ -131,6 +132,10 @@ void VulkanImmediateDrawer::Begin(int render_target_width, VK_SHADER_STAGE_VERTEX_BIT, offsetof(PushConstants, vertex), sizeof(PushConstants::Vertex), &push_constants_vertex); + current_scissor_.offset.x = 0; + current_scissor_.offset.y = 0; + current_scissor_.extent.width = 0; + current_scissor_.extent.height = 0; current_pipeline_ = VK_NULL_HANDLE; } @@ -194,6 +199,31 @@ void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) { const VulkanProvider::DeviceFunctions& dfn = context_.GetVulkanProvider().dfn(); + // Set the scissor rectangle if enabled. + VkRect2D scissor; + if (draw.scissor) { + scissor.offset.x = draw.scissor_rect[0]; + scissor.offset.y = current_render_target_extent_.height - + (draw.scissor_rect[1] + draw.scissor_rect[3]); + scissor.extent.width = std::max(draw.scissor_rect[2], 0); + scissor.extent.height = std::max(draw.scissor_rect[3], 0); + } else { + scissor.offset.x = 0; + scissor.offset.y = 0; + scissor.extent = current_render_target_extent_; + } + if (!scissor.extent.width || !scissor.extent.height) { + // Nothing is visible (used as the default current_scissor_ value also). 
+ return; + } + if (current_scissor_.offset.x != scissor.offset.x || + current_scissor_.offset.y != scissor.offset.y || + current_scissor_.extent.width != scissor.extent.width || + current_scissor_.extent.height != scissor.extent.height) { + current_scissor_ = scissor; + dfn.vkCmdSetScissor(current_command_buffer_, 0, 1, &scissor); + } + // Bind the pipeline for the current primitive count. VkPipeline pipeline; switch (draw.primitive_type) { @@ -213,21 +243,6 @@ void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) { VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } - // Set the scissor rectangle if enabled. - VkRect2D scissor; - if (draw.scissor) { - scissor.offset.x = draw.scissor_rect[0]; - scissor.offset.y = current_render_target_extent_.height - - (draw.scissor_rect[1] + draw.scissor_rect[3]); - scissor.extent.width = draw.scissor_rect[2]; - scissor.extent.height = draw.scissor_rect[3]; - } else { - scissor.offset.x = 0; - scissor.offset.y = 0; - scissor.extent = current_render_target_extent_; - } - dfn.vkCmdSetScissor(current_command_buffer_, 0, 1, &scissor); - // Draw. 
if (batch_has_index_buffer_) { dfn.vkCmdDrawIndexed(current_command_buffer_, draw.count, 1, diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h index eb1a4ebb4..efc3710b9 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h @@ -63,6 +63,7 @@ class VulkanImmediateDrawer : public ImmediateDrawer { VkCommandBuffer current_command_buffer_ = VK_NULL_HANDLE; VkExtent2D current_render_target_extent_; + VkRect2D current_scissor_; VkPipeline current_pipeline_; bool batch_open_ = false; bool batch_has_index_buffer_; From 886129cefa15addf9de1521b537b37dcab13b792 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 26 Sep 2020 22:10:02 +0300 Subject: [PATCH 014/123] [Vulkan] Immediate drawer textures --- src/xenia/base/math.h | 7 +- src/xenia/debug/ui/debug_window.cc | 2 +- src/xenia/gpu/graphics_system.cc | 7 +- src/xenia/hid/hid_demo.cc | 2 +- src/xenia/ui/d3d12/d3d12_provider.cc | 4 +- src/xenia/ui/d3d12/d3d12_provider.h | 4 +- src/xenia/ui/graphics_provider.h | 10 +- .../bytecode/vulkan_spirv/immediate_frag.h | 82 +- .../bytecode/vulkan_spirv/immediate_frag.spv | Bin 516 -> 804 bytes .../bytecode/vulkan_spirv/immediate_frag.txt | 20 +- src/xenia/ui/shaders/immediate.frag | 5 +- src/xenia/ui/vulkan/vulkan_context.cc | 167 ++++- src/xenia/ui/vulkan/vulkan_context.h | 17 +- .../ui/vulkan/vulkan_immediate_drawer.cc | 700 +++++++++++++++++- src/xenia/ui/vulkan/vulkan_immediate_drawer.h | 111 +++ src/xenia/ui/vulkan/vulkan_provider.cc | 76 +- src/xenia/ui/vulkan/vulkan_provider.h | 43 +- .../ui/vulkan/vulkan_upload_buffer_pool.cc | 56 +- src/xenia/ui/vulkan/vulkan_util.cc | 49 ++ src/xenia/ui/vulkan/vulkan_util.h | 48 ++ src/xenia/ui/window_demo.cc | 2 +- 21 files changed, 1271 insertions(+), 141 deletions(-) create mode 100644 src/xenia/ui/vulkan/vulkan_util.cc diff --git a/src/xenia/base/math.h b/src/xenia/base/math.h index e2d321702..89eac56a0 100644 --- 
a/src/xenia/base/math.h +++ b/src/xenia/base/math.h @@ -43,8 +43,11 @@ constexpr T align(T value, T alignment) { // Rounds the given number up to the next highest multiple. template -constexpr T round_up(T value, V multiple) { - return value ? (((value + multiple - 1) / multiple) * multiple) : multiple; +constexpr T round_up(T value, V multiple, bool force_non_zero = true) { + if (force_non_zero && !value) { + return multiple; + } + return (value + multiple - 1) / multiple * multiple; } constexpr float saturate(float value) { diff --git a/src/xenia/debug/ui/debug_window.cc b/src/xenia/debug/ui/debug_window.cc index 590a66062..410e42104 100644 --- a/src/xenia/debug/ui/debug_window.cc +++ b/src/xenia/debug/ui/debug_window.cc @@ -104,7 +104,7 @@ bool DebugWindow::Initialize() { // Create the graphics context used for drawing. auto provider = emulator_->display_window()->context()->provider(); - window_->set_context(provider->CreateContext(window_.get())); + window_->set_context(provider->CreateHostContext(window_.get())); // Enable imgui input. window_->set_imgui_input_enabled(true); diff --git a/src/xenia/gpu/graphics_system.cc b/src/xenia/gpu/graphics_system.cc index e54792a27..4943faad9 100644 --- a/src/xenia/gpu/graphics_system.cc +++ b/src/xenia/gpu/graphics_system.cc @@ -61,15 +61,16 @@ X_STATUS GraphicsSystem::Setup(cpu::Processor* processor, target_window_->loop()->PostSynchronous([&]() { // Create the context used for presentation. assert_null(target_window->context()); - target_window_->set_context(provider_->CreateContext(target_window_)); + target_window_->set_context( + provider_->CreateHostContext(target_window_)); // Setup the context the command processor will do all its drawing in. // It's shared with the display context so that we can resolve // framebuffers from it. 
- processor_context = provider()->CreateOffscreenContext(); + processor_context = provider()->CreateEmulationContext(); }); } else { - processor_context = provider()->CreateOffscreenContext(); + processor_context = provider()->CreateEmulationContext(); } if (!processor_context) { diff --git a/src/xenia/hid/hid_demo.cc b/src/xenia/hid/hid_demo.cc index 1829d2b47..f7bea3a7f 100644 --- a/src/xenia/hid/hid_demo.cc +++ b/src/xenia/hid/hid_demo.cc @@ -127,7 +127,7 @@ int hid_demo_main(const std::vector& args) { // The window will finish initialization wtih the context (loading // resources, etc). graphics_provider = CreateDemoGraphicsProvider(window.get()); - window->set_context(graphics_provider->CreateContext(window.get())); + window->set_context(graphics_provider->CreateHostContext(window.get())); // Initialize input system and all drivers. input_system_ = std::make_unique(window.get()); diff --git a/src/xenia/ui/d3d12/d3d12_provider.cc b/src/xenia/ui/d3d12/d3d12_provider.cc index d1f6594ca..82aa0f365 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.cc +++ b/src/xenia/ui/d3d12/d3d12_provider.cc @@ -439,7 +439,7 @@ bool D3D12Provider::Initialize() { return true; } -std::unique_ptr D3D12Provider::CreateContext( +std::unique_ptr D3D12Provider::CreateHostContext( Window* target_window) { auto new_context = std::unique_ptr(new D3D12Context(this, target_window)); @@ -449,7 +449,7 @@ std::unique_ptr D3D12Provider::CreateContext( return std::unique_ptr(new_context.release()); } -std::unique_ptr D3D12Provider::CreateOffscreenContext() { +std::unique_ptr D3D12Provider::CreateEmulationContext() { auto new_context = std::unique_ptr(new D3D12Context(this, nullptr)); if (!new_context->Initialize()) { diff --git a/src/xenia/ui/d3d12/d3d12_provider.h b/src/xenia/ui/d3d12/d3d12_provider.h index c8332801c..9fa6de041 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.h +++ b/src/xenia/ui/d3d12/d3d12_provider.h @@ -27,9 +27,9 @@ class D3D12Provider : public GraphicsProvider { static 
std::unique_ptr Create(Window* main_window); - std::unique_ptr CreateContext( + std::unique_ptr CreateHostContext( Window* target_window) override; - std::unique_ptr CreateOffscreenContext() override; + std::unique_ptr CreateEmulationContext() override; IDXGIFactory2* GetDXGIFactory() const { return dxgi_factory_; } // nullptr if PIX not attached. diff --git a/src/xenia/ui/graphics_provider.h b/src/xenia/ui/graphics_provider.h index b0ebc9d3a..4b0d4b987 100644 --- a/src/xenia/ui/graphics_provider.h +++ b/src/xenia/ui/graphics_provider.h @@ -28,13 +28,13 @@ class GraphicsProvider { // The 'main' window of an application, used to query provider information. Window* main_window() const { return main_window_; } - // Creates a new graphics context and swapchain for presenting to a window. - virtual std::unique_ptr CreateContext( + // Creates a new host-side graphics context and swapchain, possibly presenting + // to a window and using the immediate drawer. + virtual std::unique_ptr CreateHostContext( Window* target_window) = 0; - // Creates a new offscreen graphics context without a swapchain or immediate - // drawer. - virtual std::unique_ptr CreateOffscreenContext() = 0; + // Creates a new offscreen emulation graphics context. 
+ virtual std::unique_ptr CreateEmulationContext() = 0; protected: explicit GraphicsProvider(Window* main_window) : main_window_(main_window) {} diff --git a/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.h b/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.h index 3fc30c576..dc4a7f3df 100644 --- a/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.h +++ b/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.h @@ -2,13 +2,13 @@ // source: immediate.frag const uint8_t immediate_frag[] = { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x0A, 0x00, 0x08, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x08, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x01, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, @@ -16,32 +16,56 @@ const uint8_t immediate_frag[] = { 0x78, 0x65, 0x5F, 0x66, 0x72, 0x61, 0x67, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x72, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x76, 0x61, 0x72, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x72, - 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x0F, 0x00, 0x00, 0x00, - 0x78, 0x65, 0x5F, 0x76, 0x61, 0x72, 0x5F, 0x74, 0x65, 0x78, 0x63, 0x6F, - 0x6F, 0x72, 0x64, 0x00, 0x47, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 
0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x08, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x78, 0x65, 0x5F, 0x69, 0x6D, 0x6D, 0x65, 0x64, 0x69, 0x61, 0x74, 0x65, + 0x5F, 0x74, 0x65, 0x78, 0x74, 0x75, 0x72, 0x65, 0x00, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x06, 0x00, 0x14, 0x00, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x76, + 0x61, 0x72, 0x5F, 0x74, 0x65, 0x78, 0x63, 0x6F, 0x6F, 0x72, 0x64, 0x00, + 0x47, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x03, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x47, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 
0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0D, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, - 0x0F, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, - 0x0B, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x0C, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x09, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1B, 0x00, 0x03, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x58, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x85, 0x00, 0x05, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, }; diff --git a/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.spv b/src/xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.spv index 390a723c0ecc0e557e4cbeb8657bb2f6ea76d443..d0d4c2beb39b4e5ae4d0e3df14bca25110865ce1 100644 GIT binary patch literal 804 zcmYk3%}&Bl5QPT{SV2KVL1JP+mL|l75)-4_ZrHhTM;fSxL~K&4;RSqWHzuC%($-Eg zojGTI@6@YK_RK2QG~b4{lde_8n6<3oY%qVCPe0Pt^yc=OjJA0$k(`d|YO?4iT)z|8 
zm$sxQ(j(;((-3){>o-$jU2M6gu36M=qs2=SzAl#WTbu~P(@XqUm=v^lU%ef_-fW`P zI!vQQ8g0|hBogiFn4i@tvW(+oWr22J4iabs{0oP`o|~1&m`jXz5<4Iu!Dlu?5By7q z{_(iLo^=bq=5A7+Hz;DBon?Mc_pizyYYvyj@5^h*j-&(4FE=!Kj)v2ujdGzaKNpQ>q=Qm-$V`2KSJNZ3Xdcl(q=KsK5 z36tBGA~XFx>UbN%8-B#pWgo=y2L1*6gV})_Gat;1Qz`wQl}$c(CI^0ZPqS}zB*xw8 QN6w?BaW}YMZFeXA1AaL#>Hq)$ delta 116 zcmZ3&*22Qe%%sfDz`)4B#lXQJFp<}vk$+-rEHfJe|HQkI>?{oY3=9l%lY1G #include +#include #include #include "xenia/base/assert.h" @@ -151,14 +152,21 @@ void VulkanContext::Shutdown() { util::DestroyAndNullHandle(dfn.vkDestroySemaphore, device, swap_image_acquisition_semaphore_); + swap_submission_completed_ = 0; + swap_submission_current_ = 1; for (uint32_t i = 0; i < kSwapchainMaxImageCount; ++i) { SwapSubmission& submission = swap_submissions_[i]; + submission.setup_command_buffer_index = UINT32_MAX; util::DestroyAndNullHandle(dfn.vkDestroyCommandPool, device, submission.command_pool); util::DestroyAndNullHandle(dfn.vkDestroyFence, device, submission.fence); + if (i < swap_setup_command_buffers_allocated_count_) { + dfn.vkDestroyCommandPool(device, swap_setup_command_buffers_[i].first, + nullptr); + } } - swap_submission_current_ = 1; - swap_submission_completed_ = 0; + swap_setup_command_buffers_free_bits_ = 0; + swap_setup_command_buffers_allocated_count_ = 0; } ImmediateDrawer* VulkanContext::immediate_drawer() { @@ -645,27 +653,10 @@ bool VulkanContext::BeginSwap() { // Await the frame data to be available before doing anything else. 
if (swap_submission_completed_ + kSwapchainMaxImageCount < swap_submission_current_) { - uint64_t submission_awaited = - swap_submission_current_ - kSwapchainMaxImageCount; - VkFence submission_fences[kSwapchainMaxImageCount]; - uint32_t submission_fence_count = 0; - while (swap_submission_completed_ + 1 + submission_fence_count <= - submission_awaited) { - assert_true(submission_fence_count < kSwapchainMaxImageCount); - uint32_t submission_index = - (swap_submission_completed_ + 1 + submission_fence_count) % - kSwapchainMaxImageCount; - submission_fences[submission_fence_count++] = - swap_submissions_[submission_index].fence; - } - if (submission_fence_count) { - if (dfn.vkWaitForFences(device, submission_fence_count, - submission_fences, VK_TRUE, - UINT64_MAX) != VK_SUCCESS) { - XELOGE("Failed to await the Vulkan presentation submission fences"); - return false; - } - swap_submission_completed_ += submission_fence_count; + if (!AwaitSwapSubmissionsCompletion( + swap_submission_current_ - kSwapchainMaxImageCount, false)) { + XELOGE("Failed to await the Vulkan presentation submission fences"); + return false; } } @@ -753,8 +744,20 @@ void VulkanContext::EndSwap() { const SwapSubmission& submission = swap_submissions_[swap_submission_current_ % kSwapchainMaxImageCount]; + VkCommandBuffer submit_command_buffers[2]; + uint32_t submit_command_buffer_count = 0; + if (submission.setup_command_buffer_index != UINT32_MAX) { + VkCommandBuffer submit_setup_command_buffer = + swap_setup_command_buffers_[submission.setup_command_buffer_index] + .second; + dfn.vkEndCommandBuffer(submit_setup_command_buffer); + submit_command_buffers[submit_command_buffer_count++] = + submit_setup_command_buffer; + } dfn.vkCmdEndRenderPass(submission.command_buffer); dfn.vkEndCommandBuffer(submission.command_buffer); + submit_command_buffers[submit_command_buffer_count++] = + submission.command_buffer; dfn.vkResetFences(device, 1, &submission.fence); VkSubmitInfo submit_info; submit_info.sType = 
VK_STRUCTURE_TYPE_SUBMIT_INFO; @@ -764,8 +767,8 @@ void VulkanContext::EndSwap() { VkPipelineStageFlags image_acquisition_semaphore_wait_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; submit_info.pWaitDstStageMask = &image_acquisition_semaphore_wait_stage; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &submission.command_buffer; + submit_info.commandBufferCount = submit_command_buffer_count; + submit_info.pCommandBuffers = submit_command_buffers; submit_info.signalSemaphoreCount = 1; submit_info.pSignalSemaphores = &swap_render_completion_semaphore_; VkResult submit_result = dfn.vkQueueSubmit(queue_graphics_compute, 1, @@ -845,22 +848,124 @@ void VulkanContext::RequestSurfaceRecreation() { swap_surface_ = VK_NULL_HANDLE; } -void VulkanContext::AwaitAllSwapSubmissionsCompletion() { +bool VulkanContext::AwaitSwapSubmissionsCompletion(uint64_t awaited_submission, + bool ignore_result) { assert_not_null(target_window_); + assert_true(awaited_submission < swap_submission_current_); const VulkanProvider& provider = GetVulkanProvider(); const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); VkFence fences[kSwapchainMaxImageCount]; uint32_t fence_count = 0; - while (swap_submission_completed_ + 1 < swap_submission_current_) { + while (swap_submission_completed_ + 1 + fence_count <= awaited_submission) { assert_true(fence_count < kSwapchainMaxImageCount); - uint32_t submission_index = - ++swap_submission_completed_ % kSwapchainMaxImageCount; + uint32_t submission_index = (swap_submission_completed_ + 1 + fence_count) % + kSwapchainMaxImageCount; fences[fence_count++] = swap_submissions_[submission_index].fence; } - if (fence_count && !context_lost_) { - dfn.vkWaitForFences(device, fence_count, fences, VK_TRUE, UINT64_MAX); + if (!fence_count) { + return true; } + VkResult result = + dfn.vkWaitForFences(device, fence_count, fences, VK_TRUE, UINT64_MAX); + if (!ignore_result && result != VK_SUCCESS) { 
+ return false; + } + // Reclaim setup command buffers if used. + for (uint32_t i = 0; i < fence_count; ++i) { + uint32_t submission_index = + (swap_submission_completed_ + 1 + i) % kSwapchainMaxImageCount; + uint32_t& setup_command_buffer_index = + swap_submissions_[submission_index].setup_command_buffer_index; + if (setup_command_buffer_index == UINT32_MAX) { + continue; + } + assert_zero(swap_setup_command_buffers_free_bits_ & + (uint32_t(1) << setup_command_buffer_index)); + swap_setup_command_buffers_free_bits_ |= uint32_t(1) + << setup_command_buffer_index; + setup_command_buffer_index = UINT32_MAX; + } + swap_submission_completed_ += fence_count; + return result == VK_SUCCESS; +} + +VkCommandBuffer VulkanContext::AcquireSwapSetupCommandBuffer() { + assert_not_null(target_window_); + + uint32_t& submission_command_buffer_index = + swap_submissions_[swap_submission_current_ % kSwapchainMaxImageCount] + .setup_command_buffer_index; + if (submission_command_buffer_index != UINT32_MAX) { + // A command buffer is already being recorded. + return swap_setup_command_buffers_[submission_command_buffer_index].second; + } + + const VulkanProvider& provider = GetVulkanProvider(); + const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + VkCommandBufferBeginInfo command_buffer_begin_info; + command_buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + command_buffer_begin_info.pNext = nullptr; + command_buffer_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + command_buffer_begin_info.pInheritanceInfo = nullptr; + + // Try to use a recycled one. 
+ uint32_t command_buffer_index; + if (xe::bit_scan_forward(swap_setup_command_buffers_free_bits_, + &command_buffer_index)) { + const std::pair& command_buffer = + swap_setup_command_buffers_[command_buffer_index]; + if (dfn.vkResetCommandPool(device, command_buffer.first, 0) != VK_SUCCESS || + dfn.vkBeginCommandBuffer(command_buffer.second, + &command_buffer_begin_info) != VK_SUCCESS) { + return VK_NULL_HANDLE; + } + submission_command_buffer_index = command_buffer_index; + swap_setup_command_buffers_free_bits_ &= + ~(uint32_t(1) << command_buffer_index); + return command_buffer.second; + } + + // Create a new command buffer. + assert_true(swap_setup_command_buffers_allocated_count_ < + kSwapchainMaxImageCount); + if (swap_setup_command_buffers_allocated_count_ >= kSwapchainMaxImageCount) { + return VK_NULL_HANDLE; + } + VkCommandPoolCreateInfo command_pool_create_info; + command_pool_create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + command_pool_create_info.pNext = nullptr; + command_pool_create_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT; + command_pool_create_info.queueFamilyIndex = + provider.queue_family_graphics_compute(); + VkCommandPool new_command_pool; + if (dfn.vkCreateCommandPool(device, &command_pool_create_info, nullptr, + &new_command_pool) != VK_SUCCESS) { + return VK_NULL_HANDLE; + } + VkCommandBufferAllocateInfo command_buffer_allocate_info; + command_buffer_allocate_info.sType = + VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + command_buffer_allocate_info.pNext = nullptr; + command_buffer_allocate_info.commandPool = new_command_pool; + command_buffer_allocate_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + command_buffer_allocate_info.commandBufferCount = 1; + VkCommandBuffer new_command_buffer; + if (dfn.vkAllocateCommandBuffers(device, &command_buffer_allocate_info, + &new_command_buffer) != VK_SUCCESS || + dfn.vkBeginCommandBuffer(new_command_buffer, + &command_buffer_begin_info) != VK_SUCCESS) { + 
dfn.vkDestroyCommandPool(device, new_command_pool, nullptr); + return VK_NULL_HANDLE; + } + uint32_t new_command_buffer_index = + swap_setup_command_buffers_allocated_count_++; + submission_command_buffer_index = new_command_buffer_index; + swap_setup_command_buffers_[new_command_buffer_index] = + std::make_pair(new_command_pool, new_command_buffer); + return new_command_buffer; } void VulkanContext::DestroySwapchainFramebuffers() { diff --git a/src/xenia/ui/vulkan/vulkan_context.h b/src/xenia/ui/vulkan/vulkan_context.h index 687967057..f3b43c112 100644 --- a/src/xenia/ui/vulkan/vulkan_context.h +++ b/src/xenia/ui/vulkan/vulkan_context.h @@ -12,6 +12,7 @@ #include #include +#include #include #include "xenia/ui/graphics_context.h" @@ -45,6 +46,7 @@ class VulkanContext : public GraphicsContext { return swap_submissions_[swap_submission_current_ % kSwapchainMaxImageCount] .command_buffer; } + VkCommandBuffer AcquireSwapSetupCommandBuffer(); uint64_t swap_submission_current() const { return swap_submission_current_; } uint64_t swap_submission_completed() const { return swap_submission_completed_; @@ -63,7 +65,12 @@ class VulkanContext : public GraphicsContext { private: void Shutdown(); - void AwaitAllSwapSubmissionsCompletion(); + bool AwaitSwapSubmissionsCompletion(uint64_t awaited_submission, + bool ignore_result); + void AwaitAllSwapSubmissionsCompletion() { + // Current starts from 1, so subtracting 1 can't result in a negative value. + AwaitSwapSubmissionsCompletion(swap_submission_current_ - 1, true); + } // AwaitAllSwapSubmissionsCompletion must be called before. As this can be // used in swapchain creation or in shutdown, @@ -83,6 +90,13 @@ class VulkanContext : public GraphicsContext { // (it's okay to wait first for completion of A, then of B, no matter if they // are actually completed in AB or in BA order). + // May be used infrequently, so allocated on demand (to only keep 1 rather + // than 3). 
+ std::pair + swap_setup_command_buffers_[kSwapchainMaxImageCount]; + uint32_t swap_setup_command_buffers_allocated_count_ = 0; + uint32_t swap_setup_command_buffers_free_bits_ = 0; + struct SwapSubmission { // One pool per frame, with resetting the pool itself rather than individual // command buffers (resetting command buffers themselves is not recommended @@ -92,6 +106,7 @@ class VulkanContext : public GraphicsContext { VkFence fence = VK_NULL_HANDLE; VkCommandPool command_pool = VK_NULL_HANDLE; VkCommandBuffer command_buffer; + uint32_t setup_command_buffer_index = UINT32_MAX; }; SwapSubmission swap_submissions_[kSwapchainMaxImageCount]; uint64_t swap_submission_current_ = 1; diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index 235fe647d..f5e17e3c9 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -26,12 +26,6 @@ namespace vulkan { #include "xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.h" #include "xenia/ui/shaders/bytecode/vulkan_spirv/immediate_vert.h" -class VulkanImmediateTexture : public ImmediateTexture { - public: - VulkanImmediateTexture(uint32_t width, uint32_t height) - : ImmediateTexture(width, height) {} -}; - VulkanImmediateDrawer::VulkanImmediateDrawer(VulkanContext& graphics_context) : ImmediateDrawer(&graphics_context), context_(graphics_context) {} @@ -42,6 +36,42 @@ bool VulkanImmediateDrawer::Initialize() { const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + VkDescriptorSetLayoutBinding texture_descriptor_set_layout_binding; + texture_descriptor_set_layout_binding.binding = 0; + texture_descriptor_set_layout_binding.descriptorType = + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + texture_descriptor_set_layout_binding.descriptorCount = 1; + texture_descriptor_set_layout_binding.stageFlags = + VK_SHADER_STAGE_FRAGMENT_BIT; + 
texture_descriptor_set_layout_binding.pImmutableSamplers = nullptr; + VkDescriptorSetLayoutCreateInfo texture_descriptor_set_layout_create_info; + texture_descriptor_set_layout_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + texture_descriptor_set_layout_create_info.pNext = nullptr; + texture_descriptor_set_layout_create_info.flags = 0; + texture_descriptor_set_layout_create_info.bindingCount = 1; + texture_descriptor_set_layout_create_info.pBindings = + &texture_descriptor_set_layout_binding; + if (dfn.vkCreateDescriptorSetLayout( + device, &texture_descriptor_set_layout_create_info, nullptr, + &texture_descriptor_set_layout_) != VK_SUCCESS) { + XELOGE( + "Failed to create the immediate drawer Vulkan combined image sampler " + "descriptor set layout"); + Shutdown(); + return false; + } + + // Create the (1, 1, 1, 1) texture as a replacement when drawing without a + // real texture. + white_texture_index_ = CreateVulkanTexture( + 1, 1, ImmediateTextureFilter::kNearest, false, nullptr); + if (white_texture_index_ == SIZE_MAX) { + XELOGE("Failed to create a blank texture for the Vulkan immediate drawer"); + Shutdown(); + return false; + } + VkPushConstantRange push_constant_ranges[1]; push_constant_ranges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; push_constant_ranges[0].offset = offsetof(PushConstants, vertex); @@ -51,8 +81,8 @@ bool VulkanImmediateDrawer::Initialize() { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; pipeline_layout_create_info.pNext = nullptr; pipeline_layout_create_info.flags = 0; - pipeline_layout_create_info.setLayoutCount = 0; - pipeline_layout_create_info.pSetLayouts = nullptr; + pipeline_layout_create_info.setLayoutCount = 1; + pipeline_layout_create_info.pSetLayouts = &texture_descriptor_set_layout_; pipeline_layout_create_info.pushConstantRangeCount = uint32_t(xe::countof(push_constant_ranges)); pipeline_layout_create_info.pPushConstantRanges = push_constant_ranges; @@ -86,13 +116,71 @@ void 
VulkanImmediateDrawer::Shutdown() { util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device, pipeline_layout_); + + for (SubmittedTextureUpload& submitted_texture_upload : + texture_uploads_submitted_) { + if (submitted_texture_upload.buffer != VK_NULL_HANDLE) { + dfn.vkDestroyBuffer(device, submitted_texture_upload.buffer, nullptr); + } + if (submitted_texture_upload.buffer_memory != VK_NULL_HANDLE) { + dfn.vkFreeMemory(device, submitted_texture_upload.buffer_memory, nullptr); + } + } + texture_uploads_submitted_.clear(); + for (PendingTextureUpload& pending_texture_upload : + texture_uploads_pending_) { + if (pending_texture_upload.buffer != VK_NULL_HANDLE) { + dfn.vkDestroyBuffer(device, pending_texture_upload.buffer, nullptr); + } + if (pending_texture_upload.buffer_memory != VK_NULL_HANDLE) { + dfn.vkFreeMemory(device, pending_texture_upload.buffer_memory, nullptr); + } + } + texture_uploads_pending_.clear(); + textures_free_.clear(); + for (Texture& texture : textures_) { + if (!texture.reference_count) { + continue; + } + if (texture.immediate_texture) { + texture.immediate_texture->DetachFromImmediateDrawer(); + } + dfn.vkDestroyImageView(device, texture.image_view, nullptr); + dfn.vkDestroyImage(device, texture.image, nullptr); + dfn.vkFreeMemory(device, texture.memory, nullptr); + } + textures_.clear(); + + texture_descriptor_pool_recycled_first_ = nullptr; + texture_descriptor_pool_unallocated_first_ = nullptr; + for (TextureDescriptorPool* pool : texture_descriptor_pools_) { + dfn.vkDestroyDescriptorPool(device, pool->pool, nullptr); + delete pool; + } + texture_descriptor_pools_.clear(); + util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, device, + texture_descriptor_set_layout_); } std::unique_ptr VulkanImmediateDrawer::CreateTexture( - uint32_t width, uint32_t height, ImmediateTextureFilter filter, bool repeat, - const uint8_t* data) { - auto texture = std::make_unique(width, height); - return std::unique_ptr(texture.release()); + 
uint32_t width, uint32_t height, ImmediateTextureFilter filter, + bool is_repeated, const uint8_t* data) { + assert_not_null(data); + size_t texture_index = + CreateVulkanTexture(width, height, filter, is_repeated, data); + if (texture_index == SIZE_MAX) { + texture_index = white_texture_index_; + } + Texture& texture = textures_[texture_index]; + auto immediate_texture = std::make_unique( + width, height, this, GetTextureHandleForIndex(texture_index)); + if (texture_index != white_texture_index_) { + texture.immediate_texture = immediate_texture.get(); + } + // Transferring a new reference to a real texture or giving a weak reference + // to the white texture (there's no backlink to the ImmediateTexture from it + // also). + return std::unique_ptr(immediate_texture.release()); } void VulkanImmediateDrawer::Begin(int render_target_width, @@ -107,10 +195,32 @@ void VulkanImmediateDrawer::Begin(int render_target_width, current_command_buffer_ = context_.GetSwapCommandBuffer(); uint64_t submission_completed = context_.swap_submission_completed(); - vertex_buffer_pool_->Reclaim(submission_completed); - const VulkanProvider::DeviceFunctions& dfn = - context_.GetVulkanProvider().dfn(); + const VulkanProvider& provider = context_.GetVulkanProvider(); + const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + // Release upload buffers for completed texture uploads. 
+ auto erase_texture_uploads_end = texture_uploads_submitted_.begin(); + while (erase_texture_uploads_end != texture_uploads_submitted_.end()) { + if (erase_texture_uploads_end->submission_index > submission_completed) { + break; + } + if (erase_texture_uploads_end->buffer != VK_NULL_HANDLE) { + dfn.vkDestroyBuffer(device, erase_texture_uploads_end->buffer, nullptr); + } + if (erase_texture_uploads_end->buffer_memory != VK_NULL_HANDLE) { + dfn.vkFreeMemory(device, erase_texture_uploads_end->buffer_memory, + nullptr); + } + // Release the texture reference held for uploading. + ReleaseTexture(erase_texture_uploads_end->texture_index); + ++erase_texture_uploads_end; + } + texture_uploads_submitted_.erase(texture_uploads_submitted_.begin(), + erase_texture_uploads_end); + + vertex_buffer_pool_->Reclaim(submission_completed); current_render_target_extent_.width = uint32_t(render_target_width); current_render_target_extent_.height = uint32_t(render_target_height); @@ -135,6 +245,7 @@ void VulkanImmediateDrawer::Begin(int render_target_width, current_scissor_.extent.height = 0; current_pipeline_ = VK_NULL_HANDLE; + current_texture_descriptor_index_ = UINT32_MAX; } void VulkanImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { @@ -221,7 +332,7 @@ void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) { dfn.vkCmdSetScissor(current_command_buffer_, 0, 1, &scissor); } - // Bind the pipeline for the current primitive count. + // Bind the pipeline for the current primitive type. VkPipeline pipeline; switch (draw.primitive_type) { case ImmediatePrimitiveType::kLines: @@ -240,6 +351,18 @@ void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) { VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } + // Bind the texture. 
+ uint32_t texture_descriptor_index = + textures_[GetTextureIndexForHandle(draw.texture_handle)].descriptor_index; + if (current_texture_descriptor_index_ != texture_descriptor_index) { + current_texture_descriptor_index_ = texture_descriptor_index; + VkDescriptorSet texture_descriptor_set = + GetTextureDescriptor(texture_descriptor_index); + dfn.vkCmdBindDescriptorSets( + current_command_buffer_, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_layout_, 0, 1, &texture_descriptor_set, 0, nullptr); + } + // Draw. if (batch_has_index_buffer_) { dfn.vkCmdDrawIndexed(current_command_buffer_, draw.count, 1, @@ -258,6 +381,110 @@ void VulkanImmediateDrawer::End() { // available. return; } + + // Copy textures. + if (!texture_uploads_pending_.empty()) { + VkCommandBuffer setup_command_buffer = + context_.AcquireSwapSetupCommandBuffer(); + if (setup_command_buffer != VK_NULL_HANDLE) { + const VulkanProvider::DeviceFunctions& dfn = + context_.GetVulkanProvider().dfn(); + size_t texture_uploads_pending_count = texture_uploads_pending_.size(); + uint64_t submission_current = context_.swap_submission_current(); + + // Transition to VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL. 
+ std::vector image_memory_barriers; + image_memory_barriers.reserve(texture_uploads_pending_count); + VkImageMemoryBarrier image_memory_barrier; + image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_memory_barrier.pNext = nullptr; + image_memory_barrier.srcAccessMask = 0; + image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + util::InitializeSubresourceRange(image_memory_barrier.subresourceRange); + for (const PendingTextureUpload& pending_texture_upload : + texture_uploads_pending_) { + image_memory_barriers.emplace_back(image_memory_barrier).image = + textures_[pending_texture_upload.texture_index].image; + } + dfn.vkCmdPipelineBarrier( + setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, + uint32_t(image_memory_barriers.size()), image_memory_barriers.data()); + + // Do transfer operations and transition to + // VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL. + for (size_t i = 0; i < texture_uploads_pending_count; ++i) { + const PendingTextureUpload& pending_texture_upload = + texture_uploads_pending_[i]; + VkImage texture_upload_image = + textures_[pending_texture_upload.texture_index].image; + if (pending_texture_upload.buffer != VK_NULL_HANDLE) { + // Copying. 
+ VkBufferImageCopy copy_region; + copy_region.bufferOffset = 0; + copy_region.bufferRowLength = pending_texture_upload.width; + copy_region.bufferImageHeight = pending_texture_upload.height; + copy_region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copy_region.imageSubresource.mipLevel = 0; + copy_region.imageSubresource.baseArrayLayer = 0; + copy_region.imageSubresource.layerCount = 1; + copy_region.imageOffset.x = 0; + copy_region.imageOffset.y = 0; + copy_region.imageOffset.z = 0; + copy_region.imageExtent.width = pending_texture_upload.width; + copy_region.imageExtent.height = pending_texture_upload.height; + copy_region.imageExtent.depth = 1; + dfn.vkCmdCopyBufferToImage( + setup_command_buffer, pending_texture_upload.buffer, + texture_upload_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, + ©_region); + } else { + // Clearing (initializing the empty image). + VkClearColorValue white_clear_value; + white_clear_value.float32[0] = 1.0f; + white_clear_value.float32[1] = 1.0f; + white_clear_value.float32[2] = 1.0f; + white_clear_value.float32[3] = 1.0f; + dfn.vkCmdClearColorImage(setup_command_buffer, texture_upload_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + &white_clear_value, 1, + &image_memory_barrier.subresourceRange); + } + + VkImageMemoryBarrier& image_memory_barrier_current = + image_memory_barriers[i]; + image_memory_barrier_current.srcAccessMask = + VK_ACCESS_TRANSFER_WRITE_BIT; + image_memory_barrier_current.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + image_memory_barrier_current.oldLayout = + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + image_memory_barrier_current.newLayout = + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + SubmittedTextureUpload& submitted_texture_upload = + texture_uploads_submitted_.emplace_back(); + // Transfer the reference to the texture - need to keep it until the + // upload is completed. 
+ submitted_texture_upload.texture_index = + pending_texture_upload.texture_index; + submitted_texture_upload.buffer = pending_texture_upload.buffer; + submitted_texture_upload.buffer_memory = + pending_texture_upload.buffer_memory; + submitted_texture_upload.submission_index = submission_current; + } + dfn.vkCmdPipelineBarrier( + setup_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, + uint32_t(image_memory_barriers.size()), image_memory_barriers.data()); + + texture_uploads_pending_.clear(); + } + } + vertex_buffer_pool_->FlushWrites(); current_command_buffer_ = VK_NULL_HANDLE; } @@ -460,6 +687,447 @@ bool VulkanImmediateDrawer::EnsurePipelinesCreated() { return true; } +uint32_t VulkanImmediateDrawer::AllocateTextureDescriptor() { + // Try to reuse a recycled descriptor first. + if (texture_descriptor_pool_recycled_first_) { + TextureDescriptorPool* pool = texture_descriptor_pool_recycled_first_; + assert_not_zero(pool->recycled_bits); + uint32_t local_index; + xe::bit_scan_forward(pool->recycled_bits, &local_index); + pool->recycled_bits &= ~(uint64_t(1) << local_index); + if (!pool->recycled_bits) { + texture_descriptor_pool_recycled_first_ = pool->recycled_next; + } + return (pool->index << 6) | local_index; + } + + const VulkanProvider& provider = context_.GetVulkanProvider(); + const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + VkDescriptorSetAllocateInfo allocate_info; + allocate_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + allocate_info.pNext = nullptr; + allocate_info.descriptorSetCount = 1; + allocate_info.pSetLayouts = &texture_descriptor_set_layout_; + + // If no recycled, try to create a new allocation within an existing pool with + // unallocated descriptors left. 
+ while (texture_descriptor_pool_unallocated_first_) { + TextureDescriptorPool* pool = texture_descriptor_pool_unallocated_first_; + assert_not_zero(pool->unallocated_count); + allocate_info.descriptorPool = pool->pool; + uint32_t local_index = + TextureDescriptorPool::kDescriptorCount - pool->unallocated_count; + VkResult allocate_result = dfn.vkAllocateDescriptorSets( + device, &allocate_info, &pool->sets[local_index]); + if (allocate_result == VK_SUCCESS) { + --pool->unallocated_count; + } else { + // Failed to allocate for some reason, don't try again for this pool. + pool->unallocated_count = 0; + } + if (!pool->unallocated_count) { + texture_descriptor_pool_unallocated_first_ = pool->unallocated_next; + } + if (allocate_result == VK_SUCCESS) { + return (pool->index << 6) | local_index; + } + } + + // Create a new pool and allocate the descriptor from it. + VkDescriptorPoolSize descriptor_pool_size; + descriptor_pool_size.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + descriptor_pool_size.descriptorCount = + TextureDescriptorPool::kDescriptorCount; + VkDescriptorPoolCreateInfo descriptor_pool_create_info; + descriptor_pool_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + descriptor_pool_create_info.pNext = nullptr; + descriptor_pool_create_info.flags = 0; + descriptor_pool_create_info.maxSets = TextureDescriptorPool::kDescriptorCount; + descriptor_pool_create_info.poolSizeCount = 1; + descriptor_pool_create_info.pPoolSizes = &descriptor_pool_size; + VkDescriptorPool descriptor_pool; + if (dfn.vkCreateDescriptorPool(device, &descriptor_pool_create_info, nullptr, + &descriptor_pool) != VK_SUCCESS) { + XELOGE( + "Failed to create an immediate drawer Vulkan combined image sampler " + "descriptor pool with {} descriptors", + TextureDescriptorPool::kDescriptorCount); + return UINT32_MAX; + } + allocate_info.descriptorPool = descriptor_pool; + VkDescriptorSet descriptor_set; + if (dfn.vkAllocateDescriptorSets(device, &allocate_info, 
&descriptor_set) != + VK_SUCCESS) { + XELOGE( + "Failed to allocate an immediate drawer Vulkan combined image sampler " + "descriptor"); + dfn.vkDestroyDescriptorPool(device, descriptor_pool, nullptr); + return UINT32_MAX; + } + TextureDescriptorPool* new_pool = new TextureDescriptorPool; + new_pool->pool = descriptor_pool; + new_pool->sets[0] = descriptor_set; + uint32_t new_pool_index = uint32_t(texture_descriptor_pools_.size()); + new_pool->index = new_pool_index; + new_pool->unallocated_count = TextureDescriptorPool::kDescriptorCount - 1; + new_pool->recycled_bits = 0; + new_pool->unallocated_next = texture_descriptor_pool_unallocated_first_; + texture_descriptor_pool_unallocated_first_ = new_pool; + new_pool->recycled_next = nullptr; + texture_descriptor_pools_.push_back(new_pool); + return new_pool_index << 6; +} + +VkDescriptorSet VulkanImmediateDrawer::GetTextureDescriptor( + uint32_t descriptor_index) const { + uint32_t pool_index = descriptor_index >> 6; + assert_true(pool_index < texture_descriptor_pools_.size()); + const TextureDescriptorPool* pool = texture_descriptor_pools_[pool_index]; + uint32_t allocation_index = descriptor_index & 63; + assert_true(allocation_index < TextureDescriptorPool::kDescriptorCount - + pool->unallocated_count); + return pool->sets[allocation_index]; +} + +void VulkanImmediateDrawer::FreeTextureDescriptor(uint32_t descriptor_index) { + uint32_t pool_index = descriptor_index >> 6; + assert_true(pool_index < texture_descriptor_pools_.size()); + TextureDescriptorPool* pool = texture_descriptor_pools_[pool_index]; + uint32_t allocation_index = descriptor_index & 63; + assert_true(allocation_index < TextureDescriptorPool::kDescriptorCount - + pool->unallocated_count); + assert_zero(pool->recycled_bits & (uint64_t(1) << allocation_index)); + if (!pool->recycled_bits) { + // Add to the free list if not already in it. 
+ pool->recycled_next = texture_descriptor_pool_recycled_first_; + texture_descriptor_pool_recycled_first_ = pool; + } + pool->recycled_bits |= uint64_t(1) << allocation_index; +} + +size_t VulkanImmediateDrawer::CreateVulkanTexture(uint32_t width, + uint32_t height, + ImmediateTextureFilter filter, + bool is_repeated, + const uint8_t* data) { + const VulkanProvider& provider = context_.GetVulkanProvider(); + const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + bool dedicated_allocation_supported = + provider.device_extensions().khr_dedicated_allocation; + + // Create the image and the descriptor. + + VkImageCreateInfo image_create_info; + image_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + image_create_info.pNext = nullptr; + image_create_info.flags = 0; + image_create_info.imageType = VK_IMAGE_TYPE_2D; + image_create_info.format = VK_FORMAT_R8G8B8A8_UNORM; + image_create_info.extent.width = width; + image_create_info.extent.height = height; + image_create_info.extent.depth = 1; + image_create_info.mipLevels = 1; + image_create_info.arrayLayers = 1; + image_create_info.samples = VK_SAMPLE_COUNT_1_BIT; + image_create_info.tiling = VK_IMAGE_TILING_OPTIMAL; + image_create_info.usage = + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + image_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + image_create_info.queueFamilyIndexCount = 0; + image_create_info.pQueueFamilyIndices = nullptr; + image_create_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + VkImage image; + if (dfn.vkCreateImage(device, &image_create_info, nullptr, &image) != + VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan image for a {}x{} immediate drawer texture", + width, height); + return SIZE_MAX; + } + + VkMemoryAllocateInfo image_memory_allocate_info; + VkMemoryRequirements image_memory_requirements; + dfn.vkGetImageMemoryRequirements(device, image, &image_memory_requirements); + if 
(!xe::bit_scan_forward(image_memory_requirements.memoryTypeBits & + provider.memory_types_device_local(), + &image_memory_allocate_info.memoryTypeIndex)) { + XELOGE( + "Failed to get a device-local memory type for a {}x{} immediate " + "drawer Vulkan image", + width, height); + dfn.vkDestroyImage(device, image, nullptr); + return SIZE_MAX; + } + image_memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + VkMemoryDedicatedAllocateInfoKHR image_memory_dedicated_allocate_info; + if (dedicated_allocation_supported) { + image_memory_dedicated_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; + image_memory_dedicated_allocate_info.pNext = nullptr; + image_memory_dedicated_allocate_info.image = image; + image_memory_dedicated_allocate_info.buffer = VK_NULL_HANDLE; + image_memory_allocate_info.pNext = &image_memory_dedicated_allocate_info; + } else { + image_memory_allocate_info.pNext = nullptr; + } + image_memory_allocate_info.allocationSize = image_memory_requirements.size; + VkDeviceMemory image_memory; + if (dfn.vkAllocateMemory(device, &image_memory_allocate_info, nullptr, + &image_memory) != VK_SUCCESS) { + XELOGE( + "Failed to allocate memory for a {}x{} immediate drawer Vulkan " + "image", + width, height); + dfn.vkDestroyImage(device, image, nullptr); + return SIZE_MAX; + } + if (dfn.vkBindImageMemory(device, image, image_memory, 0) != VK_SUCCESS) { + XELOGE("Failed to bind memory to a {}x{} immediate drawer Vulkan image", + width, height); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, image_memory, nullptr); + return SIZE_MAX; + } + + VkImageViewCreateInfo image_view_create_info; + image_view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + image_view_create_info.pNext = nullptr; + image_view_create_info.flags = 0; + image_view_create_info.image = image; + image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + image_view_create_info.format = VK_FORMAT_R8G8B8A8_UNORM; + // 
data == nullptr is a special case for (1, 1, 1, 1). + VkComponentSwizzle swizzle = + data ? VK_COMPONENT_SWIZZLE_IDENTITY : VK_COMPONENT_SWIZZLE_ONE; + image_view_create_info.components.r = swizzle; + image_view_create_info.components.g = swizzle; + image_view_create_info.components.b = swizzle; + image_view_create_info.components.a = swizzle; + util::InitializeSubresourceRange(image_view_create_info.subresourceRange); + VkImageView image_view; + if (dfn.vkCreateImageView(device, &image_view_create_info, nullptr, + &image_view) != VK_SUCCESS) { + XELOGE( + "Failed to create an image view for a {}x{} immediate drawer Vulkan " + "image", + width, height); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, image_memory, nullptr); + return SIZE_MAX; + } + + uint32_t descriptor_index = AllocateTextureDescriptor(); + if (descriptor_index == UINT32_MAX) { + XELOGE( + "Failed to allocate a Vulkan descriptor for a {}x{} immediate drawer " + "texture", + width, height); + dfn.vkDestroyImageView(device, image_view, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, image_memory, nullptr); + return SIZE_MAX; + } + VkDescriptorImageInfo descriptor_image_info; + VulkanProvider::HostSampler host_sampler; + if (filter == ImmediateTextureFilter::kLinear) { + host_sampler = is_repeated ? VulkanProvider::HostSampler::kLinearRepeat + : VulkanProvider::HostSampler::kLinearClamp; + } else { + host_sampler = is_repeated ? 
VulkanProvider::HostSampler::kNearestRepeat + : VulkanProvider::HostSampler::kNearestClamp; + } + descriptor_image_info.sampler = provider.GetHostSampler(host_sampler); + descriptor_image_info.imageView = image_view; + descriptor_image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + VkWriteDescriptorSet descriptor_write; + descriptor_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_write.pNext = nullptr; + descriptor_write.dstSet = GetTextureDescriptor(descriptor_index); + descriptor_write.dstBinding = 0; + descriptor_write.dstArrayElement = 0; + descriptor_write.descriptorCount = 1; + descriptor_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + descriptor_write.pImageInfo = &descriptor_image_info; + descriptor_write.pBufferInfo = nullptr; + descriptor_write.pTexelBufferView = nullptr; + dfn.vkUpdateDescriptorSets(device, 1, &descriptor_write, 0, nullptr); + + // Create and fill the upload buffer. + + // data == nullptr is a special case for (1, 1, 1, 1), clearing rather than + // uploading in this case. 
+ VkBuffer upload_buffer = VK_NULL_HANDLE; + VkDeviceMemory upload_buffer_memory = VK_NULL_HANDLE; + if (data) { + size_t data_size = sizeof(uint32_t) * width * height; + VkBufferCreateInfo upload_buffer_create_info; + upload_buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + upload_buffer_create_info.pNext = nullptr; + upload_buffer_create_info.flags = 0; + upload_buffer_create_info.size = VkDeviceSize(data_size); + upload_buffer_create_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + upload_buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + upload_buffer_create_info.queueFamilyIndexCount = 0; + upload_buffer_create_info.pQueueFamilyIndices = nullptr; + if (dfn.vkCreateBuffer(device, &upload_buffer_create_info, nullptr, + &upload_buffer) != VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan upload buffer for a {}x{} immediate " + "drawer texture", + width, height); + FreeTextureDescriptor(descriptor_index); + dfn.vkDestroyImageView(device, image_view, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, image_memory, nullptr); + return SIZE_MAX; + } + + VkMemoryAllocateInfo upload_buffer_memory_allocate_info; + VkMemoryRequirements upload_buffer_memory_requirements; + dfn.vkGetBufferMemoryRequirements(device, upload_buffer, + &upload_buffer_memory_requirements); + upload_buffer_memory_allocate_info.memoryTypeIndex = + util::ChooseHostMemoryType( + provider, upload_buffer_memory_requirements.memoryTypeBits, false); + if (upload_buffer_memory_allocate_info.memoryTypeIndex == UINT32_MAX) { + XELOGE( + "Failed to get a host-visible memory type for a Vulkan upload buffer " + "for a {}x{} immediate drawer texture", + width, height); + dfn.vkDestroyBuffer(device, upload_buffer, nullptr); + FreeTextureDescriptor(descriptor_index); + dfn.vkDestroyImageView(device, image_view, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, image_memory, nullptr); + return SIZE_MAX; + } + 
upload_buffer_memory_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + VkMemoryDedicatedAllocateInfoKHR + upload_buffer_memory_dedicated_allocate_info; + if (dedicated_allocation_supported) { + upload_buffer_memory_dedicated_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; + upload_buffer_memory_dedicated_allocate_info.pNext = nullptr; + upload_buffer_memory_dedicated_allocate_info.image = VK_NULL_HANDLE; + upload_buffer_memory_dedicated_allocate_info.buffer = upload_buffer; + upload_buffer_memory_allocate_info.pNext = + &upload_buffer_memory_dedicated_allocate_info; + } else { + upload_buffer_memory_allocate_info.pNext = nullptr; + } + upload_buffer_memory_allocate_info.allocationSize = + util::GetMappableMemorySize(provider, + upload_buffer_memory_requirements.size); + if (dfn.vkAllocateMemory(device, &upload_buffer_memory_allocate_info, + nullptr, &upload_buffer_memory) != VK_SUCCESS) { + XELOGE( + "Failed to allocate memory for a Vulkan upload buffer for a {}x{} " + "immediate drawer texture", + width, height); + dfn.vkDestroyBuffer(device, upload_buffer, nullptr); + FreeTextureDescriptor(descriptor_index); + dfn.vkDestroyImageView(device, image_view, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, image_memory, nullptr); + return SIZE_MAX; + } + if (dfn.vkBindBufferMemory(device, upload_buffer, upload_buffer_memory, + 0) != VK_SUCCESS) { + XELOGE( + "Failed to bind memory to a Vulkan upload buffer for a {}x{} " + "immediate drawer texture", + width, height); + dfn.vkDestroyBuffer(device, upload_buffer, nullptr); + dfn.vkFreeMemory(device, upload_buffer_memory, nullptr); + FreeTextureDescriptor(descriptor_index); + dfn.vkDestroyImageView(device, image_view, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, image_memory, nullptr); + return SIZE_MAX; + } + + void* upload_buffer_mapping; + if (dfn.vkMapMemory(device, upload_buffer_memory, 0, 
VK_WHOLE_SIZE, 0, + &upload_buffer_mapping) != VK_SUCCESS) { + XELOGE( + "Failed to map Vulkan upload buffer memory for a {}x{} immediate " + "drawer texture", + width, height); + dfn.vkDestroyBuffer(device, upload_buffer, nullptr); + dfn.vkFreeMemory(device, upload_buffer_memory, nullptr); + FreeTextureDescriptor(descriptor_index); + dfn.vkDestroyImageView(device, image_view, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, image_memory, nullptr); + return SIZE_MAX; + } + std::memcpy(upload_buffer_mapping, data, data_size); + util::FlushMappedMemoryRange( + provider, upload_buffer_memory, + upload_buffer_memory_allocate_info.memoryTypeIndex); + dfn.vkUnmapMemory(device, upload_buffer_memory); + } + + size_t texture_index; + if (!textures_free_.empty()) { + texture_index = textures_free_.back(); + textures_free_.pop_back(); + } else { + texture_index = textures_.size(); + textures_.emplace_back(); + } + Texture& texture = textures_[texture_index]; + texture.immediate_texture = nullptr; + texture.image = image; + texture.memory = image_memory; + texture.image_view = image_view; + texture.descriptor_index = descriptor_index; + // The reference that will be returned to the caller. + texture.reference_count = 1; + + PendingTextureUpload& pending_texture_upload = + texture_uploads_pending_.emplace_back(); + // While the upload has not been yet completed, keep a reference to the + // texture because its lifetime is not tied to that of the ImmediateTexture + // (and thus to context's submissions) now. 
+ ++texture.reference_count; + pending_texture_upload.texture_index = texture_index; + pending_texture_upload.width = width; + pending_texture_upload.height = height; + pending_texture_upload.buffer = upload_buffer; + pending_texture_upload.buffer_memory = upload_buffer_memory; + + return texture_index; +} + +void VulkanImmediateDrawer::ReleaseTexture(size_t index) { + assert_true(index < textures_.size()); + Texture& texture = textures_[index]; + assert_not_zero(texture.reference_count); + if (--texture.reference_count) { + return; + } + // If the texture is attached to a VulkanImmediateTexture, the + // VulkanImmediateTexture must hold a reference to it. + assert_null(texture.immediate_texture); + FreeTextureDescriptor(texture.descriptor_index); + const VulkanProvider& provider = context_.GetVulkanProvider(); + const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + dfn.vkDestroyImageView(device, texture.image_view, nullptr); + dfn.vkDestroyImage(device, texture.image, nullptr); + dfn.vkFreeMemory(device, texture.memory, nullptr); + textures_free_.push_back(index); + // TODO(Triang3l): Track last usage submission because it turns out that + // deletion in the ImGui and the profiler actually happens before after + // awaiting submission completion. 
+} + } // namespace vulkan } // namespace ui } // namespace xe diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h index 499dd7a8b..20b0cf73e 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h @@ -10,7 +10,11 @@ #ifndef XENIA_UI_VULKAN_VULKAN_IMMEDIATE_DRAWER_H_ #define XENIA_UI_VULKAN_VULKAN_IMMEDIATE_DRAWER_H_ +#include +#include #include +#include +#include #include "xenia/ui/immediate_drawer.h" #include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h" @@ -48,10 +52,116 @@ class VulkanImmediateDrawer : public ImmediateDrawer { } vertex; }; + class VulkanImmediateTexture : public ImmediateTexture { + public: + VulkanImmediateTexture(uint32_t width, uint32_t height, + VulkanImmediateDrawer* immediate_drawer, + uintptr_t immediate_drawer_handle) + : ImmediateTexture(width, height), immediate_drawer_(immediate_drawer) { + handle = immediate_drawer_handle; + } + ~VulkanImmediateTexture() { + if (immediate_drawer_) { + immediate_drawer_->HandleImmediateTextureDestroyed(handle); + } + } + void DetachFromImmediateDrawer() { + immediate_drawer_ = nullptr; + handle = 0; + } + + private: + VulkanImmediateDrawer* immediate_drawer_; + }; + + struct TextureDescriptorPool { + // Using uint64_t for recycled bits. + static constexpr uint32_t kDescriptorCount = 64; + VkDescriptorPool pool; + VkDescriptorSet sets[kDescriptorCount]; + uint32_t index; + uint32_t unallocated_count; + uint64_t recycled_bits; + TextureDescriptorPool* unallocated_next; + TextureDescriptorPool* recycled_next; + }; + + // Tracked separately from VulkanImmediateTexture because copying may take + // additional references. + struct Texture { + // Null for the white texture, reference held by the drawer itself instead + // of immediate textures. 
+ VulkanImmediateTexture* immediate_texture; + VkImage image; + VkDeviceMemory memory; + VkImageView image_view; + uint32_t descriptor_index; + uint32_t reference_count; + }; + bool EnsurePipelinesCreated(); + // Allocates a combined image sampler in a pool and returns its index, or + // UINT32_MAX in case of failure. + uint32_t AllocateTextureDescriptor(); + VkDescriptorSet GetTextureDescriptor(uint32_t descriptor_index) const; + void FreeTextureDescriptor(uint32_t descriptor_index); + + // Returns SIZE_MAX in case of failure. The created texture will have a + // reference count of 1 plus references needed for uploading, but will not be + // attached to a VulkanImmediateTexture (will return the reference to the + // caller, in short). If data is null, a (1, 1, 1, 1) image will be created, + // which can be used as a replacement when drawing without a real texture. + size_t CreateVulkanTexture(uint32_t width, uint32_t height, + ImmediateTextureFilter filter, bool is_repeated, + const uint8_t* data); + void ReleaseTexture(size_t index); + uintptr_t GetTextureHandleForIndex(size_t index) const { + return index != white_texture_index_ ? uintptr_t(index + 1) : 0; + } + size_t GetTextureIndexForHandle(uintptr_t handle) const { + // 0 is a special value for no texture. + return handle ? size_t(handle - 1) : white_texture_index_; + } + // For calling from VulkanImmediateTexture. + void HandleImmediateTextureDestroyed(uintptr_t handle) { + size_t index = GetTextureIndexForHandle(handle); + if (index == white_texture_index_) { + return; + } + textures_[index].immediate_texture = nullptr; + ReleaseTexture(index); + } + VulkanContext& context_; + // Combined image sampler pools for textures. 
+ VkDescriptorSetLayout texture_descriptor_set_layout_; + std::vector texture_descriptor_pools_; + TextureDescriptorPool* texture_descriptor_pool_unallocated_first_ = nullptr; + TextureDescriptorPool* texture_descriptor_pool_recycled_first_ = nullptr; + + std::vector textures_; + std::vector textures_free_; + struct PendingTextureUpload { + size_t texture_index; + uint32_t width; + uint32_t height; + // VK_NULL_HANDLE if need to clear rather than to copy. + VkBuffer buffer; + VkDeviceMemory buffer_memory; + }; + std::vector texture_uploads_pending_; + struct SubmittedTextureUpload { + size_t texture_index; + // VK_NULL_HANDLE if cleared rather than copied. + VkBuffer buffer; + VkDeviceMemory buffer_memory; + uint64_t submission_index; + }; + std::deque texture_uploads_submitted_; + size_t white_texture_index_; + VkPipelineLayout pipeline_layout_ = VK_NULL_HANDLE; std::unique_ptr vertex_buffer_pool_; @@ -64,6 +174,7 @@ class VulkanImmediateDrawer : public ImmediateDrawer { VkExtent2D current_render_target_extent_; VkRect2D current_scissor_; VkPipeline current_pipeline_; + uint32_t current_texture_descriptor_index_; bool batch_open_ = false; bool batch_has_index_buffer_; }; diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index 36c3b9a2e..6444af302 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -9,6 +9,7 @@ #include "xenia/ui/vulkan/vulkan_provider.h" +#include #include #include @@ -60,6 +61,12 @@ VulkanProvider::VulkanProvider(Window* main_window) : GraphicsProvider(main_window) {} VulkanProvider::~VulkanProvider() { + for (size_t i = 0; i < size_t(HostSampler::kCount); ++i) { + if (host_samplers_[i] != VK_NULL_HANDLE) { + dfn_.vkDestroySampler(device_, host_samplers_[i], nullptr); + } + } + if (device_ != VK_NULL_HANDLE) { ifn_.vkDestroyDevice(device_, nullptr); } @@ -414,6 +421,7 @@ bool VulkanProvider::Initialize() { memory_types_device_local_ = 0; 
memory_types_host_visible_ = 0; memory_types_host_coherent_ = 0; + memory_types_host_cached_ = 0; for (uint32_t j = 0; j < memory_properties.memoryTypeCount; ++j) { VkMemoryPropertyFlags memory_property_flags = memory_properties.memoryTypes[j].propertyFlags; @@ -427,6 +435,9 @@ bool VulkanProvider::Initialize() { if (memory_property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) { memory_types_host_coherent_ |= memory_type_bit; } + if (memory_property_flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) { + memory_types_host_cached_ |= memory_type_bit; + } } if (!memory_types_device_local_ && !memory_types_host_visible_) { // Shouldn't happen according to the specification. @@ -516,38 +527,52 @@ bool VulkanProvider::Initialize() { nullptr; XE_VULKAN_LOAD_DFN(vkAcquireNextImageKHR); XE_VULKAN_LOAD_DFN(vkAllocateCommandBuffers); + XE_VULKAN_LOAD_DFN(vkAllocateDescriptorSets); XE_VULKAN_LOAD_DFN(vkAllocateMemory); XE_VULKAN_LOAD_DFN(vkBeginCommandBuffer); XE_VULKAN_LOAD_DFN(vkBindBufferMemory); + XE_VULKAN_LOAD_DFN(vkBindImageMemory); XE_VULKAN_LOAD_DFN(vkCmdBeginRenderPass); + XE_VULKAN_LOAD_DFN(vkCmdBindDescriptorSets); XE_VULKAN_LOAD_DFN(vkCmdBindIndexBuffer); XE_VULKAN_LOAD_DFN(vkCmdBindPipeline); XE_VULKAN_LOAD_DFN(vkCmdBindVertexBuffers); + XE_VULKAN_LOAD_DFN(vkCmdClearColorImage); + XE_VULKAN_LOAD_DFN(vkCmdCopyBufferToImage); XE_VULKAN_LOAD_DFN(vkCmdDraw); XE_VULKAN_LOAD_DFN(vkCmdDrawIndexed); XE_VULKAN_LOAD_DFN(vkCmdEndRenderPass); + XE_VULKAN_LOAD_DFN(vkCmdPipelineBarrier); XE_VULKAN_LOAD_DFN(vkCmdPushConstants); XE_VULKAN_LOAD_DFN(vkCmdSetScissor); XE_VULKAN_LOAD_DFN(vkCmdSetViewport); XE_VULKAN_LOAD_DFN(vkCreateBuffer); XE_VULKAN_LOAD_DFN(vkCreateCommandPool); + XE_VULKAN_LOAD_DFN(vkCreateDescriptorPool); + XE_VULKAN_LOAD_DFN(vkCreateDescriptorSetLayout); XE_VULKAN_LOAD_DFN(vkCreateFence); XE_VULKAN_LOAD_DFN(vkCreateFramebuffer); XE_VULKAN_LOAD_DFN(vkCreateGraphicsPipelines); + XE_VULKAN_LOAD_DFN(vkCreateImage); XE_VULKAN_LOAD_DFN(vkCreateImageView); 
XE_VULKAN_LOAD_DFN(vkCreatePipelineLayout); XE_VULKAN_LOAD_DFN(vkCreateRenderPass); + XE_VULKAN_LOAD_DFN(vkCreateSampler); XE_VULKAN_LOAD_DFN(vkCreateSemaphore); XE_VULKAN_LOAD_DFN(vkCreateShaderModule); XE_VULKAN_LOAD_DFN(vkCreateSwapchainKHR); XE_VULKAN_LOAD_DFN(vkDestroyBuffer); XE_VULKAN_LOAD_DFN(vkDestroyCommandPool); + XE_VULKAN_LOAD_DFN(vkDestroyDescriptorPool); + XE_VULKAN_LOAD_DFN(vkDestroyDescriptorSetLayout); XE_VULKAN_LOAD_DFN(vkDestroyFence); XE_VULKAN_LOAD_DFN(vkDestroyFramebuffer); + XE_VULKAN_LOAD_DFN(vkDestroyImage); XE_VULKAN_LOAD_DFN(vkDestroyImageView); XE_VULKAN_LOAD_DFN(vkDestroyPipeline); XE_VULKAN_LOAD_DFN(vkDestroyPipelineLayout); XE_VULKAN_LOAD_DFN(vkDestroyRenderPass); + XE_VULKAN_LOAD_DFN(vkDestroySampler); XE_VULKAN_LOAD_DFN(vkDestroySemaphore); XE_VULKAN_LOAD_DFN(vkDestroyShaderModule); XE_VULKAN_LOAD_DFN(vkDestroySwapchainKHR); @@ -556,12 +581,15 @@ bool VulkanProvider::Initialize() { XE_VULKAN_LOAD_DFN(vkFreeMemory); XE_VULKAN_LOAD_DFN(vkGetBufferMemoryRequirements); XE_VULKAN_LOAD_DFN(vkGetDeviceQueue); + XE_VULKAN_LOAD_DFN(vkGetImageMemoryRequirements); XE_VULKAN_LOAD_DFN(vkGetSwapchainImagesKHR); XE_VULKAN_LOAD_DFN(vkMapMemory); XE_VULKAN_LOAD_DFN(vkResetCommandPool); XE_VULKAN_LOAD_DFN(vkResetFences); XE_VULKAN_LOAD_DFN(vkQueuePresentKHR); XE_VULKAN_LOAD_DFN(vkQueueSubmit); + XE_VULKAN_LOAD_DFN(vkUnmapMemory); + XE_VULKAN_LOAD_DFN(vkUpdateDescriptorSets); XE_VULKAN_LOAD_DFN(vkWaitForFences); #undef XE_VULKAN_LOAD_DFN if (!device_functions_loaded) { @@ -583,10 +611,54 @@ bool VulkanProvider::Initialize() { queue_sparse_binding_ = VK_NULL_HANDLE; } + // Create host-side samplers. 
+ VkSamplerCreateInfo sampler_create_info = {}; + sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampler_create_info.magFilter = VK_FILTER_NEAREST; + sampler_create_info.minFilter = VK_FILTER_NEAREST; + sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + sampler_create_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampler_create_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampler_create_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampler_create_info.maxLod = FLT_MAX; + if (dfn_.vkCreateSampler( + device_, &sampler_create_info, nullptr, + &host_samplers_[size_t(HostSampler::kNearestClamp)]) != VK_SUCCESS) { + XELOGE("Failed to create the nearest-neighbor clamping Vulkan sampler"); + return false; + } + sampler_create_info.magFilter = VK_FILTER_LINEAR; + sampler_create_info.minFilter = VK_FILTER_LINEAR; + sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + if (dfn_.vkCreateSampler( + device_, &sampler_create_info, nullptr, + &host_samplers_[size_t(HostSampler::kLinearClamp)]) != VK_SUCCESS) { + XELOGE("Failed to create the bilinear-filtering clamping Vulkan sampler"); + return false; + } + sampler_create_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_create_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_create_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + if (dfn_.vkCreateSampler( + device_, &sampler_create_info, nullptr, + &host_samplers_[size_t(HostSampler::kLinearRepeat)]) != VK_SUCCESS) { + XELOGE("Failed to create the bilinear-filtering repeating Vulkan sampler"); + return false; + } + sampler_create_info.magFilter = VK_FILTER_NEAREST; + sampler_create_info.minFilter = VK_FILTER_NEAREST; + sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + if (dfn_.vkCreateSampler( + device_, &sampler_create_info, nullptr, + &host_samplers_[size_t(HostSampler::kNearestRepeat)]) != VK_SUCCESS) { + XELOGE("Failed to create the 
nearest-neighbor repeating Vulkan sampler"); + return false; + } + return true; } -std::unique_ptr VulkanProvider::CreateContext( +std::unique_ptr VulkanProvider::CreateHostContext( Window* target_window) { auto new_context = std::unique_ptr(new VulkanContext(this, target_window)); @@ -596,7 +668,7 @@ std::unique_ptr VulkanProvider::CreateContext( return std::unique_ptr(new_context.release()); } -std::unique_ptr VulkanProvider::CreateOffscreenContext() { +std::unique_ptr VulkanProvider::CreateEmulationContext() { auto new_context = std::unique_ptr(new VulkanContext(this, nullptr)); if (!new_context->Initialize()) { diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h index 4e254bbe0..ca3af3473 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.h +++ b/src/xenia/ui/vulkan/vulkan_provider.h @@ -43,9 +43,9 @@ class VulkanProvider : public GraphicsProvider { static std::unique_ptr Create(Window* main_window); - std::unique_ptr CreateContext( + std::unique_ptr CreateHostContext( Window* target_window) override; - std::unique_ptr CreateOffscreenContext() override; + std::unique_ptr CreateEmulationContext() override; struct LibraryFunctions { // From the module. @@ -113,6 +113,9 @@ class VulkanProvider : public GraphicsProvider { uint32_t memory_types_host_coherent() const { return memory_types_host_coherent_; } + uint32_t memory_types_host_cached() const { + return memory_types_host_cached_; + } // FIXME(Triang3l): Allow a separate queue for present - see // vulkan_provider.cc for details. 
uint32_t queue_family_graphics_compute() const { @@ -123,38 +126,52 @@ class VulkanProvider : public GraphicsProvider { struct DeviceFunctions { PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR; PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers; + PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets; PFN_vkAllocateMemory vkAllocateMemory; PFN_vkBeginCommandBuffer vkBeginCommandBuffer; PFN_vkBindBufferMemory vkBindBufferMemory; + PFN_vkBindImageMemory vkBindImageMemory; PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass; + PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets; PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer; PFN_vkCmdBindPipeline vkCmdBindPipeline; PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers; + PFN_vkCmdClearColorImage vkCmdClearColorImage; + PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage; PFN_vkCmdDraw vkCmdDraw; PFN_vkCmdDrawIndexed vkCmdDrawIndexed; PFN_vkCmdEndRenderPass vkCmdEndRenderPass; + PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier; PFN_vkCmdPushConstants vkCmdPushConstants; PFN_vkCmdSetScissor vkCmdSetScissor; PFN_vkCmdSetViewport vkCmdSetViewport; PFN_vkCreateBuffer vkCreateBuffer; PFN_vkCreateCommandPool vkCreateCommandPool; + PFN_vkCreateDescriptorPool vkCreateDescriptorPool; + PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout; PFN_vkCreateFence vkCreateFence; PFN_vkCreateFramebuffer vkCreateFramebuffer; PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines; + PFN_vkCreateImage vkCreateImage; PFN_vkCreateImageView vkCreateImageView; PFN_vkCreatePipelineLayout vkCreatePipelineLayout; PFN_vkCreateRenderPass vkCreateRenderPass; + PFN_vkCreateSampler vkCreateSampler; PFN_vkCreateSemaphore vkCreateSemaphore; PFN_vkCreateShaderModule vkCreateShaderModule; PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR; PFN_vkDestroyBuffer vkDestroyBuffer; PFN_vkDestroyCommandPool vkDestroyCommandPool; + PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool; + PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout; PFN_vkDestroyFence 
vkDestroyFence; PFN_vkDestroyFramebuffer vkDestroyFramebuffer; + PFN_vkDestroyImage vkDestroyImage; PFN_vkDestroyImageView vkDestroyImageView; PFN_vkDestroyPipeline vkDestroyPipeline; PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout; PFN_vkDestroyRenderPass vkDestroyRenderPass; + PFN_vkDestroySampler vkDestroySampler; PFN_vkDestroySemaphore vkDestroySemaphore; PFN_vkDestroyShaderModule vkDestroyShaderModule; PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR; @@ -163,12 +180,15 @@ class VulkanProvider : public GraphicsProvider { PFN_vkFreeMemory vkFreeMemory; PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; PFN_vkGetDeviceQueue vkGetDeviceQueue; + PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR; PFN_vkMapMemory vkMapMemory; PFN_vkResetCommandPool vkResetCommandPool; PFN_vkResetFences vkResetFences; PFN_vkQueuePresentKHR vkQueuePresentKHR; PFN_vkQueueSubmit vkQueueSubmit; + PFN_vkUnmapMemory vkUnmapMemory; + PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; PFN_vkWaitForFences vkWaitForFences; }; const DeviceFunctions& dfn() const { return dfn_; } @@ -177,6 +197,22 @@ class VulkanProvider : public GraphicsProvider { // May be VK_NULL_HANDLE if not available. VkQueue queue_sparse_binding() const { return queue_sparse_binding_; } + // Samplers that may be useful for host needs. Only these samplers should be + // used in host, non-emulation contexts, because the total number of samplers + // is heavily limited (4000) on Nvidia GPUs - the rest of samplers are + // allocated for emulation. 
+ enum class HostSampler { + kNearestClamp, + kLinearClamp, + kNearestRepeat, + kLinearRepeat, + + kCount, + }; + VkSampler GetHostSampler(HostSampler sampler) const { + return host_samplers_[size_t(sampler)]; + } + private: explicit VulkanProvider(Window* main_window); @@ -200,6 +236,7 @@ class VulkanProvider : public GraphicsProvider { uint32_t memory_types_device_local_; uint32_t memory_types_host_visible_; uint32_t memory_types_host_coherent_; + uint32_t memory_types_host_cached_; uint32_t queue_family_graphics_compute_; VkDevice device_ = VK_NULL_HANDLE; @@ -207,6 +244,8 @@ class VulkanProvider : public GraphicsProvider { VkQueue queue_graphics_compute_; // May be VK_NULL_HANDLE if not available. VkQueue queue_sparse_binding_; + + VkSampler host_samplers_[size_t(HostSampler::kCount)] = {}; }; } // namespace vulkan diff --git a/src/xenia/ui/vulkan/vulkan_upload_buffer_pool.cc b/src/xenia/ui/vulkan/vulkan_upload_buffer_pool.cc index 11ef6766b..1c71db6ea 100644 --- a/src/xenia/ui/vulkan/vulkan_upload_buffer_pool.cc +++ b/src/xenia/ui/vulkan/vulkan_upload_buffer_pool.cc @@ -13,24 +13,21 @@ #include "xenia/base/logging.h" #include "xenia/base/math.h" +#include "xenia/ui/vulkan/vulkan_util.h" namespace xe { namespace ui { namespace vulkan { +// Memory mappings are always aligned to nonCoherentAtomSize, so for simplicity, +// round the page size to it now via GetMappableMemorySize. VulkanUploadBufferPool::VulkanUploadBufferPool(const VulkanProvider& provider, VkBufferUsageFlags usage, size_t page_size) - : GraphicsUploadBufferPool(page_size), provider_(provider), usage_(usage) { - VkDeviceSize non_coherent_atom_size = - provider_.device_properties().limits.nonCoherentAtomSize; - // Memory mappings are always aligned to nonCoherentAtomSize, so for - // simplicity, round the page size to it now. On some Android implementations, - // nonCoherentAtomSize is 0, not 1. 
- if (non_coherent_atom_size > 1) { - page_size_ = xe::round_up(page_size_, non_coherent_atom_size); - } -} + : GraphicsUploadBufferPool(size_t( + util::GetMappableMemorySize(provider, VkDeviceSize(page_size)))), + provider_(provider), + usage_(usage) {} uint8_t* VulkanUploadBufferPool::Request(uint64_t submission_index, size_t size, size_t alignment, VkBuffer& buffer_out, @@ -96,10 +93,9 @@ VulkanUploadBufferPool::CreatePageImplementation() { if (memory_type_ == kMemoryTypeUnknown) { VkMemoryRequirements memory_requirements; dfn.vkGetBufferMemoryRequirements(device, buffer, &memory_requirements); - uint32_t memory_types_host_visible = provider_.memory_types_host_visible(); - if (!xe::bit_scan_forward( - memory_requirements.memoryTypeBits & memory_types_host_visible, - &memory_type_)) { + memory_type_ = util::ChooseHostMemoryType( + provider_, memory_requirements.memoryTypeBits, false); + if (memory_type_ == UINT32_MAX) { XELOGE( "No host-visible memory types can store an Vulkan upload buffer with " "{} bytes", @@ -125,11 +121,10 @@ VulkanUploadBufferPool::CreatePageImplementation() { VkMemoryRequirements memory_requirements_expanded; dfn.vkGetBufferMemoryRequirements(device, buffer_expanded, &memory_requirements_expanded); - uint32_t memory_type_expanded; + uint32_t memory_type_expanded = util::ChooseHostMemoryType( + provider_, memory_requirements.memoryTypeBits, false); if (memory_requirements_expanded.size <= allocation_size_ && - xe::bit_scan_forward(memory_requirements_expanded.memoryTypeBits & - memory_types_host_visible, - &memory_type_expanded)) { + memory_type_expanded != UINT32_MAX) { // page_size_ must be aligned to nonCoherentAtomSize. 
page_size_ = size_t(allocation_size_aligned); allocation_size_ = memory_requirements_expanded.size; @@ -190,28 +185,9 @@ VulkanUploadBufferPool::CreatePageImplementation() { void VulkanUploadBufferPool::FlushPageWrites(Page* page, size_t offset, size_t size) { - if (provider_.memory_types_host_coherent() & (uint32_t(1) << memory_type_)) { - return; - } - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - VkMappedMemoryRange range; - range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; - range.pNext = nullptr; - range.memory = static_cast(page)->memory_; - range.offset = VkDeviceSize(offset); - range.size = VkDeviceSize(size); - VkDeviceSize non_coherent_atom_size = - provider_.device_properties().limits.nonCoherentAtomSize; - // On some Android implementations, nonCoherentAtomSize is 0, not 1. - if (non_coherent_atom_size > 1) { - VkDeviceSize end = - xe::round_up(range.offset + range.size, non_coherent_atom_size); - range.offset = - range.offset / non_coherent_atom_size * non_coherent_atom_size; - range.size = end - range.offset; - } - dfn.vkFlushMappedMemoryRanges(device, 1, &range); + util::FlushMappedMemoryRange( + provider_, static_cast(page)->memory_, memory_type_, + VkDeviceSize(offset), VkDeviceSize(size)); } VulkanUploadBufferPool::VulkanPage::~VulkanPage() { diff --git a/src/xenia/ui/vulkan/vulkan_util.cc b/src/xenia/ui/vulkan/vulkan_util.cc new file mode 100644 index 000000000..d146beb83 --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_util.cc @@ -0,0 +1,49 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/ui/vulkan/vulkan_util.h" + +#include "xenia/base/math.h" +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace ui { +namespace vulkan { +namespace util { + +void FlushMappedMemoryRange(const VulkanProvider& provider, + VkDeviceMemory memory, uint32_t memory_type, + VkDeviceSize offset, VkDeviceSize size) { + if (!size || + (provider.memory_types_host_coherent() & (uint32_t(1) << memory_type))) { + return; + } + VkMappedMemoryRange range; + range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + range.pNext = nullptr; + range.memory = memory; + range.offset = offset; + range.size = size; + VkDeviceSize non_coherent_atom_size = + provider.device_properties().limits.nonCoherentAtomSize; + // On some Android implementations, nonCoherentAtomSize is 0, not 1. + if (non_coherent_atom_size > 1) { + range.offset = offset / non_coherent_atom_size * non_coherent_atom_size; + if (size != VK_WHOLE_SIZE) { + range.size = + xe::round_up(offset + size, non_coherent_atom_size) - range.offset; + } + } + provider.dfn().vkFlushMappedMemoryRanges(provider.device(), 1, &range); +} + +} // namespace util +} // namespace vulkan +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/vulkan/vulkan_util.h b/src/xenia/ui/vulkan/vulkan_util.h index 6239aed76..61bfec617 100644 --- a/src/xenia/ui/vulkan/vulkan_util.h +++ b/src/xenia/ui/vulkan/vulkan_util.h @@ -10,6 +10,7 @@ #ifndef XENIA_UI_VULKAN_VULKAN_UTIL_H_ #define XENIA_UI_VULKAN_VULKAN_UTIL_H_ +#include "xenia/base/math.h" #include "xenia/ui/vulkan/vulkan_provider.h" namespace xe { @@ -37,6 +38,53 @@ inline bool DestroyAndNullHandle(F* destroy_function, P parent, T& handle) { return false; } +inline VkDeviceSize GetMappableMemorySize(const VulkanProvider& provider, + VkDeviceSize size) { + VkDeviceSize non_coherent_atom_size = + provider.device_properties().limits.nonCoherentAtomSize; + // On some Android 
implementations, nonCoherentAtomSize is 0, not 1. + if (non_coherent_atom_size > 1) { + size = xe::round_up(size, non_coherent_atom_size, false); + } + return size; +} + +inline uint32_t ChooseHostMemoryType(const VulkanProvider& provider, + uint32_t supported_types, + bool is_readback) { + supported_types &= provider.memory_types_host_visible(); + uint32_t host_cached = provider.memory_types_host_cached(); + uint32_t memory_type; + // For upload, uncached is preferred so writes do not pollute the CPU cache. + // For readback, cached is preferred so multiple CPU reads are fast. + // If the preferred caching behavior is not available, pick any host-visible. + if (xe::bit_scan_forward( + supported_types & (is_readback ? host_cached : ~host_cached), + &memory_type) || + xe::bit_scan_forward(supported_types, &memory_type)) { + return memory_type; + } + return UINT32_MAX; +} + +void FlushMappedMemoryRange(const VulkanProvider& provider, + VkDeviceMemory memory, uint32_t memory_type, + VkDeviceSize offset = 0, + VkDeviceSize size = VK_WHOLE_SIZE); + +inline void InitializeSubresourceRange( + VkImageSubresourceRange& range, + VkImageAspectFlags aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT, + uint32_t base_mip_level = 0, uint32_t level_count = VK_REMAINING_MIP_LEVELS, + uint32_t base_array_layer = 0, + uint32_t layer_count = VK_REMAINING_ARRAY_LAYERS) { + range.aspectMask = aspect_mask; + range.baseMipLevel = base_mip_level; + range.levelCount = level_count; + range.baseArrayLayer = base_array_layer; + range.layerCount = layer_count; +} + inline VkShaderModule CreateShaderModule(const VulkanProvider& provider, const void* code, size_t code_size) { VkShaderModuleCreateInfo shader_module_create_info; diff --git a/src/xenia/ui/window_demo.cc b/src/xenia/ui/window_demo.cc index a6a05140a..5423b13d9 100644 --- a/src/xenia/ui/window_demo.cc +++ b/src/xenia/ui/window_demo.cc @@ -73,7 +73,7 @@ int window_demo_main(const std::vector& args) { // The window will finish initialization wtih 
the context (loading // resources, etc). graphics_provider = CreateDemoGraphicsProvider(window.get()); - window->set_context(graphics_provider->CreateContext(window.get())); + window->set_context(graphics_provider->CreateHostContext(window.get())); // Setup the profiler display. GraphicsContextLock context_lock(window->context()); From 183269ba1644025c89e1583da3b8ea75d757a6ba Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 27 Sep 2020 15:06:10 +0300 Subject: [PATCH 015/123] [Vulkan] Delayed ImmediateTexture destruction --- .../ui/vulkan/vulkan_immediate_drawer.cc | 298 ++++++++++-------- src/xenia/ui/vulkan/vulkan_immediate_drawer.h | 105 +++--- 2 files changed, 202 insertions(+), 201 deletions(-) diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index f5e17e3c9..671c90503 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -11,6 +11,8 @@ #include #include +#include +#include #include "xenia/base/assert.h" #include "xenia/base/logging.h" @@ -26,6 +28,12 @@ namespace vulkan { #include "xenia/ui/shaders/bytecode/vulkan_spirv/immediate_frag.h" #include "xenia/ui/shaders/bytecode/vulkan_spirv/immediate_vert.h" +VulkanImmediateDrawer::VulkanImmediateTexture::~VulkanImmediateTexture() { + if (immediate_drawer_) { + immediate_drawer_->OnImmediateTextureDestroyed(*this); + } +} + VulkanImmediateDrawer::VulkanImmediateDrawer(VulkanContext& graphics_context) : ImmediateDrawer(&graphics_context), context_(graphics_context) {} @@ -64,9 +72,10 @@ bool VulkanImmediateDrawer::Initialize() { // Create the (1, 1, 1, 1) texture as a replacement when drawing without a // real texture. 
- white_texture_index_ = CreateVulkanTexture( - 1, 1, ImmediateTextureFilter::kNearest, false, nullptr); - if (white_texture_index_ == SIZE_MAX) { + size_t white_texture_pending_upload_index; + if (!CreateTextureResource(1, 1, ImmediateTextureFilter::kNearest, false, + nullptr, white_texture_, + white_texture_pending_upload_index)) { XELOGE("Failed to create a blank texture for the Vulkan immediate drawer"); Shutdown(); return false; @@ -117,39 +126,36 @@ void VulkanImmediateDrawer::Shutdown() { util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device, pipeline_layout_); - for (SubmittedTextureUpload& submitted_texture_upload : - texture_uploads_submitted_) { - if (submitted_texture_upload.buffer != VK_NULL_HANDLE) { - dfn.vkDestroyBuffer(device, submitted_texture_upload.buffer, nullptr); - } - if (submitted_texture_upload.buffer_memory != VK_NULL_HANDLE) { - dfn.vkFreeMemory(device, submitted_texture_upload.buffer_memory, nullptr); - } + for (auto& deleted_texture : textures_deleted_) { + DestroyTextureResource(deleted_texture.first); } - texture_uploads_submitted_.clear(); + textures_deleted_.clear(); + for (SubmittedTextureUploadBuffer& submitted_texture_upload_buffer : + texture_upload_buffers_submitted_) { + dfn.vkDestroyBuffer(device, submitted_texture_upload_buffer.buffer, + nullptr); + dfn.vkFreeMemory(device, submitted_texture_upload_buffer.buffer_memory, + nullptr); + } + texture_upload_buffers_submitted_.clear(); for (PendingTextureUpload& pending_texture_upload : texture_uploads_pending_) { - if (pending_texture_upload.buffer != VK_NULL_HANDLE) { - dfn.vkDestroyBuffer(device, pending_texture_upload.buffer, nullptr); - } - if (pending_texture_upload.buffer_memory != VK_NULL_HANDLE) { - dfn.vkFreeMemory(device, pending_texture_upload.buffer_memory, nullptr); - } + dfn.vkDestroyBuffer(device, pending_texture_upload.buffer, nullptr); + dfn.vkFreeMemory(device, pending_texture_upload.buffer_memory, nullptr); } texture_uploads_pending_.clear(); - 
textures_free_.clear(); - for (Texture& texture : textures_) { - if (!texture.reference_count) { + for (VulkanImmediateTexture* texture : textures_) { + if (texture->immediate_drawer_ != this) { continue; } - if (texture.immediate_texture) { - texture.immediate_texture->DetachFromImmediateDrawer(); - } - dfn.vkDestroyImageView(device, texture.image_view, nullptr); - dfn.vkDestroyImage(device, texture.image, nullptr); - dfn.vkFreeMemory(device, texture.memory, nullptr); + texture->immediate_drawer_ = nullptr; + DestroyTextureResource(texture->resource_); } textures_.clear(); + if (white_texture_.image != VK_NULL_HANDLE) { + DestroyTextureResource(white_texture_); + white_texture_.image = VK_NULL_HANDLE; + } texture_descriptor_pool_recycled_first_ = nullptr; texture_descriptor_pool_unallocated_first_ = nullptr; @@ -166,21 +172,19 @@ std::unique_ptr VulkanImmediateDrawer::CreateTexture( uint32_t width, uint32_t height, ImmediateTextureFilter filter, bool is_repeated, const uint8_t* data) { assert_not_null(data); - size_t texture_index = - CreateVulkanTexture(width, height, filter, is_repeated, data); - if (texture_index == SIZE_MAX) { - texture_index = white_texture_index_; + auto texture = std::make_unique(width, height); + size_t pending_upload_index; + if (CreateTextureResource(width, height, filter, is_repeated, data, + texture->resource_, pending_upload_index)) { + // Manage by this immediate drawer. 
+ texture->immediate_drawer_ = this; + texture->immediate_drawer_index_ = textures_.size(); + textures_.push_back(texture.get()); + texture->pending_upload_index_ = pending_upload_index; + texture_uploads_pending_[texture->pending_upload_index_].texture = + texture.get(); } - Texture& texture = textures_[texture_index]; - auto immediate_texture = std::make_unique( - width, height, this, GetTextureHandleForIndex(texture_index)); - if (texture_index != white_texture_index_) { - texture.immediate_texture = immediate_texture.get(); - } - // Transferring a new reference to a real texture or giving a weak reference - // to the white texture (there's no backlink to the ImmediateTexture from it - // also). - return std::unique_ptr(immediate_texture.release()); + return std::unique_ptr(texture.release()); } void VulkanImmediateDrawer::Begin(int render_target_width, @@ -200,25 +204,30 @@ void VulkanImmediateDrawer::Begin(int render_target_width, const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + // Destroy deleted textures. + for (auto it = textures_deleted_.begin(); it != textures_deleted_.end();) { + if (it->second > submission_completed) { + ++it; + continue; + } + if (std::next(it) != textures_deleted_.end()) { + *it = textures_deleted_.back(); + } + textures_deleted_.pop_back(); + } + // Release upload buffers for completed texture uploads. 
- auto erase_texture_uploads_end = texture_uploads_submitted_.begin(); - while (erase_texture_uploads_end != texture_uploads_submitted_.end()) { + auto erase_texture_uploads_end = texture_upload_buffers_submitted_.begin(); + while (erase_texture_uploads_end != texture_upload_buffers_submitted_.end()) { if (erase_texture_uploads_end->submission_index > submission_completed) { break; } - if (erase_texture_uploads_end->buffer != VK_NULL_HANDLE) { - dfn.vkDestroyBuffer(device, erase_texture_uploads_end->buffer, nullptr); - } - if (erase_texture_uploads_end->buffer_memory != VK_NULL_HANDLE) { - dfn.vkFreeMemory(device, erase_texture_uploads_end->buffer_memory, - nullptr); - } - // Release the texture reference held for uploading. - ReleaseTexture(erase_texture_uploads_end->texture_index); + dfn.vkDestroyBuffer(device, erase_texture_uploads_end->buffer, nullptr); + dfn.vkFreeMemory(device, erase_texture_uploads_end->buffer_memory, nullptr); ++erase_texture_uploads_end; } - texture_uploads_submitted_.erase(texture_uploads_submitted_.begin(), - erase_texture_uploads_end); + texture_upload_buffers_submitted_.erase( + texture_upload_buffers_submitted_.begin(), erase_texture_uploads_end); vertex_buffer_pool_->Reclaim(submission_completed); @@ -352,8 +361,15 @@ void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) { } // Bind the texture. 
- uint32_t texture_descriptor_index = - textures_[GetTextureIndexForHandle(draw.texture_handle)].descriptor_index; + uint32_t texture_descriptor_index; + VulkanImmediateTexture* texture = + reinterpret_cast(draw.texture_handle); + if (texture && texture->immediate_drawer_ == this) { + texture_descriptor_index = texture->resource_.descriptor_index; + texture->last_usage_submission_ = context_.swap_submission_current(); + } else { + texture_descriptor_index = white_texture_.descriptor_index; + } if (current_texture_descriptor_index_ != texture_descriptor_index) { current_texture_descriptor_index_ = texture_descriptor_index; VkDescriptorSet texture_descriptor_set = @@ -408,7 +424,7 @@ void VulkanImmediateDrawer::End() { for (const PendingTextureUpload& pending_texture_upload : texture_uploads_pending_) { image_memory_barriers.emplace_back(image_memory_barrier).image = - textures_[pending_texture_upload.texture_index].image; + pending_texture_upload.image; } dfn.vkCmdPipelineBarrier( setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, @@ -416,12 +432,10 @@ void VulkanImmediateDrawer::End() { uint32_t(image_memory_barriers.size()), image_memory_barriers.data()); // Do transfer operations and transition to - // VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL. + // VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, and also mark as used. for (size_t i = 0; i < texture_uploads_pending_count; ++i) { const PendingTextureUpload& pending_texture_upload = texture_uploads_pending_[i]; - VkImage texture_upload_image = - textures_[pending_texture_upload.texture_index].image; if (pending_texture_upload.buffer != VK_NULL_HANDLE) { // Copying. 
VkBufferImageCopy copy_region; @@ -440,8 +454,16 @@ void VulkanImmediateDrawer::End() { copy_region.imageExtent.depth = 1; dfn.vkCmdCopyBufferToImage( setup_command_buffer, pending_texture_upload.buffer, - texture_upload_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, - ©_region); + pending_texture_upload.image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©_region); + + SubmittedTextureUploadBuffer& submitted_texture_upload_buffer = + texture_upload_buffers_submitted_.emplace_back(); + submitted_texture_upload_buffer.buffer = + pending_texture_upload.buffer; + submitted_texture_upload_buffer.buffer_memory = + pending_texture_upload.buffer_memory; + submitted_texture_upload_buffer.submission_index = submission_current; } else { // Clearing (initializing the empty image). VkClearColorValue white_clear_value; @@ -449,10 +471,10 @@ void VulkanImmediateDrawer::End() { white_clear_value.float32[1] = 1.0f; white_clear_value.float32[2] = 1.0f; white_clear_value.float32[3] = 1.0f; - dfn.vkCmdClearColorImage(setup_command_buffer, texture_upload_image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - &white_clear_value, 1, - &image_memory_barrier.subresourceRange); + dfn.vkCmdClearColorImage( + setup_command_buffer, pending_texture_upload.image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &white_clear_value, 1, + &image_memory_barrier.subresourceRange); } VkImageMemoryBarrier& image_memory_barrier_current = @@ -465,16 +487,11 @@ void VulkanImmediateDrawer::End() { image_memory_barrier_current.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - SubmittedTextureUpload& submitted_texture_upload = - texture_uploads_submitted_.emplace_back(); - // Transfer the reference to the texture - need to keep it until the - // upload is completed. 
- submitted_texture_upload.texture_index = - pending_texture_upload.texture_index; - submitted_texture_upload.buffer = pending_texture_upload.buffer; - submitted_texture_upload.buffer_memory = - pending_texture_upload.buffer_memory; - submitted_texture_upload.submission_index = submission_current; + if (pending_texture_upload.texture) { + pending_texture_upload.texture->last_usage_submission_ = + submission_current; + pending_texture_upload.texture->pending_upload_index_ = SIZE_MAX; + } } dfn.vkCmdPipelineBarrier( setup_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, @@ -808,11 +825,11 @@ void VulkanImmediateDrawer::FreeTextureDescriptor(uint32_t descriptor_index) { pool->recycled_bits |= uint64_t(1) << allocation_index; } -size_t VulkanImmediateDrawer::CreateVulkanTexture(uint32_t width, - uint32_t height, - ImmediateTextureFilter filter, - bool is_repeated, - const uint8_t* data) { +bool VulkanImmediateDrawer::CreateTextureResource( + uint32_t width, uint32_t height, ImmediateTextureFilter filter, + bool is_repeated, const uint8_t* data, + VulkanImmediateTexture::Resource& resource_out, + size_t& pending_upload_index_out) { const VulkanProvider& provider = context_.GetVulkanProvider(); const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); @@ -846,7 +863,7 @@ size_t VulkanImmediateDrawer::CreateVulkanTexture(uint32_t width, XELOGE( "Failed to create a Vulkan image for a {}x{} immediate drawer texture", width, height); - return SIZE_MAX; + return false; } VkMemoryAllocateInfo image_memory_allocate_info; @@ -860,7 +877,7 @@ size_t VulkanImmediateDrawer::CreateVulkanTexture(uint32_t width, "drawer Vulkan image", width, height); dfn.vkDestroyImage(device, image, nullptr); - return SIZE_MAX; + return false; } image_memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; VkMemoryDedicatedAllocateInfoKHR image_memory_dedicated_allocate_info; @@ -883,14 +900,14 @@ size_t 
VulkanImmediateDrawer::CreateVulkanTexture(uint32_t width, "image", width, height); dfn.vkDestroyImage(device, image, nullptr); - return SIZE_MAX; + return false; } if (dfn.vkBindImageMemory(device, image, image_memory, 0) != VK_SUCCESS) { XELOGE("Failed to bind memory to a {}x{} immediate drawer Vulkan image", width, height); dfn.vkDestroyImage(device, image, nullptr); dfn.vkFreeMemory(device, image_memory, nullptr); - return SIZE_MAX; + return false; } VkImageViewCreateInfo image_view_create_info; @@ -917,7 +934,7 @@ size_t VulkanImmediateDrawer::CreateVulkanTexture(uint32_t width, width, height); dfn.vkDestroyImage(device, image, nullptr); dfn.vkFreeMemory(device, image_memory, nullptr); - return SIZE_MAX; + return false; } uint32_t descriptor_index = AllocateTextureDescriptor(); @@ -929,7 +946,7 @@ size_t VulkanImmediateDrawer::CreateVulkanTexture(uint32_t width, dfn.vkDestroyImageView(device, image_view, nullptr); dfn.vkDestroyImage(device, image, nullptr); dfn.vkFreeMemory(device, image_memory, nullptr); - return SIZE_MAX; + return false; } VkDescriptorImageInfo descriptor_image_info; VulkanProvider::HostSampler host_sampler; @@ -983,7 +1000,7 @@ size_t VulkanImmediateDrawer::CreateVulkanTexture(uint32_t width, dfn.vkDestroyImageView(device, image_view, nullptr); dfn.vkDestroyImage(device, image, nullptr); dfn.vkFreeMemory(device, image_memory, nullptr); - return SIZE_MAX; + return false; } VkMemoryAllocateInfo upload_buffer_memory_allocate_info; @@ -1003,7 +1020,7 @@ size_t VulkanImmediateDrawer::CreateVulkanTexture(uint32_t width, dfn.vkDestroyImageView(device, image_view, nullptr); dfn.vkDestroyImage(device, image, nullptr); dfn.vkFreeMemory(device, image_memory, nullptr); - return SIZE_MAX; + return false; } upload_buffer_memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; @@ -1034,7 +1051,7 @@ size_t VulkanImmediateDrawer::CreateVulkanTexture(uint32_t width, dfn.vkDestroyImageView(device, image_view, nullptr); dfn.vkDestroyImage(device, 
image, nullptr); dfn.vkFreeMemory(device, image_memory, nullptr); - return SIZE_MAX; + return false; } if (dfn.vkBindBufferMemory(device, upload_buffer, upload_buffer_memory, 0) != VK_SUCCESS) { @@ -1048,7 +1065,7 @@ size_t VulkanImmediateDrawer::CreateVulkanTexture(uint32_t width, dfn.vkDestroyImageView(device, image_view, nullptr); dfn.vkDestroyImage(device, image, nullptr); dfn.vkFreeMemory(device, image_memory, nullptr); - return SIZE_MAX; + return false; } void* upload_buffer_mapping; @@ -1064,7 +1081,7 @@ size_t VulkanImmediateDrawer::CreateVulkanTexture(uint32_t width, dfn.vkDestroyImageView(device, image_view, nullptr); dfn.vkDestroyImage(device, image, nullptr); dfn.vkFreeMemory(device, image_memory, nullptr); - return SIZE_MAX; + return false; } std::memcpy(upload_buffer_mapping, data, data_size); util::FlushMappedMemoryRange( @@ -1073,59 +1090,66 @@ size_t VulkanImmediateDrawer::CreateVulkanTexture(uint32_t width, dfn.vkUnmapMemory(device, upload_buffer_memory); } - size_t texture_index; - if (!textures_free_.empty()) { - texture_index = textures_free_.back(); - textures_free_.pop_back(); - } else { - texture_index = textures_.size(); - textures_.emplace_back(); - } - Texture& texture = textures_[texture_index]; - texture.immediate_texture = nullptr; - texture.image = image; - texture.memory = image_memory; - texture.image_view = image_view; - texture.descriptor_index = descriptor_index; - // The reference that will be returned to the caller. 
- texture.reference_count = 1; + resource_out.image = image; + resource_out.memory = image_memory; + resource_out.image_view = image_view; + resource_out.descriptor_index = descriptor_index; - PendingTextureUpload& pending_texture_upload = + pending_upload_index_out = texture_uploads_pending_.size(); + PendingTextureUpload& pending_upload = texture_uploads_pending_.emplace_back(); - // While the upload has not been yet completed, keep a reference to the - // texture because its lifetime is not tied to that of the ImmediateTexture - // (and thus to context's submissions) now. - ++texture.reference_count; - pending_texture_upload.texture_index = texture_index; - pending_texture_upload.width = width; - pending_texture_upload.height = height; - pending_texture_upload.buffer = upload_buffer; - pending_texture_upload.buffer_memory = upload_buffer_memory; + // The caller will set the ImmediateTexture pointer if needed. + pending_upload.texture = nullptr; + pending_upload.buffer = upload_buffer; + pending_upload.buffer_memory = upload_buffer_memory; + pending_upload.image = image; + pending_upload.width = width; + pending_upload.height = height; - return texture_index; + return true; } -void VulkanImmediateDrawer::ReleaseTexture(size_t index) { - assert_true(index < textures_.size()); - Texture& texture = textures_[index]; - assert_not_zero(texture.reference_count); - if (--texture.reference_count) { - return; - } - // If the texture is attached to a VulkanImmediateTexture, the - // VulkanImmediateTexture must hold a reference to it.
- assert_null(texture.immediate_texture); - FreeTextureDescriptor(texture.descriptor_index); +void VulkanImmediateDrawer::DestroyTextureResource( + VulkanImmediateTexture::Resource& resource) { + FreeTextureDescriptor(resource.descriptor_index); const VulkanProvider& provider = context_.GetVulkanProvider(); const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - dfn.vkDestroyImageView(device, texture.image_view, nullptr); - dfn.vkDestroyImage(device, texture.image, nullptr); - dfn.vkFreeMemory(device, texture.memory, nullptr); - textures_free_.push_back(index); - // TODO(Triang3l): Track last usage submission because it turns out that - // deletion in the ImGui and the profiler actually happens before after - // awaiting submission completion. + dfn.vkDestroyImageView(device, resource.image_view, nullptr); + dfn.vkDestroyImage(device, resource.image, nullptr); + dfn.vkFreeMemory(device, resource.memory, nullptr); +} + +void VulkanImmediateDrawer::OnImmediateTextureDestroyed( + VulkanImmediateTexture& texture) { + // Remove from the pending uploads. + size_t pending_upload_index = texture.pending_upload_index_; + if (pending_upload_index != SIZE_MAX) { + if (pending_upload_index + 1 < texture_uploads_pending_.size()) { + PendingTextureUpload& pending_upload = + texture_uploads_pending_[pending_upload_index]; + pending_upload = texture_uploads_pending_.back(); + if (pending_upload.texture) { + pending_upload.texture->pending_upload_index_ = pending_upload_index; + } + } + texture_uploads_pending_.pop_back(); + } + + // Remove from the texture list. + VulkanImmediateTexture*& texture_at_index = + textures_[texture.immediate_drawer_index_]; + texture_at_index = textures_.back(); + texture_at_index->immediate_drawer_index_ = texture.immediate_drawer_index_; + textures_.pop_back(); + + // Destroy immediately or queue for destruction if in use. 
+ if (texture.last_usage_submission_ > context_.swap_submission_completed()) { + textures_deleted_.emplace_back( + std::make_pair(texture.resource_, texture.last_usage_submission_)); + } else { + DestroyTextureResource(texture.resource_); + } } } // namespace vulkan diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h index 20b0cf73e..e68bbc554 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.h +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include "xenia/ui/immediate_drawer.h" @@ -54,24 +54,28 @@ class VulkanImmediateDrawer : public ImmediateDrawer { class VulkanImmediateTexture : public ImmediateTexture { public: - VulkanImmediateTexture(uint32_t width, uint32_t height, - VulkanImmediateDrawer* immediate_drawer, - uintptr_t immediate_drawer_handle) - : ImmediateTexture(width, height), immediate_drawer_(immediate_drawer) { - handle = immediate_drawer_handle; - } - ~VulkanImmediateTexture() { - if (immediate_drawer_) { - immediate_drawer_->HandleImmediateTextureDestroyed(handle); - } - } - void DetachFromImmediateDrawer() { - immediate_drawer_ = nullptr; - handle = 0; - } + struct Resource { + VkImage image; + VkDeviceMemory memory; + VkImageView image_view; + uint32_t descriptor_index; + }; - private: + VulkanImmediateTexture(uint32_t width, uint32_t height) + : ImmediateTexture(width, height), immediate_drawer_(nullptr) { + handle = reinterpret_cast(this); + } + ~VulkanImmediateTexture() override; + + // If null, this is either a blank texture, or the immediate drawer has been + // destroyed. VulkanImmediateDrawer* immediate_drawer_; + size_t immediate_drawer_index_; + // Invalid if immediate_drawer_ is null, since it's managed by the immediate + // drawer. 
+ Resource resource_; + size_t pending_upload_index_; + uint64_t last_usage_submission_ = 0; }; struct TextureDescriptorPool { @@ -86,19 +90,6 @@ class VulkanImmediateDrawer : public ImmediateDrawer { TextureDescriptorPool* recycled_next; }; - // Tracked separately from VulkanImmediateTexture because copying may take - // additional references. - struct Texture { - // Null for the white texture, reference held by the drawer itself instead - // of immediate textures. - VulkanImmediateTexture* immediate_texture; - VkImage image; - VkDeviceMemory memory; - VkImageView image_view; - uint32_t descriptor_index; - uint32_t reference_count; - }; - bool EnsurePipelinesCreated(); // Allocates a combined image sampler in a pool and returns its index, or @@ -107,31 +98,15 @@ class VulkanImmediateDrawer : public ImmediateDrawer { VkDescriptorSet GetTextureDescriptor(uint32_t descriptor_index) const; void FreeTextureDescriptor(uint32_t descriptor_index); - // Returns SIZE_MAX in case of failure. The created texture will have a - // reference count of 1 plus references needed for uploading, but will not be - // attached to a VulkanImmediateTexture (will return the reference to the - // caller, in short). If data is null, a (1, 1, 1, 1) image will be created, - // which can be used as a replacement when drawing without a real texture. - size_t CreateVulkanTexture(uint32_t width, uint32_t height, + // If data is null, a (1, 1, 1, 1) image will be created, which can be used as + // a replacement when drawing without a real texture. + bool CreateTextureResource(uint32_t width, uint32_t height, ImmediateTextureFilter filter, bool is_repeated, - const uint8_t* data); - void ReleaseTexture(size_t index); - uintptr_t GetTextureHandleForIndex(size_t index) const { - return index != white_texture_index_ ? uintptr_t(index + 1) : 0; - } - size_t GetTextureIndexForHandle(uintptr_t handle) const { - // 0 is a special value for no texture. - return handle ? 
size_t(handle - 1) : white_texture_index_; - } - // For calling from VulkanImmediateTexture. - void HandleImmediateTextureDestroyed(uintptr_t handle) { - size_t index = GetTextureIndexForHandle(handle); - if (index == white_texture_index_) { - return; - } - textures_[index].immediate_texture = nullptr; - ReleaseTexture(index); - } + const uint8_t* data, + VulkanImmediateTexture::Resource& resource_out, + size_t& pending_upload_index_out); + void DestroyTextureResource(VulkanImmediateTexture::Resource& resource); + void OnImmediateTextureDestroyed(VulkanImmediateTexture& texture); VulkanContext& context_; @@ -141,26 +116,28 @@ class VulkanImmediateDrawer : public ImmediateDrawer { TextureDescriptorPool* texture_descriptor_pool_unallocated_first_ = nullptr; TextureDescriptorPool* texture_descriptor_pool_recycled_first_ = nullptr; - std::vector textures_; - std::vector textures_free_; + VulkanImmediateTexture::Resource white_texture_ = {}; + std::vector textures_; struct PendingTextureUpload { - size_t texture_index; - uint32_t width; - uint32_t height; + // Null for internal resources such as the white texture. + VulkanImmediateTexture* texture; // VK_NULL_HANDLE if need to clear rather than to copy. VkBuffer buffer; VkDeviceMemory buffer_memory; + VkImage image; + uint32_t width; + uint32_t height; }; std::vector texture_uploads_pending_; - struct SubmittedTextureUpload { - size_t texture_index; - // VK_NULL_HANDLE if cleared rather than copied. + struct SubmittedTextureUploadBuffer { VkBuffer buffer; VkDeviceMemory buffer_memory; uint64_t submission_index; }; - std::deque texture_uploads_submitted_; - size_t white_texture_index_; + std::deque texture_upload_buffers_submitted_; + // Resource and last usage submission pairs. 
+ std::vector> + textures_deleted_; VkPipelineLayout pipeline_layout_ = VK_NULL_HANDLE; From d78b2a9a932dd069861245ccd69555c4b015e82e Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 27 Sep 2020 15:42:55 +0300 Subject: [PATCH 016/123] [Vulkan] Use move instead of release to return ImmediateTexture --- src/xenia/ui/vulkan/vulkan_immediate_drawer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index 671c90503..7b621f45f 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -184,7 +184,7 @@ std::unique_ptr VulkanImmediateDrawer::CreateTexture( texture_uploads_pending_[texture->pending_upload_index_].texture = texture.get(); } - return std::unique_ptr(texture.release()); + return std::move(texture); } void VulkanImmediateDrawer::Begin(int render_target_width, From 9f8c45c4e6c72a7867891b96f5237c1a168d820d Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 27 Sep 2020 15:55:55 +0300 Subject: [PATCH 017/123] [Vulkan] Add forgotten delayed ImmediateTexture deletion --- src/xenia/ui/vulkan/vulkan_immediate_drawer.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index 7b621f45f..bd8ac449f 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -210,6 +210,7 @@ void VulkanImmediateDrawer::Begin(int render_target_width, ++it; continue; } + DestroyTextureResource(it->first); if (std::next(it) != textures_deleted_.end()) { *it = textures_deleted_.back(); } From 865f77bae2c47dd6674b4a2a22d172d4a14f6673 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 1 Oct 2020 21:17:10 +0300 Subject: [PATCH 018/123] [Vulkan] Submissions --- .../gpu/vulkan/vulkan_command_processor.cc | 348 +++++++++++++++++- .../gpu/vulkan/vulkan_command_processor.h | 68 +++- 
src/xenia/ui/vulkan/vulkan_context.cc | 12 +- src/xenia/ui/vulkan/vulkan_context.h | 2 + src/xenia/ui/vulkan/vulkan_provider.h | 27 +- 5 files changed, 441 insertions(+), 16 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 5edf7705e..f05838d29 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -9,6 +9,16 @@ #include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/base/profiling.h" +#include "xenia/ui/vulkan/vulkan_context.h" +#include "xenia/ui/vulkan/vulkan_provider.h" +#include "xenia/ui/vulkan/vulkan_util.h" + namespace xe { namespace gpu { namespace vulkan { @@ -24,16 +34,79 @@ void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, void VulkanCommandProcessor::RestoreEdramSnapshot(const void* snapshot) {} bool VulkanCommandProcessor::SetupContext() { - return CommandProcessor::SetupContext(); + if (!CommandProcessor::SetupContext()) { + XELOGE("Failed to initialize base command processor context"); + return false; + } + + const ui::vulkan::VulkanProvider& provider = + GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + return true; } void VulkanCommandProcessor::ShutdownContext() { - return CommandProcessor::ShutdownContext(); + AwaitAllQueueOperationsCompletion(); + + const ui::vulkan::VulkanProvider& provider = + GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + for (const auto& command_buffer_pair : command_buffers_submitted_) { + dfn.vkDestroyCommandPool(device, command_buffer_pair.first.pool, nullptr); + } + command_buffers_submitted_.clear(); + for (const CommandBuffer& 
command_buffer : command_buffers_writable_) { + dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr); + } + command_buffers_writable_.clear(); + + std::memset(closed_frame_submissions_, 0, sizeof(closed_frame_submissions_)); + frame_completed_ = 0; + frame_current_ = 1; + frame_open_ = false; + + for (const auto& semaphore : + submissions_in_flight_sparse_binding_semaphores_) { + dfn.vkDestroySemaphore(device, semaphore.first, nullptr); + } + submissions_in_flight_sparse_binding_semaphores_.clear(); + for (VkFence& fence : submissions_in_flight_fences_) { + dfn.vkDestroyFence(device, fence, nullptr); + } + submissions_in_flight_fences_.clear(); + submission_completed_ = 0; + submission_open_ = false; + + for (VkSemaphore semaphore : semaphores_free_) { + dfn.vkDestroySemaphore(device, semaphore, nullptr); + } + semaphores_free_.clear(); + for (VkFence fence : fences_free_) { + dfn.vkDestroyFence(device, fence, nullptr); + } + fences_free_.clear(); + + CommandProcessor::ShutdownContext(); } void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, - uint32_t frontbuffer_height) {} + uint32_t frontbuffer_height) { + // FIXME(Triang3l): frontbuffer_ptr is currently unreliable, in the trace + // player it's set to 0, but it's not needed anyway since the fetch constant + // contains the address. + + SCOPE_profile_cpu_f("gpu"); + + // In case the swap command is the only one in the frame. 
+ BeginSubmission(true); + + EndSubmission(true); +} Shader* VulkanCommandProcessor::LoadShader(xenos::ShaderType shader_type, uint32_t guest_address, @@ -46,15 +119,282 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, uint32_t index_count, IndexBufferInfo* index_buffer_info, bool major_mode_explicit) { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + + BeginSubmission(true); + return true; } -bool VulkanCommandProcessor::IssueCopy() { return true; } +bool VulkanCommandProcessor::IssueCopy() { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + + BeginSubmission(true); + + return true; +} void VulkanCommandProcessor::InitializeTrace() {} void VulkanCommandProcessor::FinalizeTrace() {} +void VulkanCommandProcessor::CheckSubmissionFence(uint64_t await_submission) { + if (await_submission >= GetCurrentSubmission()) { + if (submission_open_) { + EndSubmission(false); + } + // A submission won't be ended if it hasn't been started, or if ending + // has failed - clamp the index. + await_submission = GetCurrentSubmission() - 1; + } + + const ui::vulkan::VulkanProvider& provider = + GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + size_t fences_total = submissions_in_flight_fences_.size(); + size_t fences_awaited = 0; + if (await_submission > submission_completed_) { + // Await in a blocking way if requested. 
+ if (dfn.vkWaitForFences(device, + uint32_t(await_submission - submission_completed_), + submissions_in_flight_fences_.data(), VK_TRUE, + UINT64_MAX) == VK_SUCCESS) { + fences_awaited += await_submission - submission_completed_; + } else { + XELOGE("Failed to await submission completion Vulkan fences"); + } + } + // Check how far into the submissions the GPU currently is, in order because + // submissions themselves can be executed out of order, but Xenia serializes + // that for simplicity. + while (fences_awaited < fences_total) { + if (dfn.vkWaitForFences(device, 1, + &submissions_in_flight_fences_[fences_awaited], + VK_TRUE, 0) != VK_SUCCESS) { + break; + } + ++fences_awaited; + } + if (!fences_awaited) { + // Not updated - no need to reclaim or download things. + return; + } + // Reclaim fences. + fences_free_.reserve(fences_free_.size() + fences_awaited); + auto submissions_in_flight_fences_awaited_end = + submissions_in_flight_fences_.cbegin(); + std::advance(submissions_in_flight_fences_awaited_end, fences_awaited); + fences_free_.insert(fences_free_.cend(), + submissions_in_flight_fences_.cbegin(), + submissions_in_flight_fences_awaited_end); + submissions_in_flight_fences_.erase(submissions_in_flight_fences_.cbegin(), + submissions_in_flight_fences_awaited_end); + submission_completed_ += fences_awaited; + + // Reclaim semaphores used for sparse binding and graphics synchronization. + while (!submissions_in_flight_sparse_binding_semaphores_.empty()) { + const auto& semaphore_submission = + submissions_in_flight_sparse_binding_semaphores_.front(); + if (semaphore_submission.second > submission_completed_) { + break; + } + semaphores_free_.push_back(semaphore_submission.first); + submissions_in_flight_sparse_binding_semaphores_.pop_front(); + } + + // Reclaim command pools.
+ while (!command_buffers_submitted_.empty()) { + const auto& command_buffer_pair = command_buffers_submitted_.front(); + if (command_buffer_pair.second > submission_completed_) { + break; + } + command_buffers_writable_.push_back(command_buffer_pair.first); + command_buffers_submitted_.pop_front(); + } +} + +void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { +#if FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // FINE_GRAINED_DRAW_SCOPES + + bool is_opening_frame = is_guest_command && !frame_open_; + if (submission_open_ && !is_opening_frame) { + return; + } + + // Check the fence - needed for all kinds of submissions (to reclaim transient + // resources early) and specifically for frames (not to queue too many), and + // await the availability of the current frame. + CheckSubmissionFence( + is_opening_frame + ? closed_frame_submissions_[frame_current_ % kMaxFramesInFlight] + : 0); + // TODO(Triang3l): If failed to await (completed submission < awaited frame + // submission), do something like dropping the draw command that wanted to + // open the frame. + if (is_opening_frame) { + // Update the completed frame index, also obtaining the actual completed + // frame number (since the CPU may be actually less than 3 frames behind) + // before reclaiming resources tracked with the frame number. 
+ frame_completed_ = std::max(frame_current_, uint64_t(kMaxFramesInFlight)) - + kMaxFramesInFlight; + for (uint64_t frame = frame_completed_ + 1; frame < frame_current_; + ++frame) { + if (closed_frame_submissions_[frame % kMaxFramesInFlight] > + submission_completed_) { + break; + } + frame_completed_ = frame; + } + } + + if (!submission_open_) { + submission_open_ = true; + } + + if (is_opening_frame) { + frame_open_ = true; + } +} + +bool VulkanCommandProcessor::EndSubmission(bool is_swap) { + ui::vulkan::VulkanProvider& provider = GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + // Make sure everything needed for submitting exists. + if (submission_open_) { + if (fences_free_.empty()) { + VkFenceCreateInfo fence_create_info; + fence_create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fence_create_info.pNext = nullptr; + fence_create_info.flags = 0; + VkFence fence; + if (dfn.vkCreateFence(device, &fence_create_info, nullptr, &fence) != + VK_SUCCESS) { + XELOGE("Failed to create a Vulkan submission fence"); + // Try to submit later. Completely dropping the submission is not + // permitted because resources would be left in an undefined state. + return false; + } + fences_free_.push_back(fence); + } + // TODO(Triang3l): Create a sparse binding semaphore. 
+ if (command_buffers_writable_.empty()) { + CommandBuffer command_buffer; + VkCommandPoolCreateInfo command_pool_create_info; + command_pool_create_info.sType = + VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + command_pool_create_info.pNext = nullptr; + command_pool_create_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT; + command_pool_create_info.queueFamilyIndex = + provider.queue_family_graphics_compute(); + if (dfn.vkCreateCommandPool(device, &command_pool_create_info, nullptr, + &command_buffer.pool) != VK_SUCCESS) { + XELOGE("Failed to create a Vulkan command pool"); + return false; + } + VkCommandBufferAllocateInfo command_buffer_allocate_info; + command_buffer_allocate_info.sType = + VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + command_buffer_allocate_info.pNext = nullptr; + command_buffer_allocate_info.commandPool = command_buffer.pool; + command_buffer_allocate_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + command_buffer_allocate_info.commandBufferCount = 1; + if (dfn.vkAllocateCommandBuffers(device, &command_buffer_allocate_info, + &command_buffer.buffer) != VK_SUCCESS) { + XELOGE("Failed to allocate a Vulkan command buffer"); + dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr); + return false; + } + command_buffers_writable_.push_back(command_buffer); + } + } + + bool is_closing_frame = is_swap && frame_open_; + + if (submission_open_) { + assert_false(command_buffers_writable_.empty()); + CommandBuffer command_buffer = command_buffers_writable_.back(); + if (dfn.vkResetCommandPool(device, command_buffer.pool, 0) != VK_SUCCESS) { + XELOGE("Failed to reset a Vulkan command pool"); + return false; + } + VkCommandBufferBeginInfo command_buffer_begin_info; + command_buffer_begin_info.sType = + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + command_buffer_begin_info.pNext = nullptr; + command_buffer_begin_info.flags = + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + command_buffer_begin_info.pInheritanceInfo = nullptr; + if 
(dfn.vkBeginCommandBuffer(command_buffer.buffer, + &command_buffer_begin_info) != VK_SUCCESS) { + XELOGE("Failed to begin a Vulkan command buffer"); + return false; + } + // TODO(Triang3l): Write deferred command buffer commands. + if (dfn.vkEndCommandBuffer(command_buffer.buffer) != VK_SUCCESS) { + XELOGE("Failed to end a Vulkan command buffer"); + return false; + } + // TODO(Triang3l): Submit sparse binding. + VkSubmitInfo submit_info; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.pNext = nullptr; + submit_info.waitSemaphoreCount = 0; + submit_info.pWaitSemaphores = nullptr; + submit_info.pWaitDstStageMask = nullptr; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &command_buffer.buffer; + submit_info.signalSemaphoreCount = 0; + submit_info.pSignalSemaphores = nullptr; + assert_false(fences_free_.empty()); + VkFence fence = fences_free_.back(); + if (dfn.vkResetFences(device, 1, &fence) != VK_SUCCESS) { + XELOGE("Failed to reset a Vulkan submission fence"); + return false; + } + if (provider.SubmitToGraphicsComputeQueue(1, &submit_info, fence) != + VK_SUCCESS) { + XELOGE("Failed to submit a Vulkan command buffer"); + return false; + } + command_buffers_submitted_.push_back( + std::make_pair(command_buffer, GetCurrentSubmission())); + command_buffers_writable_.pop_back(); + // Increments the current submission number, going to the next submission. + submissions_in_flight_fences_.push_back(fence); + fences_free_.pop_back(); + + submission_open_ = false; + } + + if (is_closing_frame) { + frame_open_ = false; + // Submission already closed now, so minus 1. 
+ closed_frame_submissions_[(frame_current_++) % kMaxFramesInFlight] = + GetCurrentSubmission() - 1; + + if (cache_clear_requested_ && AwaitAllQueueOperationsCompletion()) { + cache_clear_requested_ = false; + + for (const CommandBuffer& command_buffer : command_buffers_writable_) { + dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr); + } + command_buffers_writable_.clear(); + } + } + + return true; +} + } // namespace vulkan } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index f841461e8..90409159d 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -10,10 +10,16 @@ #ifndef XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_ #define XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_ +#include +#include +#include +#include + #include "xenia/gpu/command_processor.h" #include "xenia/gpu/vulkan/vulkan_graphics_system.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/kernel_state.h" +#include "xenia/ui/vulkan/vulkan_context.h" namespace xe { namespace gpu { @@ -29,7 +35,17 @@ class VulkanCommandProcessor : public CommandProcessor { void RestoreEdramSnapshot(const void* snapshot) override; - private: + ui::vulkan::VulkanContext& GetVulkanContext() const { + return static_cast(*context_); + } + + uint64_t GetCurrentSubmission() const { + return submission_completed_ + + uint64_t(submissions_in_flight_fences_.size()) + 1; + } + uint64_t GetCompletedSubmission() const { return submission_completed_; } + + protected: bool SetupContext() override; void ShutdownContext() override; @@ -47,6 +63,56 @@ class VulkanCommandProcessor : public CommandProcessor { void InitializeTrace() override; void FinalizeTrace() override; + + private: + // BeginSubmission and EndSubmission may be called at any time. If there's an + // open non-frame submission, BeginSubmission(true) will promote it to a + // frame. 
EndSubmission(true) will close the frame no matter whether the + submission has already been closed. + + // Rechecks submission number and reclaims per-submission resources. Pass 0 as + // the submission to await to simply check status, or pass + // GetCurrentSubmission() to wait for all queue operations to be completed. + void CheckSubmissionFence(uint64_t await_submission); + // If is_guest_command is true, a new full frame - with full cleanup of + // resources and, if needed, starting capturing - is opened if pending (as + // opposed to simply resuming after mid-frame synchronization). + void BeginSubmission(bool is_guest_command); + // If is_swap is true, a full frame is closed - with, if needed, cache + // clearing and stopping capturing. Returns whether the submission was done + // successfully; if it has failed, leaves it open. + bool EndSubmission(bool is_swap); + bool AwaitAllQueueOperationsCompletion() { + CheckSubmissionFence(GetCurrentSubmission()); + return !submission_open_ && submissions_in_flight_fences_.empty(); + } + + bool cache_clear_requested_ = false; + + std::vector fences_free_; + std::vector semaphores_free_; + + bool submission_open_ = false; + uint64_t submission_completed_ = 0; + std::vector submissions_in_flight_fences_; + std::deque> + submissions_in_flight_sparse_binding_semaphores_; + + static constexpr uint32_t kMaxFramesInFlight = 3; + bool frame_open_ = false; + // Guest frame index, since some transient resources can be reused across + // submissions. Values updated at the beginning of a frame. + uint64_t frame_current_ = 1; + uint64_t frame_completed_ = 0; + // Submission indices of frames that have already been submitted. 
+ uint64_t closed_frame_submissions_[kMaxFramesInFlight] = {}; + + struct CommandBuffer { + VkCommandPool pool; + VkCommandBuffer buffer; + }; + std::vector command_buffers_writable_; + std::deque> command_buffers_submitted_; }; } // namespace vulkan diff --git a/src/xenia/ui/vulkan/vulkan_context.cc b/src/xenia/ui/vulkan/vulkan_context.cc index 067578e01..689d77b8b 100644 --- a/src/xenia/ui/vulkan/vulkan_context.cc +++ b/src/xenia/ui/vulkan/vulkan_context.cc @@ -737,10 +737,9 @@ void VulkanContext::EndSwap() { return; } - const VulkanProvider& provider = GetVulkanProvider(); + VulkanProvider& provider = GetVulkanProvider(); const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - VkQueue queue_graphics_compute = provider.queue_graphics_compute(); const SwapSubmission& submission = swap_submissions_[swap_submission_current_ % kSwapchainMaxImageCount]; @@ -771,8 +770,8 @@ void VulkanContext::EndSwap() { submit_info.pCommandBuffers = submit_command_buffers; submit_info.signalSemaphoreCount = 1; submit_info.pSignalSemaphores = &swap_render_completion_semaphore_; - VkResult submit_result = dfn.vkQueueSubmit(queue_graphics_compute, 1, - &submit_info, submission.fence); + VkResult submit_result = + provider.SubmitToGraphicsComputeQueue(1, &submit_info, submission.fence); if (submit_result != VK_SUCCESS) { // If failed, can't even return the swapchain image - so treat all errors as // context loss. @@ -790,10 +789,7 @@ void VulkanContext::EndSwap() { present_info.pSwapchains = &swap_swapchain_; present_info.pImageIndices = &swap_swapchain_image_current_; present_info.pResults = nullptr; - // FIXME(Triang3l): Allow a separate queue for present - see - // vulkan_provider.cc for details. 
- VkResult present_result = - dfn.vkQueuePresentKHR(queue_graphics_compute, &present_info); + VkResult present_result = provider.Present(&present_info); swap_swapchain_image_current_ = UINT32_MAX; switch (present_result) { case VK_SUCCESS: diff --git a/src/xenia/ui/vulkan/vulkan_context.h b/src/xenia/ui/vulkan/vulkan_context.h index f3b43c112..b2e34f7ec 100644 --- a/src/xenia/ui/vulkan/vulkan_context.h +++ b/src/xenia/ui/vulkan/vulkan_context.h @@ -19,6 +19,8 @@ #include "xenia/ui/vulkan/vulkan_immediate_drawer.h" #include "xenia/ui/vulkan/vulkan_provider.h" +#define FINE_GRAINED_DRAW_SCOPES 1 + namespace xe { namespace ui { namespace vulkan { diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h index ca3af3473..2e14b9398 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.h +++ b/src/xenia/ui/vulkan/vulkan_provider.h @@ -12,8 +12,10 @@ #include #include +#include #include +#include "xenia/base/assert.h" #include "xenia/base/platform.h" #include "xenia/ui/graphics_provider.h" @@ -193,9 +195,22 @@ class VulkanProvider : public GraphicsProvider { }; const DeviceFunctions& dfn() const { return dfn_; } - VkQueue queue_graphics_compute() const { return queue_graphics_compute_; } - // May be VK_NULL_HANDLE if not available. - VkQueue queue_sparse_binding() const { return queue_sparse_binding_; } + VkResult SubmitToGraphicsComputeQueue(uint32_t submit_count, + const VkSubmitInfo* submits, + VkFence fence) { + std::lock_guard lock(queue_graphics_compute_mutex_); + return dfn_.vkQueueSubmit(queue_graphics_compute_, submit_count, submits, + fence); + } + bool CanSubmitSparseBindings() const { + return queue_sparse_binding_ != VK_NULL_HANDLE; + } + VkResult Present(const VkPresentInfoKHR* present_info) { + // FIXME(Triang3l): Allow a separate queue for present - see + // vulkan_provider.cc for details. 
+ std::lock_guard lock(queue_graphics_compute_mutex_); + return dfn_.vkQueuePresentKHR(queue_graphics_compute_, present_info); + } // Samplers that may be useful for host needs. Only these samplers should be // used in host, non-emulation contexts, because the total number of samplers @@ -242,8 +257,14 @@ class VulkanProvider : public GraphicsProvider { VkDevice device_ = VK_NULL_HANDLE; DeviceFunctions dfn_ = {}; VkQueue queue_graphics_compute_; + // VkQueue access must be externally synchronized - must be locked when + // submitting anything. + std::mutex queue_graphics_compute_mutex_; // May be VK_NULL_HANDLE if not available. VkQueue queue_sparse_binding_; + // If queue_sparse_binding_ == queue_graphics_compute_, lock + // queue_graphics_compute_mutex_ instead when submitting sparse bindings. + std::mutex queue_sparse_binding_separate_mutex_; VkSampler host_samplers_[size_t(HostSampler::kCount)] = {}; }; From bc1cbd042eb1a365415b7d9f18ec2db61d2a9faa Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 3 Oct 2020 13:13:16 +0300 Subject: [PATCH 019/123] [Vulkan] Swapchain pass dependencies --- src/xenia/ui/vulkan/vulkan_context.cc | 35 +++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/src/xenia/ui/vulkan/vulkan_context.cc b/src/xenia/ui/vulkan/vulkan_context.cc index 689d77b8b..656cad3eb 100644 --- a/src/xenia/ui/vulkan/vulkan_context.cc +++ b/src/xenia/ui/vulkan/vulkan_context.cc @@ -529,6 +529,36 @@ bool VulkanContext::BeginSwap() { render_pass_subpass.pDepthStencilAttachment = nullptr; render_pass_subpass.preserveAttachmentCount = 0; render_pass_subpass.pPreserveAttachments = nullptr; + // Presentation engine does memory reading - external dependencies + // needed, and presentation doesn't occur in any normal pipeline stage, + // but it can be represented in the "bottom of pipeline" stage. 
+ // https://software.intel.com/content/www/us/en/develop/articles/api-without-secrets-introduction-to-vulkan-part-4.html + VkSubpassDependency render_pass_dependencies[2]; + render_pass_dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL; + render_pass_dependencies[0].dstSubpass = 0; + render_pass_dependencies[0].srcStageMask = + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + render_pass_dependencies[0].dstStageMask = + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + render_pass_dependencies[0].srcAccessMask = VK_ACCESS_MEMORY_READ_BIT; + // Using blending, so both reading and writing. + render_pass_dependencies[0].dstAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + render_pass_dependencies[0].dependencyFlags = + VK_DEPENDENCY_BY_REGION_BIT; + render_pass_dependencies[1].srcSubpass = 0; + render_pass_dependencies[1].dstSubpass = VK_SUBPASS_EXTERNAL; + render_pass_dependencies[1].srcStageMask = + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + render_pass_dependencies[1].dstStageMask = + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + render_pass_dependencies[1].srcAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + render_pass_dependencies[1].dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + render_pass_dependencies[1].dependencyFlags = + VK_DEPENDENCY_BY_REGION_BIT; VkRenderPassCreateInfo render_pass_create_info; render_pass_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; @@ -538,8 +568,9 @@ bool VulkanContext::BeginSwap() { render_pass_create_info.pAttachments = &render_pass_color_attachment; render_pass_create_info.subpassCount = 1; render_pass_create_info.pSubpasses = &render_pass_subpass; - render_pass_create_info.dependencyCount = 0; - render_pass_create_info.pDependencies = nullptr; + render_pass_create_info.dependencyCount = + uint32_t(xe::countof(render_pass_dependencies)); + render_pass_create_info.pDependencies = render_pass_dependencies; if 
(dfn.vkCreateRenderPass(device, &render_pass_create_info, nullptr, &swap_render_pass_) != VK_SUCCESS) { XELOGE("Failed to create the Vulkan presentation render pass."); From 2c50c670d8d535f6edb6d8b034a19803b715356a Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 4 Oct 2020 21:56:54 +0300 Subject: [PATCH 020/123] [Vulkan] Basic shared memory uploading --- src/xenia/gpu/shared_memory.h | 4 +- .../gpu/vulkan/deferred_command_buffer.cc | 208 ++++++++++++ .../gpu/vulkan/deferred_command_buffer.h | 120 +++++++ .../gpu/vulkan/vulkan_command_processor.cc | 68 +++- .../gpu/vulkan/vulkan_command_processor.h | 12 + src/xenia/gpu/vulkan/vulkan_shared_memory.cc | 309 ++++++++++++++++++ src/xenia/gpu/vulkan/vulkan_shared_memory.h | 97 ++++++ src/xenia/ui/vulkan/vulkan_provider.cc | 1 + src/xenia/ui/vulkan/vulkan_provider.h | 3 + 9 files changed, 810 insertions(+), 12 deletions(-) create mode 100644 src/xenia/gpu/vulkan/deferred_command_buffer.cc create mode 100644 src/xenia/gpu/vulkan/deferred_command_buffer.h create mode 100644 src/xenia/gpu/vulkan/vulkan_shared_memory.cc create mode 100644 src/xenia/gpu/vulkan/vulkan_shared_memory.h diff --git a/src/xenia/gpu/shared_memory.h b/src/xenia/gpu/shared_memory.h index 6dae85909..43221426a 100644 --- a/src/xenia/gpu/shared_memory.h +++ b/src/xenia/gpu/shared_memory.h @@ -123,7 +123,9 @@ class SharedMemory { // successfully uploaded range as early as possible, before the memcpy, to // make sure invalidation that happened during the CPU -> GPU memcpy isn't // missed (upload_page_ranges is in pages because of this - MarkRangeValid has - // page granularity). + // page granularity). upload_page_ranges are sorted in ascending address + // order, so front and back can be used to determine the overall bounds of + // pages to be uploaded. 
virtual bool UploadRanges( const std::vector>& upload_page_ranges) = 0; diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.cc b/src/xenia/gpu/vulkan/deferred_command_buffer.cc new file mode 100644 index 000000000..f9c359506 --- /dev/null +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.cc @@ -0,0 +1,208 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/deferred_command_buffer.h" + +#include +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/base/math.h" +#include "xenia/base/profiling.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +DeferredCommandBuffer::DeferredCommandBuffer( + const VulkanCommandProcessor& command_processor, size_t initial_size) + : command_processor_(command_processor) { + command_stream_.reserve(initial_size / sizeof(uintmax_t)); +} + +void DeferredCommandBuffer::Reset() { command_stream_.clear(); } + +void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = + command_processor_.GetVulkanContext().GetVulkanProvider().dfn(); + const uintmax_t* stream = command_stream_.data(); + size_t stream_remaining = command_stream_.size(); + while (stream_remaining) { + const CommandHeader& header = + *reinterpret_cast(stream); + stream += kCommandHeaderSizeElements; + stream_remaining -= kCommandHeaderSizeElements; + + switch (header.command) { + case 
Command::kVkBindIndexBuffer: { + auto& args = *reinterpret_cast(stream); + dfn.vkCmdBindIndexBuffer(command_buffer, args.buffer, args.offset, + args.index_type); + } break; + + case Command::kVkCopyBuffer: { + auto& args = *reinterpret_cast(stream); + static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t)); + dfn.vkCmdCopyBuffer( + command_buffer, args.src_buffer, args.dst_buffer, args.region_count, + reinterpret_cast( + reinterpret_cast(stream) + + xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy)))); + } break; + + case Command::kVkPipelineBarrier: { + auto& args = *reinterpret_cast(stream); + size_t barrier_offset_bytes = sizeof(ArgsVkPipelineBarrier); + + const VkMemoryBarrier* memory_barriers; + if (args.memory_barrier_count) { + static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t)); + barrier_offset_bytes = + xe::align(barrier_offset_bytes, alignof(VkMemoryBarrier)); + memory_barriers = reinterpret_cast( + reinterpret_cast(stream) + barrier_offset_bytes); + barrier_offset_bytes += + sizeof(VkMemoryBarrier) * args.memory_barrier_count; + } else { + memory_barriers = nullptr; + } + + const VkBufferMemoryBarrier* buffer_memory_barriers; + if (args.buffer_memory_barrier_count) { + static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t)); + barrier_offset_bytes = + xe::align(barrier_offset_bytes, alignof(VkBufferMemoryBarrier)); + buffer_memory_barriers = + reinterpret_cast( + reinterpret_cast(stream) + + barrier_offset_bytes); + barrier_offset_bytes += + sizeof(VkBufferMemoryBarrier) * args.buffer_memory_barrier_count; + } else { + buffer_memory_barriers = nullptr; + } + + const VkImageMemoryBarrier* image_memory_barriers; + if (args.image_memory_barrier_count) { + static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t)); + barrier_offset_bytes = + xe::align(barrier_offset_bytes, alignof(VkImageMemoryBarrier)); + image_memory_barriers = reinterpret_cast( + reinterpret_cast(stream) + barrier_offset_bytes); + 
barrier_offset_bytes += + sizeof(VkImageMemoryBarrier) * args.image_memory_barrier_count; + } else { + image_memory_barriers = nullptr; + } + + dfn.vkCmdPipelineBarrier( + command_buffer, args.src_stage_mask, args.dst_stage_mask, + args.dependency_flags, args.memory_barrier_count, memory_barriers, + args.buffer_memory_barrier_count, buffer_memory_barriers, + args.image_memory_barrier_count, image_memory_barriers); + } break; + + default: + assert_unhandled_case(header.command); + break; + } + + stream += header.arguments_size_elements; + stream_remaining -= header.arguments_size_elements; + } +} + +void DeferredCommandBuffer::CmdVkPipelineBarrier( + VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, uint32_t memory_barrier_count, + const VkMemoryBarrier* memory_barriers, + uint32_t buffer_memory_barrier_count, + const VkBufferMemoryBarrier* buffer_memory_barriers, + uint32_t image_memory_barrier_count, + const VkImageMemoryBarrier* image_memory_barriers) { + size_t arguments_size = sizeof(ArgsVkPipelineBarrier); + + size_t memory_barriers_offset; + if (memory_barrier_count) { + static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t)); + arguments_size = xe::align(arguments_size, alignof(VkMemoryBarrier)); + memory_barriers_offset = arguments_size; + arguments_size += sizeof(VkMemoryBarrier) * memory_barrier_count; + } else { + memory_barriers_offset = 0; + } + + size_t buffer_memory_barriers_offset; + if (buffer_memory_barrier_count) { + static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t)); + arguments_size = xe::align(arguments_size, alignof(VkBufferMemoryBarrier)); + buffer_memory_barriers_offset = arguments_size; + arguments_size += + sizeof(VkBufferMemoryBarrier) * buffer_memory_barrier_count; + } else { + buffer_memory_barriers_offset = 0; + } + + size_t image_memory_barriers_offset; + if (image_memory_barrier_count) { + static_assert(alignof(VkImageMemoryBarrier) <= 
alignof(uintmax_t)); + arguments_size = xe::align(arguments_size, alignof(VkImageMemoryBarrier)); + image_memory_barriers_offset = arguments_size; + arguments_size += sizeof(VkImageMemoryBarrier) * image_memory_barrier_count; + } else { + image_memory_barriers_offset = 0; + } + + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkPipelineBarrier, arguments_size)); + auto& args = *reinterpret_cast(args_ptr); + args.src_stage_mask = src_stage_mask; + args.dst_stage_mask = dst_stage_mask; + args.dependency_flags = dependency_flags; + args.memory_barrier_count = memory_barrier_count; + args.buffer_memory_barrier_count = buffer_memory_barrier_count; + args.image_memory_barrier_count = image_memory_barrier_count; + if (memory_barrier_count) { + std::memcpy(args_ptr + memory_barriers_offset, memory_barriers, + sizeof(VkMemoryBarrier) * memory_barrier_count); + } + if (buffer_memory_barrier_count) { + std::memcpy(args_ptr + buffer_memory_barriers_offset, + buffer_memory_barriers, + sizeof(VkBufferMemoryBarrier) * buffer_memory_barrier_count); + } + if (image_memory_barrier_count) { + std::memcpy(args_ptr + image_memory_barriers_offset, image_memory_barriers, + sizeof(VkImageMemoryBarrier) * image_memory_barrier_count); + } +} + +void* DeferredCommandBuffer::WriteCommand(Command command, + size_t arguments_size_bytes) { + size_t arguments_size_elements = + (arguments_size_bytes + sizeof(uintmax_t) - 1) / sizeof(uintmax_t); + size_t offset = command_stream_.size(); + command_stream_.resize(offset + kCommandHeaderSizeElements + + arguments_size_elements); + CommandHeader& header = + *reinterpret_cast(command_stream_.data() + offset); + header.command = command; + header.arguments_size_elements = uint32_t(arguments_size_elements); + return command_stream_.data() + (offset + kCommandHeaderSizeElements); +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.h 
b/src/xenia/gpu/vulkan/deferred_command_buffer.h new file mode 100644 index 000000000..476abe605 --- /dev/null +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.h @@ -0,0 +1,120 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_ +#define XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_ + +#include +#include +#include + +#include "xenia/base/math.h" +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +class VulkanCommandProcessor; + +class DeferredCommandBuffer { + public: + DeferredCommandBuffer(const VulkanCommandProcessor& command_processor, + size_t initial_size_bytes = 1024 * 1024); + + void Reset(); + void Execute(VkCommandBuffer command_buffer); + + void CmdVkBindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, + VkIndexType index_type) { + auto& args = *reinterpret_cast(WriteCommand( + Command::kVkBindIndexBuffer, sizeof(ArgsVkBindIndexBuffer))); + args.buffer = buffer; + args.offset = offset; + args.index_type = index_type; + } + + void CmdVkCopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, + uint32_t region_count, const VkBufferCopy* regions) { + static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t)); + const size_t header_size = + xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy)); + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkCopyBuffer, + header_size + sizeof(VkBufferCopy) * region_count)); + auto& args = *reinterpret_cast(args_ptr); + args.src_buffer = src_buffer; + args.dst_buffer = dst_buffer; + args.region_count = region_count; + 
std::memcpy(args_ptr + header_size, regions, + sizeof(VkBufferCopy) * region_count); + } + + // pNext of all barriers must be null. + void CmdVkPipelineBarrier(VkPipelineStageFlags src_stage_mask, + VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, + uint32_t memory_barrier_count, + const VkMemoryBarrier* memory_barriers, + uint32_t buffer_memory_barrier_count, + const VkBufferMemoryBarrier* buffer_memory_barriers, + uint32_t image_memory_barrier_count, + const VkImageMemoryBarrier* image_memory_barriers); + + private: + enum class Command { + kVkBindIndexBuffer, + kVkCopyBuffer, + kVkPipelineBarrier, + }; + + struct CommandHeader { + Command command; + uint32_t arguments_size_elements; + }; + static constexpr size_t kCommandHeaderSizeElements = + (sizeof(CommandHeader) + sizeof(uintmax_t) - 1) / sizeof(uintmax_t); + + struct ArgsVkBindIndexBuffer { + VkBuffer buffer; + VkDeviceSize offset; + VkIndexType index_type; + }; + + struct ArgsVkCopyBuffer { + VkBuffer src_buffer; + VkBuffer dst_buffer; + uint32_t region_count; + // Followed by VkBufferCopy[]. + }; + + struct ArgsVkPipelineBarrier { + VkPipelineStageFlags src_stage_mask; + VkPipelineStageFlags dst_stage_mask; + VkDependencyFlags dependency_flags; + uint32_t memory_barrier_count; + uint32_t buffer_memory_barrier_count; + uint32_t image_memory_barrier_count; + // Followed by aligned VkMemoryBarrier[], VkBufferMemoryBarrier[], + // VkImageMemoryBarrier[]. + }; + + void* WriteCommand(Command command, size_t arguments_size_bytes); + + const VulkanCommandProcessor& command_processor_; + + // uintmax_t to ensure uint64_t and pointer alignment of all structures. 
+ std::vector command_stream_; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_ \ No newline at end of file diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index f05838d29..c688ca6ee 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -15,6 +15,8 @@ #include "xenia/base/assert.h" #include "xenia/base/logging.h" #include "xenia/base/profiling.h" +#include "xenia/gpu/vulkan/deferred_command_buffer.h" +#include "xenia/gpu/vulkan/vulkan_shared_memory.h" #include "xenia/ui/vulkan/vulkan_context.h" #include "xenia/ui/vulkan/vulkan_provider.h" #include "xenia/ui/vulkan/vulkan_util.h" @@ -25,7 +27,9 @@ namespace vulkan { VulkanCommandProcessor::VulkanCommandProcessor( VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state) - : CommandProcessor(graphics_system, kernel_state) {} + : CommandProcessor(graphics_system, kernel_state), + deferred_command_buffer_(*this) {} + VulkanCommandProcessor::~VulkanCommandProcessor() = default; void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, @@ -39,10 +43,12 @@ bool VulkanCommandProcessor::SetupContext() { return false; } - const ui::vulkan::VulkanProvider& provider = - GetVulkanContext().GetVulkanProvider(); - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); - VkDevice device = provider.device(); + shared_memory_ = + std::make_unique(*this, *memory_, trace_writer_); + if (!shared_memory_->Initialize()) { + XELOGE("Failed to initialize shared memory"); + return false; + } return true; } @@ -50,11 +56,14 @@ bool VulkanCommandProcessor::SetupContext() { void VulkanCommandProcessor::ShutdownContext() { AwaitAllQueueOperationsCompletion(); + shared_memory_.reset(); + const ui::vulkan::VulkanProvider& provider = GetVulkanContext().GetVulkanProvider(); const 
ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + deferred_command_buffer_.Reset(); for (const auto& command_buffer_pair : command_buffers_submitted_) { dfn.vkDestroyCommandPool(device, command_buffer_pair.first.pool, nullptr); } @@ -119,19 +128,46 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, uint32_t index_count, IndexBufferInfo* index_buffer_info, bool major_mode_explicit) { -#if FINE_GRAINED_DRAW_SCOPES +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES BeginSubmission(true); + bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base; + + // Actually draw. + if (indexed) { + uint32_t index_size = + index_buffer_info->format == xenos::IndexFormat::kInt32 + ? sizeof(uint32_t) + : sizeof(uint16_t); + assert_false(index_buffer_info->guest_base & (index_size - 1)); + uint32_t index_base = + index_buffer_info->guest_base & 0x1FFFFFFF & ~(index_size - 1); + uint32_t index_buffer_size = index_buffer_info->count * index_size; + if (!shared_memory_->RequestRange(index_base, index_buffer_size)) { + XELOGE( + "Failed to request index buffer at 0x{:08X} (size {}) in the shared " + "memory", + index_base, index_buffer_size); + return false; + } + deferred_command_buffer_.CmdVkBindIndexBuffer( + shared_memory_->buffer(), index_base, + index_buffer_info->format == xenos::IndexFormat::kInt32 + ? 
VK_INDEX_TYPE_UINT32 + : VK_INDEX_TYPE_UINT16); + } + shared_memory_->Use(VulkanSharedMemory::Usage::kRead); + return true; } bool VulkanCommandProcessor::IssueCopy() { -#if FINE_GRAINED_DRAW_SCOPES +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES BeginSubmission(true); @@ -217,12 +253,14 @@ void VulkanCommandProcessor::CheckSubmissionFence(uint64_t await_submission) { command_buffers_writable_.push_back(command_buffer_pair.first); command_buffers_submitted_.pop_front(); } + + shared_memory_->CompletedSubmissionUpdated(); } void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { -#if FINE_GRAINED_DRAW_SCOPES +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); -#endif // FINE_GRAINED_DRAW_SCOPES +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES bool is_opening_frame = is_guest_command && !frame_open_; if (submission_open_ && !is_opening_frame) { @@ -257,6 +295,11 @@ void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { if (!submission_open_) { submission_open_ = true; + + // Start a new deferred command buffer - will submit it to the real one in + // the end of the submission (when async pipeline state object creation + // requests are fulfilled). 
+ deferred_command_buffer_.Reset(); } if (is_opening_frame) { @@ -321,6 +364,8 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { bool is_closing_frame = is_swap && frame_open_; if (submission_open_) { + shared_memory_->EndSubmission(); + assert_false(command_buffers_writable_.empty()); CommandBuffer command_buffer = command_buffers_writable_.back(); if (dfn.vkResetCommandPool(device, command_buffer.pool, 0) != VK_SUCCESS) { @@ -339,6 +384,7 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { XELOGE("Failed to begin a Vulkan command buffer"); return false; } + deferred_command_buffer_.Execute(command_buffer.buffer); // TODO(Triang3l): Write deferred command buffer commands. if (dfn.vkEndCommandBuffer(command_buffer.buffer) != VK_SUCCESS) { XELOGE("Failed to end a Vulkan command buffer"); diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 90409159d..016f9f7d0 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -16,7 +16,9 @@ #include #include "xenia/gpu/command_processor.h" +#include "xenia/gpu/vulkan/deferred_command_buffer.h" #include "xenia/gpu/vulkan/vulkan_graphics_system.h" +#include "xenia/gpu/vulkan/vulkan_shared_memory.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/kernel_state.h" #include "xenia/ui/vulkan/vulkan_context.h" @@ -39,6 +41,13 @@ class VulkanCommandProcessor : public CommandProcessor { return static_cast(*context_); } + // Returns the deferred drawing command list for the currently open + // submission. 
+ DeferredCommandBuffer& deferred_command_buffer() { + assert_true(submission_open_); + return deferred_command_buffer_; + } + uint64_t GetCurrentSubmission() const { return submission_completed_ + uint64_t(submissions_in_flight_fences_.size()) + 1; @@ -113,6 +122,9 @@ class VulkanCommandProcessor : public CommandProcessor { }; std::vector command_buffers_writable_; std::deque> command_buffers_submitted_; + DeferredCommandBuffer deferred_command_buffer_; + + std::unique_ptr shared_memory_; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc new file mode 100644 index 000000000..ce8818cd4 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc @@ -0,0 +1,309 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/vulkan_shared_memory.h" + +#include +#include +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/gpu/vulkan/deferred_command_buffer.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include "xenia/ui/vulkan/vulkan_util.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +VulkanSharedMemory::VulkanSharedMemory( + VulkanCommandProcessor& command_processor, Memory& memory, + TraceWriter& trace_writer) + : SharedMemory(memory), + command_processor_(command_processor), + trace_writer_(trace_writer) {} + +VulkanSharedMemory::~VulkanSharedMemory() { Shutdown(true); } + +bool VulkanSharedMemory::Initialize() { + InitializeCommon(); + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + + VkBufferCreateInfo buffer_create_info; + buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + buffer_create_info.pNext = nullptr; + buffer_create_info.flags = 0; + const VkBufferCreateFlags sparse_flags = + VK_BUFFER_CREATE_SPARSE_BINDING_BIT | + VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT; + // TODO(Triang3l): Sparse binding. 
+ buffer_create_info.size = kBufferSize; + buffer_create_info.usage = + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + buffer_create_info.queueFamilyIndexCount = 0; + buffer_create_info.pQueueFamilyIndices = nullptr; + VkResult buffer_create_result = + dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_); + if (buffer_create_result != VK_SUCCESS) { + if (buffer_create_info.flags & sparse_flags) { + buffer_create_info.flags &= ~sparse_flags; + buffer_create_result = + dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_); + } + if (buffer_create_result != VK_SUCCESS) { + XELOGE("Shared memory: Failed to create the {} MB Vulkan buffer", + kBufferSize >> 20); + Shutdown(); + return false; + } + } + VkMemoryRequirements buffer_memory_requirements; + dfn.vkGetBufferMemoryRequirements(device, buffer_, + &buffer_memory_requirements); + // TODO(Triang3l): Determine sparse binding properties from memory + // requirements. 
+ if (!xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits & + provider.memory_types_device_local(), + &buffer_memory_type_)) { + XELOGE( + "Shared memory: Failed to get a device-local Vulkan memory type for " + "the buffer"); + Shutdown(); + return false; + } + if (!(buffer_create_info.flags & sparse_flags)) { + VkMemoryAllocateInfo buffer_memory_allocate_info; + buffer_memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + buffer_memory_allocate_info.pNext = nullptr; + buffer_memory_allocate_info.allocationSize = + buffer_memory_requirements.size; + buffer_memory_allocate_info.memoryTypeIndex = buffer_memory_type_; + VkDeviceMemory buffer_memory; + if (dfn.vkAllocateMemory(device, &buffer_memory_allocate_info, nullptr, + &buffer_memory) != VK_SUCCESS) { + XELOGE( + "Shared memory: Failed to allocate {} MB of memory for the Vulkan " + "buffer", + kBufferSize >> 20); + Shutdown(); + return false; + } + buffer_memory_.push_back(buffer_memory); + if (dfn.vkBindBufferMemory(device, buffer_, buffer_memory, 0) != + VK_SUCCESS) { + XELOGE("Shared memory: Failed to bind memory to the Vulkan buffer"); + Shutdown(); + return false; + } + } + + upload_buffer_pool_ = std::make_unique( + provider, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + xe::align(ui::vulkan::VulkanUploadBufferPool::kDefaultPageSize, + size_t(1) << page_size_log2())); + + return true; +} + +void VulkanSharedMemory::Shutdown(bool from_destructor) { + upload_buffer_pool_.reset(); + + last_written_range_ = std::make_pair(0, 0); + last_usage_ = Usage::kTransferDestination; + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, buffer_); + + buffer_memory_allocated_.clear(); + for (VkDeviceMemory memory : buffer_memory_) { + dfn.vkFreeMemory(device, memory, 
nullptr); + } + buffer_memory_.clear(); + + // If calling from the destructor, the SharedMemory destructor will call + // ShutdownCommon. + if (!from_destructor) { + ShutdownCommon(); + } +} + +void VulkanSharedMemory::CompletedSubmissionUpdated() { + upload_buffer_pool_->Reclaim(command_processor_.GetCompletedSubmission()); +} + +void VulkanSharedMemory::EndSubmission() { upload_buffer_pool_->FlushWrites(); } + +void VulkanSharedMemory::Use(Usage usage, + std::pair written_range) { + written_range.first = std::min(written_range.first, kBufferSize); + written_range.second = + std::min(written_range.second, kBufferSize - written_range.first); + assert_true(usage != Usage::kRead || !written_range.second); + if (last_usage_ != usage || last_written_range_.second) { + VkPipelineStageFlags stage_mask_src, stage_mask_dst; + VkBufferMemoryBarrier buffer_memory_barrier; + GetBarrier(last_usage_, stage_mask_src, + buffer_memory_barrier.srcAccessMask); + GetBarrier(usage, stage_mask_dst, buffer_memory_barrier.dstAccessMask); + buffer_memory_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + buffer_memory_barrier.pNext = nullptr; + buffer_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + buffer_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + buffer_memory_barrier.buffer = buffer_; + if (last_usage_ == usage) { + // Committing the previous write. + buffer_memory_barrier.offset = VkDeviceSize(last_written_range_.first); + buffer_memory_barrier.size = VkDeviceSize(last_written_range_.second); + } else { + // Changing the stage and access mask - all preceding writes must be + // available not only to the source stage, but to the destination as well. 
+ buffer_memory_barrier.offset = 0; + buffer_memory_barrier.size = VK_WHOLE_SIZE; + last_usage_ = usage; + } + command_processor_.deferred_command_buffer().CmdVkPipelineBarrier( + stage_mask_src, stage_mask_dst, 0, 0, nullptr, 1, + &buffer_memory_barrier, 0, nullptr); + } + last_written_range_ = written_range; +} + +bool VulkanSharedMemory::EnsureHostGpuMemoryAllocated(uint32_t start, + uint32_t length) { + // TODO(Triang3l): Do sparse binding. + return true; +} + +bool VulkanSharedMemory::UploadRanges( + const std::vector>& upload_page_ranges) { + if (upload_page_ranges.empty()) { + return true; + } + // upload_page_ranges are sorted, use them to determine the range for the + // ordering barrier. + Use(Usage::kTransferDestination, + std::make_pair( + upload_page_ranges.front().first << page_size_log2(), + (upload_page_ranges.back().first + upload_page_ranges.back().second - + upload_page_ranges.front().first) + << page_size_log2())); + DeferredCommandBuffer& command_buffer = + command_processor_.deferred_command_buffer(); + uint64_t submission_current = command_processor_.GetCurrentSubmission(); + bool successful = true; + upload_regions_.clear(); + VkBuffer upload_buffer_previous = VK_NULL_HANDLE; + for (auto upload_range : upload_page_ranges) { + uint32_t upload_range_start = upload_range.first; + uint32_t upload_range_length = upload_range.second; + trace_writer_.WriteMemoryRead(upload_range_start << page_size_log2(), + upload_range_length << page_size_log2()); + while (upload_range_length) { + VkBuffer upload_buffer; + VkDeviceSize upload_buffer_offset, upload_buffer_size; + uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial( + submission_current, upload_range_length << page_size_log2(), + size_t(1) << page_size_log2(), upload_buffer, upload_buffer_offset, + upload_buffer_size); + if (upload_buffer_mapping == nullptr) { + XELOGE("Shared memory: Failed to get a Vulkan upload buffer"); + successful = false; + break; + } + 
MakeRangeValid(upload_range_start << page_size_log2(), + uint32_t(upload_buffer_size), false); + std::memcpy( + upload_buffer_mapping, + memory().TranslatePhysical(upload_range_start << page_size_log2()), + upload_buffer_size); + if (upload_buffer_previous != upload_buffer && !upload_regions_.empty()) { + assert_true(upload_buffer_previous != VK_NULL_HANDLE); + command_buffer.CmdVkCopyBuffer(upload_buffer_previous, buffer_, + uint32_t(upload_regions_.size()), + upload_regions_.data()); + upload_regions_.clear(); + } + upload_buffer_previous = upload_buffer; + VkBufferCopy& upload_region = upload_regions_.emplace_back(); + upload_region.srcOffset = upload_buffer_offset; + upload_region.dstOffset = + VkDeviceSize(upload_range_start << page_size_log2()); + upload_region.size = upload_buffer_size; + uint32_t upload_buffer_pages = + uint32_t(upload_buffer_size >> page_size_log2()); + upload_range_start += upload_buffer_pages; + upload_range_length -= upload_buffer_pages; + } + if (!successful) { + break; + } + } + if (!upload_regions_.empty()) { + assert_true(upload_buffer_previous != VK_NULL_HANDLE); + command_buffer.CmdVkCopyBuffer(upload_buffer_previous, buffer_, + uint32_t(upload_regions_.size()), + upload_regions_.data()); + upload_regions_.clear(); + } + return successful; +} + +void VulkanSharedMemory::GetBarrier(Usage usage, + VkPipelineStageFlags& stage_mask, + VkAccessFlags& access_mask) const { + switch (usage) { + case Usage::kComputeWrite: + stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_mask = VK_ACCESS_SHADER_READ_BIT; + return; + case Usage::kTransferDestination: + stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; + access_mask = VK_ACCESS_TRANSFER_WRITE_BIT; + return; + } + stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + if 
(provider.device_features().tessellationShader) { + stage_mask |= VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; + } + access_mask = VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_SHADER_READ_BIT; + switch (usage) { + case Usage::kRead: + stage_mask |= + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT; + access_mask |= VK_ACCESS_TRANSFER_READ_BIT; + break; + case Usage::kGuestDrawReadWrite: + access_mask |= VK_ACCESS_SHADER_WRITE_BIT; + break; + default: + assert_unhandled_case(usage); + } +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.h b/src/xenia/gpu/vulkan/vulkan_shared_memory.h new file mode 100644 index 000000000..dec2b8280 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.h @@ -0,0 +1,97 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_ +#define XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_ + +#include +#include +#include +#include + +#include "xenia/gpu/shared_memory.h" +#include "xenia/gpu/trace_writer.h" +#include "xenia/memory.h" +#include "xenia/ui/vulkan/vulkan_provider.h" +#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +class VulkanCommandProcessor; + +class VulkanSharedMemory : public SharedMemory { + public: + VulkanSharedMemory(VulkanCommandProcessor& command_processor, Memory& memory, + TraceWriter& trace_writer); + ~VulkanSharedMemory() override; + + bool Initialize(); + void Shutdown(bool from_destructor = false); + + void CompletedSubmissionUpdated(); + void EndSubmission(); + + enum class Usage { + // Index buffer, vfetch, compute read, transfer source. + kRead, + // Index buffer, vfetch, memexport. + kGuestDrawReadWrite, + kComputeWrite, + kTransferDestination, + }; + // Places pipeline barrier for the target usage, also ensuring writes of + // adjacent are ordered with writes of each other and reads. + void Use(Usage usage, std::pair written_range = {}); + + VkBuffer buffer() const { return buffer_; } + + protected: + bool EnsureHostGpuMemoryAllocated(uint32_t start, uint32_t length) override; + + bool UploadRanges(const std::vector>& + upload_page_ranges) override; + + private: + bool IsSparse() const { + return buffer_allocation_size_log2_ < kBufferSizeLog2; + } + + void GetBarrier(Usage usage, VkPipelineStageFlags& stage_mask, + VkAccessFlags& access_mask) const; + + VulkanCommandProcessor& command_processor_; + TraceWriter& trace_writer_; + + VkBuffer buffer_ = VK_NULL_HANDLE; + uint32_t buffer_memory_type_; + // Maximum of 1024 allocations in the worst case for all of the buffer because + // of the overall 4096 allocation count limit on Windows drivers. 
+ static constexpr uint32_t kMinBufferAllocationSizeLog2 = + std::max(kOptimalAllocationLog2, kBufferSizeLog2 - 10); + uint32_t buffer_allocation_size_log2_ = kBufferSizeLog2; + // Sparse memory allocations, of different sizes. + std::vector buffer_memory_; + // One bit per every 2^buffer_allocation_size_log2_ of the buffer. + std::vector buffer_memory_allocated_; + + // First usage will likely be uploading. + Usage last_usage_ = Usage::kTransferDestination; + std::pair last_written_range_ = {}; + + std::unique_ptr upload_buffer_pool_; + std::vector upload_regions_; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_ diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index 6444af302..bbe90b04c 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -538,6 +538,7 @@ bool VulkanProvider::Initialize() { XE_VULKAN_LOAD_DFN(vkCmdBindPipeline); XE_VULKAN_LOAD_DFN(vkCmdBindVertexBuffers); XE_VULKAN_LOAD_DFN(vkCmdClearColorImage); + XE_VULKAN_LOAD_DFN(vkCmdCopyBuffer); XE_VULKAN_LOAD_DFN(vkCmdCopyBufferToImage); XE_VULKAN_LOAD_DFN(vkCmdDraw); XE_VULKAN_LOAD_DFN(vkCmdDrawIndexed); diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h index 2e14b9398..8d7c10ed3 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.h +++ b/src/xenia/ui/vulkan/vulkan_provider.h @@ -35,6 +35,8 @@ #define XELOGVK XELOGI +#define XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES 1 + namespace xe { namespace ui { namespace vulkan { @@ -139,6 +141,7 @@ class VulkanProvider : public GraphicsProvider { PFN_vkCmdBindPipeline vkCmdBindPipeline; PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers; PFN_vkCmdClearColorImage vkCmdClearColorImage; + PFN_vkCmdCopyBuffer vkCmdCopyBuffer; PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage; PFN_vkCmdDraw vkCmdDraw; PFN_vkCmdDrawIndexed vkCmdDrawIndexed; From 
4d59f556a95426fe01cc52b1513f9238d84d999a Mon Sep 17 00:00:00 2001 From: Triang3l Date: Wed, 7 Oct 2020 21:03:50 +0300 Subject: [PATCH 021/123] [Vulkan] Sparse shared memory --- src/xenia/gpu/d3d12/d3d12_shared_memory.cc | 8 +- .../gpu/vulkan/vulkan_command_processor.cc | 127 +++++++++++-- .../gpu/vulkan/vulkan_command_processor.h | 29 ++- src/xenia/gpu/vulkan/vulkan_shared_memory.cc | 167 ++++++++++++++---- src/xenia/gpu/vulkan/vulkan_shared_memory.h | 17 +- src/xenia/ui/vulkan/vulkan_provider.cc | 3 +- src/xenia/ui/vulkan/vulkan_provider.h | 15 +- 7 files changed, 298 insertions(+), 68 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_shared_memory.cc b/src/xenia/gpu/d3d12/d3d12_shared_memory.cc index 992f9aed5..c260545ac 100644 --- a/src/xenia/gpu/d3d12/d3d12_shared_memory.cc +++ b/src/xenia/gpu/d3d12/d3d12_shared_memory.cc @@ -22,9 +22,9 @@ DEFINE_bool(d3d12_tiled_shared_memory, true, "Enable tiled resources for shared memory emulation. Disabling " - "them greatly increases video memory usage - a 512 MB buffer is " - "created - but allows graphics debuggers that don't support tiled " - "resources to work.", + "them greatly video memory usage - a 512 MB buffer is created - " + "but allows graphics debuggers that don't support tiled resources " + "to work.", "D3D12"); namespace xe { @@ -68,7 +68,7 @@ bool D3D12SharedMemory::Initialize() { XELOGGPU( "Direct3D 12 tiled resources are not used for shared memory " "emulation - video memory usage may increase significantly " - "because a full {} MB buffer will be created!", + "because a full {} MB buffer will be created", kBufferSize >> 20); if (provider.GetGraphicsAnalysis()) { // As of October 8th, 2018, PIX doesn't support tiled buffers. 
diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index c688ca6ee..531182ca5 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -63,6 +63,10 @@ void VulkanCommandProcessor::ShutdownContext() { const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + sparse_bind_wait_stage_mask_ = 0; + sparse_buffer_binds_.clear(); + sparse_memory_binds_.clear(); + deferred_command_buffer_.Reset(); for (const auto& command_buffer_pair : command_buffers_submitted_) { dfn.vkDestroyCommandPool(device, command_buffer_pair.first.pool, nullptr); @@ -78,15 +82,19 @@ void VulkanCommandProcessor::ShutdownContext() { frame_current_ = 1; frame_open_ = false; - for (const auto& semaphore : - submissions_in_flight_sparse_binding_semaphores_) { + for (const auto& semaphore : submissions_in_flight_semaphores_) { dfn.vkDestroySemaphore(device, semaphore.first, nullptr); } - submissions_in_flight_sparse_binding_semaphores_.clear(); + submissions_in_flight_semaphores_.clear(); for (VkFence& fence : submissions_in_flight_fences_) { dfn.vkDestroyFence(device, fence, nullptr); } submissions_in_flight_fences_.clear(); + current_submission_wait_stage_masks_.clear(); + for (VkSemaphore semaphore : current_submission_wait_semaphores_) { + dfn.vkDestroySemaphore(device, semaphore, nullptr); + } + current_submission_wait_semaphores_.clear(); submission_completed_ = 0; submission_open_ = false; @@ -102,6 +110,22 @@ void VulkanCommandProcessor::ShutdownContext() { CommandProcessor::ShutdownContext(); } +void VulkanCommandProcessor::SparseBindBuffer( + VkBuffer buffer, uint32_t bind_count, const VkSparseMemoryBind* binds, + VkPipelineStageFlags wait_stage_mask) { + if (!bind_count) { + return; + } + SparseBufferBind& buffer_bind = sparse_buffer_binds_.emplace_back(); + buffer_bind.buffer = buffer; + buffer_bind.bind_offset = 
sparse_memory_binds_.size(); + buffer_bind.bind_count = bind_count; + sparse_memory_binds_.reserve(sparse_memory_binds_.size() + bind_count); + sparse_memory_binds_.insert(sparse_memory_binds_.end(), binds, + binds + bind_count); + sparse_bind_wait_stage_mask_ |= wait_stage_mask; +} + void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, uint32_t frontbuffer_height) { @@ -233,15 +257,15 @@ void VulkanCommandProcessor::CheckSubmissionFence(uint64_t await_submission) { submissions_in_flight_fences_awaited_end); submission_completed_ += fences_awaited; - // Reclaim semaphores used for sparse binding and graphics synchronization. - while (!submissions_in_flight_sparse_binding_semaphores_.empty()) { + // Reclaim semaphores. + while (!submissions_in_flight_semaphores_.empty()) { const auto& semaphore_submission = - submissions_in_flight_sparse_binding_semaphores_.front(); + submissions_in_flight_semaphores_.front(); if (semaphore_submission.second > submission_completed_) { break; } semaphores_free_.push_back(semaphore_submission.first); - submissions_in_flight_sparse_binding_semaphores_.pop_front(); + submissions_in_flight_semaphores_.pop_front(); } // Reclaim command pools. @@ -322,14 +346,26 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { VkFence fence; if (dfn.vkCreateFence(device, &fence_create_info, nullptr, &fence) != VK_SUCCESS) { - XELOGE("Failed to create a Vulkan submission fence"); + XELOGE("Failed to create a Vulkan fence"); // Try to submit later. Completely dropping the submission is not // permitted because resources would be left in an undefined state. return false; } fences_free_.push_back(fence); } - // TODO(Triang3l): Create a sparse binding semaphore. 
+ if (!sparse_memory_binds_.empty() && semaphores_free_.empty()) { + VkSemaphoreCreateInfo semaphore_create_info; + semaphore_create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + semaphore_create_info.pNext = nullptr; + semaphore_create_info.flags = 0; + VkSemaphore semaphore; + if (dfn.vkCreateSemaphore(device, &semaphore_create_info, nullptr, + &semaphore) != VK_SUCCESS) { + XELOGE("Failed to create a Vulkan semaphore"); + return false; + } + semaphores_free_.push_back(semaphore); + } if (command_buffers_writable_.empty()) { CommandBuffer command_buffer; VkCommandPoolCreateInfo command_pool_create_info; @@ -366,6 +402,52 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { if (submission_open_) { shared_memory_->EndSubmission(); + // Submit sparse binds earlier, before executing the deferred command + // buffer, to reduce latency. + if (!sparse_memory_binds_.empty()) { + sparse_buffer_bind_infos_temp_.clear(); + sparse_buffer_bind_infos_temp_.reserve(sparse_buffer_binds_.size()); + for (const SparseBufferBind& sparse_buffer_bind : sparse_buffer_binds_) { + VkSparseBufferMemoryBindInfo& sparse_buffer_bind_info = + sparse_buffer_bind_infos_temp_.emplace_back(); + sparse_buffer_bind_info.buffer = sparse_buffer_bind.buffer; + sparse_buffer_bind_info.bindCount = sparse_buffer_bind.bind_count; + sparse_buffer_bind_info.pBinds = + sparse_memory_binds_.data() + sparse_buffer_bind.bind_offset; + } + assert_false(semaphores_free_.empty()); + VkSemaphore bind_sparse_semaphore = semaphores_free_.back(); + VkBindSparseInfo bind_sparse_info; + bind_sparse_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO; + bind_sparse_info.pNext = nullptr; + bind_sparse_info.waitSemaphoreCount = 0; + bind_sparse_info.pWaitSemaphores = nullptr; + bind_sparse_info.bufferBindCount = + uint32_t(sparse_buffer_bind_infos_temp_.size()); + bind_sparse_info.pBufferBinds = + !sparse_buffer_bind_infos_temp_.empty() + ? 
sparse_buffer_bind_infos_temp_.data() + : nullptr; + bind_sparse_info.imageOpaqueBindCount = 0; + bind_sparse_info.pImageOpaqueBinds = nullptr; + bind_sparse_info.imageBindCount = 0; + bind_sparse_info.pImageBinds = 0; + bind_sparse_info.signalSemaphoreCount = 1; + bind_sparse_info.pSignalSemaphores = &bind_sparse_semaphore; + if (provider.BindSparse(1, &bind_sparse_info, VK_NULL_HANDLE) != + VK_SUCCESS) { + XELOGE("Failed to submit Vulkan sparse binds"); + return false; + } + current_submission_wait_semaphores_.push_back(bind_sparse_semaphore); + semaphores_free_.pop_back(); + current_submission_wait_stage_masks_.push_back( + sparse_bind_wait_stage_mask_); + sparse_bind_wait_stage_mask_ = 0; + sparse_buffer_binds_.clear(); + sparse_memory_binds_.clear(); + } + assert_false(command_buffers_writable_.empty()); CommandBuffer command_buffer = command_buffers_writable_.back(); if (dfn.vkResetCommandPool(device, command_buffer.pool, 0) != VK_SUCCESS) { @@ -385,18 +467,25 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { return false; } deferred_command_buffer_.Execute(command_buffer.buffer); - // TODO(Triang3l): Write deferred command buffer commands. if (dfn.vkEndCommandBuffer(command_buffer.buffer) != VK_SUCCESS) { XELOGE("Failed to end a Vulkan command buffer"); return false; } - // TODO(Triang3l): Submit sparse binding. 
+ VkSubmitInfo submit_info; submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submit_info.pNext = nullptr; - submit_info.waitSemaphoreCount = 0; - submit_info.pWaitSemaphores = nullptr; - submit_info.pWaitDstStageMask = nullptr; + if (!current_submission_wait_semaphores_.empty()) { + submit_info.waitSemaphoreCount = + uint32_t(current_submission_wait_semaphores_.size()); + submit_info.pWaitSemaphores = current_submission_wait_semaphores_.data(); + submit_info.pWaitDstStageMask = + current_submission_wait_stage_masks_.data(); + } else { + submit_info.waitSemaphoreCount = 0; + submit_info.pWaitSemaphores = nullptr; + submit_info.pWaitDstStageMask = nullptr; + } submit_info.commandBufferCount = 1; submit_info.pCommandBuffers = &command_buffer.buffer; submit_info.signalSemaphoreCount = 0; @@ -412,8 +501,14 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { XELOGE("Failed to submit a Vulkan command buffer"); return false; } - command_buffers_submitted_.push_back( - std::make_pair(command_buffer, GetCurrentSubmission())); + uint64_t submission_current = GetCurrentSubmission(); + current_submission_wait_stage_masks_.clear(); + for (VkSemaphore semaphore : current_submission_wait_semaphores_) { + submissions_in_flight_semaphores_.emplace_back(semaphore, + submission_current); + } + current_submission_wait_semaphores_.clear(); + command_buffers_submitted_.emplace_back(command_buffer, submission_current); command_buffers_writable_.pop_back(); // Increments the current submission number, going to the next submission. 
submissions_in_flight_fences_.push_back(fence); diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 016f9f7d0..6b9096a20 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -54,6 +54,16 @@ class VulkanCommandProcessor : public CommandProcessor { } uint64_t GetCompletedSubmission() const { return submission_completed_; } + // Sparse binds are: + // - In a single submission, all submitted in one vkQueueBindSparse. + // - Sent to the queue without waiting for a semaphore. + // Thus, multiple sparse binds between the completed and the current + // submission, and within one submission, must not touch any overlapping + // memory regions. + void SparseBindBuffer(VkBuffer buffer, uint32_t bind_count, + const VkSparseMemoryBind* binds, + VkPipelineStageFlags wait_stage_mask); + protected: bool SetupContext() override; void ShutdownContext() override; @@ -103,9 +113,13 @@ class VulkanCommandProcessor : public CommandProcessor { bool submission_open_ = false; uint64_t submission_completed_ = 0; + // In case vkQueueSubmit fails after something like a successful + // vkQueueBindSparse, to wait correctly on the next attempt. 
+ std::vector current_submission_wait_semaphores_; + std::vector current_submission_wait_stage_masks_; std::vector submissions_in_flight_fences_; std::deque> - submissions_in_flight_sparse_binding_semaphores_; + submissions_in_flight_semaphores_; static constexpr uint32_t kMaxFramesInFlight = 3; bool frame_open_ = false; @@ -124,6 +138,19 @@ class VulkanCommandProcessor : public CommandProcessor { std::deque> command_buffers_submitted_; DeferredCommandBuffer deferred_command_buffer_; + std::vector sparse_memory_binds_; + struct SparseBufferBind { + VkBuffer buffer; + size_t bind_offset; + uint32_t bind_count; + }; + std::vector sparse_buffer_binds_; + // SparseBufferBind converted to VkSparseBufferMemoryBindInfo to this buffer + // on submission (because pBinds should point to a place in std::vector, but + // it may be reallocated). + std::vector sparse_buffer_bind_infos_temp_; + VkPipelineStageFlags sparse_bind_wait_stage_mask_ = 0; + std::unique_ptr shared_memory_; }; diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc index d4a4a0049..49b9cbbb0 100644 --- a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc @@ -15,12 +15,20 @@ #include #include "xenia/base/assert.h" +#include "xenia/base/cvar.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/gpu/vulkan/deferred_command_buffer.h" #include "xenia/gpu/vulkan/vulkan_command_processor.h" #include "xenia/ui/vulkan/vulkan_util.h" +DEFINE_bool(vulkan_sparse_shared_memory, true, + "Enable sparse binding for shared memory emulation. 
Disabling it " + "increases video memory usage - a 512 MB buffer is created - but " + "allows graphics debuggers that don't support sparse binding to " + "work.", + "Vulkan"); + namespace xe { namespace gpu { namespace vulkan { @@ -43,14 +51,15 @@ bool VulkanSharedMemory::Initialize() { VkDevice device = provider.device(); const VkPhysicalDeviceFeatures& device_features = provider.device_features(); - VkBufferCreateInfo buffer_create_info; - buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - buffer_create_info.pNext = nullptr; - buffer_create_info.flags = 0; const VkBufferCreateFlags sparse_flags = VK_BUFFER_CREATE_SPARSE_BINDING_BIT | VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT; - // TODO(Triang3l): Sparse binding. + + // Try to create a sparse buffer. + VkBufferCreateInfo buffer_create_info; + buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + buffer_create_info.pNext = nullptr; + buffer_create_info.flags = sparse_flags; buffer_create_info.size = kBufferSize; buffer_create_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | @@ -58,39 +67,90 @@ bool VulkanSharedMemory::Initialize() { buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; buffer_create_info.queueFamilyIndexCount = 0; buffer_create_info.pQueueFamilyIndices = nullptr; - VkResult buffer_create_result = - dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_); - if (buffer_create_result != VK_SUCCESS) { - if (buffer_create_info.flags & sparse_flags) { - buffer_create_info.flags &= ~sparse_flags; - buffer_create_result = - dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_); + if (cvars::vulkan_sparse_shared_memory && + provider.IsSparseBindingSupported() && + device_features.sparseResidencyBuffer) { + if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_) == + VK_SUCCESS) { + VkMemoryRequirements buffer_memory_requirements; + dfn.vkGetBufferMemoryRequirements(device, buffer_, + 
&buffer_memory_requirements); + if (xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits & + provider.memory_types_device_local(), + &buffer_memory_type_)) { + uint32_t allocation_size_log2; + xe::bit_scan_forward( + std::max(uint64_t(buffer_memory_requirements.alignment), + uint64_t(1)), + &allocation_size_log2); + if (allocation_size_log2 < kBufferSizeLog2) { + // Maximum of 1024 allocations in the worst case for all of the + // buffer because of the overall 4096 allocation count limit on + // Windows drivers. + InitializeSparseHostGpuMemory( + std::max(allocation_size_log2, + std::max(kHostGpuMemoryOptimalSparseAllocationLog2, + kBufferSizeLog2 - uint32_t(10)))); + } else { + // Shouldn't happen on any real platform, but no point allocating the + // buffer sparsely. + dfn.vkDestroyBuffer(device, buffer_, nullptr); + buffer_ = VK_NULL_HANDLE; + } + } else { + XELOGE( + "Shared memory: Failed to get a device-local Vulkan memory type " + "for the sparse buffer"); + dfn.vkDestroyBuffer(device, buffer_, nullptr); + buffer_ = VK_NULL_HANDLE; + } + } else { + XELOGE("Shared memory: Failed to create the {} MB Vulkan sparse buffer", + kBufferSize >> 20); } - if (buffer_create_result != VK_SUCCESS) { + } + + // Create a non-sparse buffer if there were issues with the sparse buffer. 
+ if (buffer_ == VK_NULL_HANDLE) { + XELOGGPU( + "Vulkan sparse binding is not used for shared memory emulation - video " + "memory usage may increase significantly because a full {} MB buffer " + "will be created", + kBufferSize >> 20); + buffer_create_info.flags &= ~sparse_flags; + if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_) != + VK_SUCCESS) { XELOGE("Shared memory: Failed to create the {} MB Vulkan buffer", kBufferSize >> 20); Shutdown(); return false; } - } - VkMemoryRequirements buffer_memory_requirements; - dfn.vkGetBufferMemoryRequirements(device, buffer_, - &buffer_memory_requirements); - // TODO(Triang3l): Determine sparse binding properties from memory - // requirements. - if (!xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits & - provider.memory_types_device_local(), - &buffer_memory_type_)) { - XELOGE( - "Shared memory: Failed to get a device-local Vulkan memory type for " - "the buffer"); - Shutdown(); - return false; - } - if (!(buffer_create_info.flags & sparse_flags)) { + VkMemoryRequirements buffer_memory_requirements; + dfn.vkGetBufferMemoryRequirements(device, buffer_, + &buffer_memory_requirements); + if (!xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits & + provider.memory_types_device_local(), + &buffer_memory_type_)) { + XELOGE( + "Shared memory: Failed to get a device-local Vulkan memory type for " + "the buffer"); + Shutdown(); + return false; + } VkMemoryAllocateInfo buffer_memory_allocate_info; buffer_memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - buffer_memory_allocate_info.pNext = nullptr; + VkMemoryDedicatedAllocateInfoKHR buffer_memory_dedicated_allocate_info; + if (provider.device_extensions().khr_dedicated_allocation) { + buffer_memory_dedicated_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; + buffer_memory_dedicated_allocate_info.pNext = nullptr; + buffer_memory_dedicated_allocate_info.image = VK_NULL_HANDLE; + 
buffer_memory_dedicated_allocate_info.buffer = buffer_; + buffer_memory_allocate_info.pNext = + &buffer_memory_dedicated_allocate_info; + } else { + buffer_memory_allocate_info.pNext = nullptr; + } buffer_memory_allocate_info.allocationSize = buffer_memory_requirements.size; buffer_memory_allocate_info.memoryTypeIndex = buffer_memory_type_; @@ -133,8 +193,6 @@ void VulkanSharedMemory::Shutdown(bool from_destructor) { VkDevice device = provider.device(); ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, buffer_); - - buffer_memory_allocated_.clear(); for (VkDeviceMemory memory : buffer_memory_) { dfn.vkFreeMemory(device, memory, nullptr); } @@ -188,6 +246,51 @@ void VulkanSharedMemory::Use(Usage usage, last_written_range_ = written_range; } +bool VulkanSharedMemory::AllocateSparseHostGpuMemoryRange( + uint32_t offset_allocations, uint32_t length_allocations) { + if (!length_allocations) { + return true; + } + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + VkMemoryAllocateInfo memory_allocate_info; + memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + memory_allocate_info.pNext = nullptr; + memory_allocate_info.allocationSize = + length_allocations << host_gpu_memory_sparse_granularity_log2(); + memory_allocate_info.memoryTypeIndex = buffer_memory_type_; + VkDeviceMemory memory; + if (dfn.vkAllocateMemory(device, &memory_allocate_info, nullptr, &memory) != + VK_SUCCESS) { + XELOGE("Shared memory: Failed to allocate sparse buffer memory"); + return false; + } + buffer_memory_.push_back(memory); + + VkSparseMemoryBind bind; + bind.resourceOffset = offset_allocations + << host_gpu_memory_sparse_granularity_log2(); + bind.size = memory_allocate_info.allocationSize; + bind.memory = memory; + bind.memoryOffset = 0; + bind.flags = 0; + VkPipelineStageFlags 
bind_wait_stage_mask = + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT; + if (provider.device_features().tessellationShader) { + bind_wait_stage_mask |= + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; + } + command_processor_.SparseBindBuffer(buffer_, 1, &bind, bind_wait_stage_mask); + + return true; +} + bool VulkanSharedMemory::UploadRanges( const std::vector>& upload_page_ranges) { if (upload_page_ranges.empty()) { diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.h b/src/xenia/gpu/vulkan/vulkan_shared_memory.h index a64ef17f8..2d5d15a38 100644 --- a/src/xenia/gpu/vulkan/vulkan_shared_memory.h +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.h @@ -54,14 +54,13 @@ class VulkanSharedMemory : public SharedMemory { VkBuffer buffer() const { return buffer_; } protected: + bool AllocateSparseHostGpuMemoryRange(uint32_t offset_allocations, + uint32_t length_allocations) override; + bool UploadRanges(const std::vector>& upload_page_ranges) override; private: - bool IsSparse() const { - return buffer_allocation_size_log2_ < kBufferSizeLog2; - } - void GetBarrier(Usage usage, VkPipelineStageFlags& stage_mask, VkAccessFlags& access_mask) const; @@ -70,16 +69,8 @@ class VulkanSharedMemory : public SharedMemory { VkBuffer buffer_ = VK_NULL_HANDLE; uint32_t buffer_memory_type_; - // Maximum of 1024 allocations in the worst case for all of the buffer because - // of the overall 4096 allocation count limit on Windows drivers. - static constexpr uint32_t kMinBufferAllocationSizeLog2 = - std::max(kHostGpuMemoryOptimalSparseAllocationLog2, - kBufferSizeLog2 - uint32_t(10)); - uint32_t buffer_allocation_size_log2_ = kBufferSizeLog2; - // Sparse memory allocations, of different sizes. + // Single for non-sparse, every allocation so far for sparse. 
std::vector buffer_memory_; - // One bit per every 2^buffer_allocation_size_log2_ of the buffer. - std::vector buffer_memory_allocated_; // First usage will likely be uploading. Usage last_usage_ = Usage::kTransferDestination; diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index bbe90b04c..179d8f40f 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -30,7 +30,7 @@ DEFINE_bool( vulkan_validation, true, "Enable Vulkan validation (VK_LAYER_KHRONOS_validation). Messages will be " "written to the OS debug log.", - "GPU"); + "Vulkan"); DEFINE_int32( vulkan_device, -1, "Index of the physical device to use, or -1 for any compatible device.", @@ -587,6 +587,7 @@ bool VulkanProvider::Initialize() { XE_VULKAN_LOAD_DFN(vkMapMemory); XE_VULKAN_LOAD_DFN(vkResetCommandPool); XE_VULKAN_LOAD_DFN(vkResetFences); + XE_VULKAN_LOAD_DFN(vkQueueBindSparse); XE_VULKAN_LOAD_DFN(vkQueuePresentKHR); XE_VULKAN_LOAD_DFN(vkQueueSubmit); XE_VULKAN_LOAD_DFN(vkUnmapMemory); diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h index 8d7c10ed3..9fc117a50 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.h +++ b/src/xenia/ui/vulkan/vulkan_provider.h @@ -190,6 +190,7 @@ class VulkanProvider : public GraphicsProvider { PFN_vkMapMemory vkMapMemory; PFN_vkResetCommandPool vkResetCommandPool; PFN_vkResetFences vkResetFences; + PFN_vkQueueBindSparse vkQueueBindSparse; PFN_vkQueuePresentKHR vkQueuePresentKHR; PFN_vkQueueSubmit vkQueueSubmit; PFN_vkUnmapMemory vkUnmapMemory; @@ -205,9 +206,21 @@ class VulkanProvider : public GraphicsProvider { return dfn_.vkQueueSubmit(queue_graphics_compute_, submit_count, submits, fence); } - bool CanSubmitSparseBindings() const { + // Safer in Xenia context - in case a sparse binding queue was not obtained + // for some reason. 
+ bool IsSparseBindingSupported() const { return queue_sparse_binding_ != VK_NULL_HANDLE; } + VkResult BindSparse(uint32_t bind_info_count, + const VkBindSparseInfo* bind_info, VkFence fence) { + assert_true(IsSparseBindingSupported()); + std::mutex& mutex = queue_sparse_binding_ == queue_graphics_compute_ + ? queue_graphics_compute_mutex_ + : queue_sparse_binding_separate_mutex_; + std::lock_guard lock(mutex); + return dfn_.vkQueueBindSparse(queue_sparse_binding_, bind_info_count, + bind_info, fence); + } VkResult Present(const VkPresentInfoKHR* present_info) { // FIXME(Triang3l): Allow a separate queue for present - see // vulkan_provider.cc for details. From 89ac9294bf43367f352bfc8287cc58ad0927da26 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Wed, 7 Oct 2020 23:19:30 +0300 Subject: [PATCH 022/123] [Vulkan] Shared memory trace download --- .../gpu/vulkan/deferred_command_buffer.h | 12 ++- .../gpu/vulkan/vulkan_command_processor.cc | 13 ++- src/xenia/gpu/vulkan/vulkan_shared_memory.cc | 101 ++++++++++++++++++ src/xenia/gpu/vulkan/vulkan_shared_memory.h | 9 ++ .../ui/vulkan/vulkan_immediate_drawer.cc | 87 ++------------- src/xenia/ui/vulkan/vulkan_util.cc | 90 ++++++++++++++++ src/xenia/ui/vulkan/vulkan_util.h | 16 +++ 7 files changed, 243 insertions(+), 85 deletions(-) diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.h b/src/xenia/gpu/vulkan/deferred_command_buffer.h index 476abe605..879c92d5a 100644 --- a/src/xenia/gpu/vulkan/deferred_command_buffer.h +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.h @@ -40,8 +40,8 @@ class DeferredCommandBuffer { args.index_type = index_type; } - void CmdVkCopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, - uint32_t region_count, const VkBufferCopy* regions) { + VkBufferCopy* CmdCopyBufferEmplace(VkBuffer src_buffer, VkBuffer dst_buffer, + uint32_t region_count) { static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t)); const size_t header_size = xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy)); @@ 
-52,8 +52,12 @@ class DeferredCommandBuffer { args.src_buffer = src_buffer; args.dst_buffer = dst_buffer; args.region_count = region_count; - std::memcpy(args_ptr + header_size, regions, - sizeof(VkBufferCopy) * region_count); + return reinterpret_cast(args_ptr + header_size); + } + void CmdVkCopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, + uint32_t region_count, const VkBufferCopy* regions) { + std::memcpy(CmdCopyBufferEmplace(src_buffer, dst_buffer, region_count), + regions, sizeof(VkBufferCopy) * region_count); } // pNext of all barriers must be null. diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 531182ca5..1dcea8284 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -198,7 +198,18 @@ bool VulkanCommandProcessor::IssueCopy() { return true; } -void VulkanCommandProcessor::InitializeTrace() {} +void VulkanCommandProcessor::InitializeTrace() { + BeginSubmission(false); + bool shared_memory_submitted = + shared_memory_->InitializeTraceSubmitDownloads(); + if (!shared_memory_submitted) { + return; + } + AwaitAllQueueOperationsCompletion(); + if (shared_memory_submitted) { + shared_memory_->InitializeTraceCompleteDownloads(); + } +} void VulkanCommandProcessor::FinalizeTrace() {} diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc index 49b9cbbb0..e8b1790b3 100644 --- a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc @@ -182,6 +182,8 @@ bool VulkanSharedMemory::Initialize() { } void VulkanSharedMemory::Shutdown(bool from_destructor) { + ResetTraceDownload(); + upload_buffer_pool_.reset(); last_written_range_ = std::make_pair(0, 0); @@ -246,6 +248,92 @@ void VulkanSharedMemory::Use(Usage usage, last_written_range_ = written_range; } +bool VulkanSharedMemory::InitializeTraceSubmitDownloads() { + ResetTraceDownload(); + 
PrepareForTraceDownload(); + uint32_t download_page_count = trace_download_page_count(); + if (!download_page_count) { + return false; + } + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + if (!ui::vulkan::util::CreateDedicatedAllocationBuffer( + provider, download_page_count << page_size_log2(), + VK_BUFFER_USAGE_TRANSFER_DST_BIT, + ui::vulkan::util::MemoryPurpose::kReadback, trace_download_buffer_, + trace_download_buffer_memory_)) { + XELOGE( + "Shared memory: Failed to create a {} KB GPU-written memory download " + "buffer for frame tracing", + download_page_count << page_size_log2() >> 10); + ResetTraceDownload(); + return false; + } + + // TODO(Triang3l): End the render pass. + Use(Usage::kRead); + DeferredCommandBuffer& command_buffer = + command_processor_.deferred_command_buffer(); + + size_t download_range_count = trace_download_ranges().size(); + VkBufferCopy* download_regions = command_buffer.CmdCopyBufferEmplace( + buffer_, trace_download_buffer_, uint32_t(download_range_count)); + VkDeviceSize download_buffer_offset = 0; + for (size_t i = 0; i < download_range_count; ++i) { + VkBufferCopy& download_region = download_regions[i]; + const std::pair& download_range = + trace_download_ranges()[i]; + download_region.srcOffset = download_range.first; + download_region.dstOffset = download_buffer_offset; + download_region.size = download_range.second; + download_buffer_offset += download_range.second; + } + + VkBufferMemoryBarrier download_buffer_barrier; + download_buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + download_buffer_barrier.pNext = nullptr; + download_buffer_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + download_buffer_barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT; + download_buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + download_buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + download_buffer_barrier.buffer = 
trace_download_buffer_; + download_buffer_barrier.offset = 0; + download_buffer_barrier.size = VK_WHOLE_SIZE; + command_buffer.CmdVkPipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_HOST_BIT, 0, 0, nullptr, + 1, &download_buffer_barrier, 0, nullptr); + + return true; +} + +void VulkanSharedMemory::InitializeTraceCompleteDownloads() { + if (!trace_download_buffer_memory_) { + return; + } + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + void* download_mapping; + if (dfn.vkMapMemory(device, trace_download_buffer_memory_, 0, VK_WHOLE_SIZE, + 0, &download_mapping) == VK_SUCCESS) { + uint32_t download_buffer_offset = 0; + for (const auto& download_range : trace_download_ranges()) { + trace_writer_.WriteMemoryRead( + download_range.first, download_range.second, + reinterpret_cast(download_mapping) + + download_buffer_offset); + } + dfn.vkUnmapMemory(device, trace_download_buffer_memory_); + } else { + XELOGE( + "Shared memory: Failed to map the GPU-written memory download buffer " + "for frame tracing"); + } + ResetTraceDownload(); +} + bool VulkanSharedMemory::AllocateSparseHostGpuMemoryRange( uint32_t offset_allocations, uint32_t length_allocations) { if (!length_allocations) { @@ -296,6 +384,7 @@ bool VulkanSharedMemory::UploadRanges( if (upload_page_ranges.empty()) { return true; } + // TODO(Triang3l): End the render pass. // upload_page_ranges are sorted, use them to determine the range for the // ordering barrier. 
Use(Usage::kTransferDestination, @@ -401,6 +490,18 @@ void VulkanSharedMemory::GetBarrier(Usage usage, } } +void VulkanSharedMemory::ResetTraceDownload() { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, + trace_download_buffer_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, + trace_download_buffer_memory_); + ReleaseTraceDownloadRanges(); +} + } // namespace vulkan } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.h b/src/xenia/gpu/vulkan/vulkan_shared_memory.h index 2d5d15a38..0d8e90813 100644 --- a/src/xenia/gpu/vulkan/vulkan_shared_memory.h +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.h @@ -53,6 +53,10 @@ class VulkanSharedMemory : public SharedMemory { VkBuffer buffer() const { return buffer_; } + // Returns true if any downloads were submitted to the command processor. + bool InitializeTraceSubmitDownloads(); + void InitializeTraceCompleteDownloads(); + protected: bool AllocateSparseHostGpuMemoryRange(uint32_t offset_allocations, uint32_t length_allocations) override; @@ -78,6 +82,11 @@ class VulkanSharedMemory : public SharedMemory { std::unique_ptr upload_buffer_pool_; std::vector upload_regions_; + + // Created temporarily, only for downloading. 
+ VkBuffer trace_download_buffer_ = VK_NULL_HANDLE; + VkDeviceMemory trace_download_buffer_memory_ = VK_NULL_HANDLE; + void ResetTraceDownload(); }; } // namespace vulkan diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index 703930de4..b30386793 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -982,17 +982,11 @@ bool VulkanImmediateDrawer::CreateTextureResource( VkDeviceMemory upload_buffer_memory = VK_NULL_HANDLE; if (data) { size_t data_size = sizeof(uint32_t) * width * height; - VkBufferCreateInfo upload_buffer_create_info; - upload_buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - upload_buffer_create_info.pNext = nullptr; - upload_buffer_create_info.flags = 0; - upload_buffer_create_info.size = VkDeviceSize(data_size); - upload_buffer_create_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; - upload_buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - upload_buffer_create_info.queueFamilyIndexCount = 0; - upload_buffer_create_info.pQueueFamilyIndices = nullptr; - if (dfn.vkCreateBuffer(device, &upload_buffer_create_info, nullptr, - &upload_buffer) != VK_SUCCESS) { + uint32_t upload_buffer_memory_type; + if (!util::CreateDedicatedAllocationBuffer( + provider, VkDeviceSize(data_size), VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + util::MemoryPurpose::kUpload, upload_buffer, upload_buffer_memory, + &upload_buffer_memory_type)) { XELOGE( "Failed to create a Vulkan upload buffer for a {}x{} immediate " "drawer texture", @@ -1003,72 +997,6 @@ bool VulkanImmediateDrawer::CreateTextureResource( dfn.vkFreeMemory(device, image_memory, nullptr); return false; } - - VkMemoryAllocateInfo upload_buffer_memory_allocate_info; - VkMemoryRequirements upload_buffer_memory_requirements; - dfn.vkGetBufferMemoryRequirements(device, upload_buffer, - &upload_buffer_memory_requirements); - upload_buffer_memory_allocate_info.memoryTypeIndex = - 
util::ChooseHostMemoryType( - provider, upload_buffer_memory_requirements.memoryTypeBits, false); - if (upload_buffer_memory_allocate_info.memoryTypeIndex == UINT32_MAX) { - XELOGE( - "Failed to get a host-visible memory type for a Vulkan upload buffer " - "for a {}x{} immediate drawer texture", - width, height); - dfn.vkDestroyBuffer(device, upload_buffer, nullptr); - FreeTextureDescriptor(descriptor_index); - dfn.vkDestroyImageView(device, image_view, nullptr); - dfn.vkDestroyImage(device, image, nullptr); - dfn.vkFreeMemory(device, image_memory, nullptr); - return false; - } - upload_buffer_memory_allocate_info.sType = - VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - VkMemoryDedicatedAllocateInfoKHR - upload_buffer_memory_dedicated_allocate_info; - if (dedicated_allocation_supported) { - upload_buffer_memory_dedicated_allocate_info.sType = - VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; - upload_buffer_memory_dedicated_allocate_info.pNext = nullptr; - upload_buffer_memory_dedicated_allocate_info.image = VK_NULL_HANDLE; - upload_buffer_memory_dedicated_allocate_info.buffer = upload_buffer; - upload_buffer_memory_allocate_info.pNext = - &upload_buffer_memory_dedicated_allocate_info; - } else { - upload_buffer_memory_allocate_info.pNext = nullptr; - } - upload_buffer_memory_allocate_info.allocationSize = - util::GetMappableMemorySize(provider, - upload_buffer_memory_requirements.size); - if (dfn.vkAllocateMemory(device, &upload_buffer_memory_allocate_info, - nullptr, &upload_buffer_memory) != VK_SUCCESS) { - XELOGE( - "Failed to allocate memory for a Vulkan upload buffer for a {}x{} " - "immediate drawer texture", - width, height); - dfn.vkDestroyBuffer(device, upload_buffer, nullptr); - FreeTextureDescriptor(descriptor_index); - dfn.vkDestroyImageView(device, image_view, nullptr); - dfn.vkDestroyImage(device, image, nullptr); - dfn.vkFreeMemory(device, image_memory, nullptr); - return false; - } - if (dfn.vkBindBufferMemory(device, upload_buffer, 
upload_buffer_memory, - 0) != VK_SUCCESS) { - XELOGE( - "Failed to bind memory to a Vulkan upload buffer for a {}x{} " - "immediate drawer texture", - width, height); - dfn.vkDestroyBuffer(device, upload_buffer, nullptr); - dfn.vkFreeMemory(device, upload_buffer_memory, nullptr); - FreeTextureDescriptor(descriptor_index); - dfn.vkDestroyImageView(device, image_view, nullptr); - dfn.vkDestroyImage(device, image, nullptr); - dfn.vkFreeMemory(device, image_memory, nullptr); - return false; - } - void* upload_buffer_mapping; if (dfn.vkMapMemory(device, upload_buffer_memory, 0, VK_WHOLE_SIZE, 0, &upload_buffer_mapping) != VK_SUCCESS) { @@ -1085,9 +1013,8 @@ bool VulkanImmediateDrawer::CreateTextureResource( return false; } std::memcpy(upload_buffer_mapping, data, data_size); - util::FlushMappedMemoryRange( - provider, upload_buffer_memory, - upload_buffer_memory_allocate_info.memoryTypeIndex); + util::FlushMappedMemoryRange(provider, upload_buffer_memory, + upload_buffer_memory_type); dfn.vkUnmapMemory(device, upload_buffer_memory); } diff --git a/src/xenia/ui/vulkan/vulkan_util.cc b/src/xenia/ui/vulkan/vulkan_util.cc index d146beb83..49d8a949c 100644 --- a/src/xenia/ui/vulkan/vulkan_util.cc +++ b/src/xenia/ui/vulkan/vulkan_util.cc @@ -9,6 +9,9 @@ #include "xenia/ui/vulkan/vulkan_util.h" +#include + +#include "xenia/base/assert.h" #include "xenia/base/math.h" #include "xenia/ui/vulkan/vulkan_provider.h" @@ -43,6 +46,93 @@ void FlushMappedMemoryRange(const VulkanProvider& provider, provider.dfn().vkFlushMappedMemoryRanges(provider.device(), 1, &range); } +bool CreateDedicatedAllocationBuffer( + const VulkanProvider& provider, VkDeviceSize size, VkBufferUsageFlags usage, + MemoryPurpose memory_purpose, VkBuffer& buffer_out, + VkDeviceMemory& memory_out, uint32_t* memory_type_out) { + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + VkBufferCreateInfo buffer_create_info; + buffer_create_info.sType = 
VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + buffer_create_info.pNext = nullptr; + buffer_create_info.flags = 0; + buffer_create_info.size = size; + buffer_create_info.usage = usage; + buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + buffer_create_info.queueFamilyIndexCount = 0; + buffer_create_info.pQueueFamilyIndices = nullptr; + VkBuffer buffer; + if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer) != + VK_SUCCESS) { + return false; + } + + VkMemoryRequirements memory_requirements; + dfn.vkGetBufferMemoryRequirements(device, buffer, &memory_requirements); + uint32_t memory_type = UINT32_MAX; + switch (memory_purpose) { + case MemoryPurpose::kDeviceLocal: + if (!xe::bit_scan_forward(memory_requirements.memoryTypeBits & + provider.memory_types_device_local(), + &memory_type)) { + memory_type = UINT32_MAX; + } + break; + case MemoryPurpose::kUpload: + case MemoryPurpose::kReadback: + memory_type = + ChooseHostMemoryType(provider, memory_requirements.memoryTypeBits, + memory_purpose == MemoryPurpose::kReadback); + break; + default: + assert_unhandled_case(memory_purpose); + } + if (memory_type == UINT32_MAX) { + dfn.vkDestroyBuffer(device, buffer, nullptr); + return false; + } + + VkMemoryAllocateInfo memory_allocate_info; + memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + VkMemoryDedicatedAllocateInfoKHR memory_dedicated_allocate_info; + if (provider.device_extensions().khr_dedicated_allocation) { + memory_dedicated_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; + memory_dedicated_allocate_info.pNext = nullptr; + memory_dedicated_allocate_info.image = VK_NULL_HANDLE; + memory_dedicated_allocate_info.buffer = buffer; + memory_allocate_info.pNext = &memory_dedicated_allocate_info; + } else { + memory_allocate_info.pNext = nullptr; + } + memory_allocate_info.allocationSize = memory_requirements.size; + if (memory_purpose == MemoryPurpose::kUpload || + memory_purpose == 
MemoryPurpose::kReadback) { + memory_allocate_info.allocationSize = + GetMappableMemorySize(provider, memory_allocate_info.allocationSize); + } + memory_allocate_info.memoryTypeIndex = memory_type; + VkDeviceMemory memory; + if (dfn.vkAllocateMemory(device, &memory_allocate_info, nullptr, &memory) != + VK_SUCCESS) { + dfn.vkDestroyBuffer(device, buffer, nullptr); + return false; + } + if (dfn.vkBindBufferMemory(device, buffer, memory, 0) != VK_SUCCESS) { + dfn.vkDestroyBuffer(device, buffer, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return false; + } + + buffer_out = buffer; + memory_out = memory; + if (memory_type_out) { + *memory_type_out = memory_type; + } + return true; +} + } // namespace util } // namespace vulkan } // namespace ui diff --git a/src/xenia/ui/vulkan/vulkan_util.h b/src/xenia/ui/vulkan/vulkan_util.h index 61bfec617..163731699 100644 --- a/src/xenia/ui/vulkan/vulkan_util.h +++ b/src/xenia/ui/vulkan/vulkan_util.h @@ -10,6 +10,8 @@ #ifndef XENIA_UI_VULKAN_VULKAN_UTIL_H_ #define XENIA_UI_VULKAN_VULKAN_UTIL_H_ +#include + #include "xenia/base/math.h" #include "xenia/ui/vulkan/vulkan_provider.h" @@ -38,6 +40,12 @@ inline bool DestroyAndNullHandle(F* destroy_function, P parent, T& handle) { return false; } +enum class MemoryPurpose { + kDeviceLocal, + kUpload, + kReadback, +}; + inline VkDeviceSize GetMappableMemorySize(const VulkanProvider& provider, VkDeviceSize size) { VkDeviceSize non_coherent_atom_size = @@ -85,6 +93,14 @@ inline void InitializeSubresourceRange( range.layerCount = layer_count; } +// Creates a buffer backed by a dedicated allocation. If using a mappable memory +// purpose (upload/readback), the allocation size will be aligned to +// nonCoherentAtomSize. 
+bool CreateDedicatedAllocationBuffer( + const VulkanProvider& provider, VkDeviceSize size, VkBufferUsageFlags usage, + MemoryPurpose memory_purpose, VkBuffer& buffer_out, + VkDeviceMemory& memory_out, uint32_t* memory_type_out = nullptr); + inline VkShaderModule CreateShaderModule(const VulkanProvider& provider, const void* code, size_t code_size) { VkShaderModuleCreateInfo shader_module_create_info; From 48620759765d10116703f45d35f3d418ff80805e Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 8 Oct 2020 23:05:30 +0300 Subject: [PATCH 023/123] [Vulkan] Cleanup and update SPIR-V dependencies --- .gitmodules | 12 +- premake5.lua | 1 - src/xenia/app/premake5.lua | 1 - src/xenia/gpu/premake5.lua | 5 - third_party/glslang | 1 + third_party/glslang-spirv.lua | 54 +- third_party/glslang-spirv/GLSL.ext.AMD.h | 110 - third_party/glslang-spirv/GLSL.ext.EXT.h | 39 - third_party/glslang-spirv/GLSL.ext.KHR.h | 48 - third_party/glslang-spirv/GLSL.ext.NV.h | 54 - third_party/glslang-spirv/GLSL.std.450.h | 131 - third_party/glslang-spirv/GlslangToSpv.cpp | 6146 ----------------- third_party/glslang-spirv/GlslangToSpv.h | 67 - third_party/glslang-spirv/InReadableOrder.cpp | 113 - third_party/glslang-spirv/Include/BaseTypes.h | 387 -- third_party/glslang-spirv/Include/Common.h | 274 - .../glslang-spirv/Include/ConstantUnion.h | 625 -- third_party/glslang-spirv/Include/InfoSink.h | 144 - .../glslang-spirv/Include/InitializeGlobals.h | 44 - third_party/glslang-spirv/Include/PoolAlloc.h | 317 - .../glslang-spirv/Include/ResourceLimits.h | 140 - third_party/glslang-spirv/Include/ShHandle.h | 176 - third_party/glslang-spirv/Include/Types.h | 1924 ------ third_party/glslang-spirv/Include/arrays.h | 329 - .../glslang-spirv/Include/intermediate.h | 1486 ---- third_party/glslang-spirv/Include/revision.h | 6 - .../glslang-spirv/Include/revision.template | 13 - third_party/glslang-spirv/Logger.cpp | 68 - third_party/glslang-spirv/Logger.h | 74 - third_party/glslang-spirv/SPVRemapper.cpp | 
1479 ---- third_party/glslang-spirv/SPVRemapper.h | 304 - third_party/glslang-spirv/SpvBuilder.cpp | 2676 ------- third_party/glslang-spirv/SpvBuilder.h | 641 -- third_party/glslang-spirv/bitutils.h | 81 - third_party/glslang-spirv/disassemble.cpp | 695 -- third_party/glslang-spirv/disassemble.h | 52 - third_party/glslang-spirv/doc.cpp | 2894 -------- third_party/glslang-spirv/doc.h | 262 - third_party/glslang-spirv/hex_float.h | 1078 --- third_party/glslang-spirv/spirv.hpp | 1028 --- third_party/glslang-spirv/spvIR.h | 407 -- third_party/spirv-headers | 1 - third_party/spirv-tools | 1 - third_party/spirv-tools.lua | 76 - third_party/spirv/GLSL.std.450.h | 131 - third_party/spirv/GLSL.std.450.hpp11 | 135 - third_party/spirv/OpenCL.std.h | 272 - third_party/spirv/spirv.h | 871 --- third_party/spirv/spirv.hpp11 | 880 --- 49 files changed, 35 insertions(+), 26718 deletions(-) create mode 160000 third_party/glslang delete mode 100644 third_party/glslang-spirv/GLSL.ext.AMD.h delete mode 100644 third_party/glslang-spirv/GLSL.ext.EXT.h delete mode 100644 third_party/glslang-spirv/GLSL.ext.KHR.h delete mode 100644 third_party/glslang-spirv/GLSL.ext.NV.h delete mode 100644 third_party/glslang-spirv/GLSL.std.450.h delete mode 100644 third_party/glslang-spirv/GlslangToSpv.cpp delete mode 100644 third_party/glslang-spirv/GlslangToSpv.h delete mode 100644 third_party/glslang-spirv/InReadableOrder.cpp delete mode 100644 third_party/glslang-spirv/Include/BaseTypes.h delete mode 100644 third_party/glslang-spirv/Include/Common.h delete mode 100644 third_party/glslang-spirv/Include/ConstantUnion.h delete mode 100644 third_party/glslang-spirv/Include/InfoSink.h delete mode 100644 third_party/glslang-spirv/Include/InitializeGlobals.h delete mode 100644 third_party/glslang-spirv/Include/PoolAlloc.h delete mode 100644 third_party/glslang-spirv/Include/ResourceLimits.h delete mode 100644 third_party/glslang-spirv/Include/ShHandle.h delete mode 100644 
third_party/glslang-spirv/Include/Types.h delete mode 100644 third_party/glslang-spirv/Include/arrays.h delete mode 100644 third_party/glslang-spirv/Include/intermediate.h delete mode 100644 third_party/glslang-spirv/Include/revision.h delete mode 100644 third_party/glslang-spirv/Include/revision.template delete mode 100644 third_party/glslang-spirv/Logger.cpp delete mode 100644 third_party/glslang-spirv/Logger.h delete mode 100644 third_party/glslang-spirv/SPVRemapper.cpp delete mode 100644 third_party/glslang-spirv/SPVRemapper.h delete mode 100644 third_party/glslang-spirv/SpvBuilder.cpp delete mode 100644 third_party/glslang-spirv/SpvBuilder.h delete mode 100644 third_party/glslang-spirv/bitutils.h delete mode 100644 third_party/glslang-spirv/disassemble.cpp delete mode 100644 third_party/glslang-spirv/disassemble.h delete mode 100644 third_party/glslang-spirv/doc.cpp delete mode 100644 third_party/glslang-spirv/doc.h delete mode 100644 third_party/glslang-spirv/hex_float.h delete mode 100644 third_party/glslang-spirv/spirv.hpp delete mode 100644 third_party/glslang-spirv/spvIR.h delete mode 160000 third_party/spirv-headers delete mode 160000 third_party/spirv-tools delete mode 100644 third_party/spirv-tools.lua delete mode 100644 third_party/spirv/GLSL.std.450.h delete mode 100644 third_party/spirv/GLSL.std.450.hpp11 delete mode 100644 third_party/spirv/OpenCL.std.h delete mode 100644 third_party/spirv/spirv.h delete mode 100644 third_party/spirv/spirv.hpp11 diff --git a/.gitmodules b/.gitmodules index 6c3ca7278..c50326cb4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,9 +10,6 @@ [submodule "third_party/libav"] path = third_party/libav url = https://github.com/xenia-project/libav.git -[submodule "third_party/spirv-tools"] - path = third_party/spirv-tools - url = https://github.com/xenia-project/SPIRV-Tools.git [submodule "third_party/catch"] path = third_party/catch url = https://github.com/catchorg/Catch2.git @@ -25,12 +22,6 @@ [submodule 
"third_party/premake-export-compile-commands"] path = third_party/premake-export-compile-commands url = https://github.com/xenia-project/premake-export-compile-commands.git -[submodule "third_party/spirv-headers"] - path = third_party/spirv-headers - url = https://github.com/KhronosGroup/SPIRV-Headers.git -[submodule "third_party/volk"] - path = third_party/volk - url = https://github.com/zeux/volk.git [submodule "third_party/discord-rpc"] path = third_party/discord-rpc url = https://github.com/discordapp/discord-rpc.git @@ -64,3 +55,6 @@ [submodule "third_party/DirectXShaderCompiler"] path = third_party/DirectXShaderCompiler url = https://github.com/microsoft/DirectXShaderCompiler.git +[submodule "third_party/glslang"] + path = third_party/glslang + url = https://github.com/KhronosGroup/glslang.git diff --git a/premake5.lua b/premake5.lua index 2b8042334..40ecf8a92 100644 --- a/premake5.lua +++ b/premake5.lua @@ -225,7 +225,6 @@ solution("xenia") include("third_party/mspack.lua") include("third_party/SDL2.lua") include("third_party/snappy.lua") - include("third_party/spirv-tools.lua") include("third_party/xxhash.lua") include("src/xenia") diff --git a/src/xenia/app/premake5.lua b/src/xenia/app/premake5.lua index 3a0f6bb2a..e260449e9 100644 --- a/src/xenia/app/premake5.lua +++ b/src/xenia/app/premake5.lua @@ -19,7 +19,6 @@ project("xenia-app") "libavutil", "mspack", "snappy", - "spirv-tools", "xenia-app-discord", "xenia-apu", "xenia-apu-nop", diff --git a/src/xenia/gpu/premake5.lua b/src/xenia/gpu/premake5.lua index 92f798458..f4b2a08d8 100644 --- a/src/xenia/gpu/premake5.lua +++ b/src/xenia/gpu/premake5.lua @@ -11,16 +11,12 @@ project("xenia-gpu") "fmt", "glslang-spirv", "snappy", - "spirv-tools", "xenia-base", "xenia-ui", "xxhash", }) defines({ }) - includedirs({ - project_root.."/third_party/spirv-tools/external/include", - }) local_platform_files() group("src") @@ -32,7 +28,6 @@ project("xenia-gpu-shader-compiler") "dxbc", "fmt", "glslang-spirv", - 
"spirv-tools", "xenia-base", "xenia-gpu", }) diff --git a/third_party/glslang b/third_party/glslang new file mode 160000 index 000000000..2067d1a93 --- /dev/null +++ b/third_party/glslang @@ -0,0 +1 @@ +Subproject commit 2067d1a93e6edc17f2a6b7e3e5138a9bbcd35ef9 diff --git a/third_party/glslang-spirv.lua b/third_party/glslang-spirv.lua index 77895361b..b5cdf15ba 100644 --- a/third_party/glslang-spirv.lua +++ b/third_party/glslang-spirv.lua @@ -11,27 +11,35 @@ project("glslang-spirv") includedirs({ }) files({ - "glslang-spirv/bitutils.h", - "glslang-spirv/disassemble.cpp", - "glslang-spirv/disassemble.h", - "glslang-spirv/doc.cpp", - "glslang-spirv/doc.h", - "glslang-spirv/GLSL.ext.AMD.h", - "glslang-spirv/GLSL.ext.EXT.h", - "glslang-spirv/GLSL.ext.KHR.h", - "glslang-spirv/GLSL.ext.NV.h", - "glslang-spirv/GLSL.std.450.h", - -- Disabled until required. - -- "glslang-spirv/GlslangToSpv.cpp", - -- "glslang-spirv/GlslangToSpv.h", - "glslang-spirv/hex_float.h", - "glslang-spirv/InReadableOrder.cpp", - "glslang-spirv/Logger.cpp", - "glslang-spirv/Logger.h", - "glslang-spirv/spirv.hpp", - "glslang-spirv/SpvBuilder.cpp", - "glslang-spirv/SpvBuilder.h", - "glslang-spirv/spvIR.h", - "glslang-spirv/SPVRemapper.cpp", - "glslang-spirv/SPVRemapper.h", + "glslang/SPIRV/bitutils.h", + -- Disabled temporarily until PR #2417 removing SpvTools.h dependency is + -- merged. + -- "glslang/SPIRV/disassemble.cpp", + -- "glslang/SPIRV/disassemble.h", + "glslang/SPIRV/doc.cpp", + "glslang/SPIRV/doc.h", + "glslang/SPIRV/GLSL.ext.AMD.h", + "glslang/SPIRV/GLSL.ext.EXT.h", + "glslang/SPIRV/GLSL.ext.KHR.h", + "glslang/SPIRV/GLSL.ext.NV.h", + "glslang/SPIRV/GLSL.std.450.h", + -- Disabled because GLSL is not used. 
+ -- "glslang/SPIRV/GlslangToSpv.cpp", + -- "glslang/SPIRV/GlslangToSpv.h", + "glslang/SPIRV/hex_float.h", + "glslang/SPIRV/InReadableOrder.cpp", + "glslang/SPIRV/Logger.cpp", + "glslang/SPIRV/Logger.h", + "glslang/SPIRV/NonSemanticDebugPrintf.h", + "glslang/SPIRV/spirv.hpp", + "glslang/SPIRV/SpvBuilder.cpp", + "glslang/SPIRV/SpvBuilder.h", + "glslang/SPIRV/spvIR.h", + -- Disabled because of spirv-tools dependency. + -- "glslang/SPIRV/SpvPostProcess.cpp", + "glslang/SPIRV/SPVRemapper.cpp", + "glslang/SPIRV/SPVRemapper.h", + -- Disabled because of spirv-tools dependency. + -- "glslang/SPIRV/SpvTools.cpp", + -- "glslang/SPIRV/SpvTools.h", }) diff --git a/third_party/glslang-spirv/GLSL.ext.AMD.h b/third_party/glslang-spirv/GLSL.ext.AMD.h deleted file mode 100644 index d4f57efdc..000000000 --- a/third_party/glslang-spirv/GLSL.ext.AMD.h +++ /dev/null @@ -1,110 +0,0 @@ -/* -** Copyright (c) 2014-2016 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. -*/ - -#ifndef GLSLextAMD_H -#define GLSLextAMD_H - -enum BuiltIn; -enum Capability; -enum Decoration; -enum Op; - -static const int GLSLextAMDVersion = 100; -static const int GLSLextAMDRevision = 6; - -// SPV_AMD_shader_ballot -static const char* const E_SPV_AMD_shader_ballot = "SPV_AMD_shader_ballot"; - -enum ShaderBallotAMD { - ShaderBallotBadAMD = 0, // Don't use - - SwizzleInvocationsAMD = 1, - SwizzleInvocationsMaskedAMD = 2, - WriteInvocationAMD = 3, - MbcntAMD = 4, - - ShaderBallotCountAMD -}; - -// SPV_AMD_shader_trinary_minmax -static const char* const E_SPV_AMD_shader_trinary_minmax = "SPV_AMD_shader_trinary_minmax"; - -enum ShaderTrinaryMinMaxAMD { - ShaderTrinaryMinMaxBadAMD = 0, // Don't use - - FMin3AMD = 1, - UMin3AMD = 2, - SMin3AMD = 3, - FMax3AMD = 4, - UMax3AMD = 5, - SMax3AMD = 6, - FMid3AMD = 7, - UMid3AMD = 8, - SMid3AMD = 9, - - ShaderTrinaryMinMaxCountAMD -}; - -// SPV_AMD_shader_explicit_vertex_parameter -static const char* const E_SPV_AMD_shader_explicit_vertex_parameter = "SPV_AMD_shader_explicit_vertex_parameter"; - -enum ShaderExplicitVertexParameterAMD { - ShaderExplicitVertexParameterBadAMD = 0, // Don't use - - InterpolateAtVertexAMD = 1, - - ShaderExplicitVertexParameterCountAMD -}; - -// SPV_AMD_gcn_shader -static const char* const E_SPV_AMD_gcn_shader = "SPV_AMD_gcn_shader"; - -enum GcnShaderAMD { - GcnShaderBadAMD = 0, // Don't use - - CubeFaceIndexAMD = 1, - CubeFaceCoordAMD = 2, - TimeAMD = 3, - - GcnShaderCountAMD -}; - -// SPV_AMD_gpu_shader_half_float -static const char* const E_SPV_AMD_gpu_shader_half_float = "SPV_AMD_gpu_shader_half_float"; - -// SPV_AMD_texture_gather_bias_lod -static const char* const E_SPV_AMD_texture_gather_bias_lod = 
"SPV_AMD_texture_gather_bias_lod"; - -// SPV_AMD_gpu_shader_int16 -static const char* const E_SPV_AMD_gpu_shader_int16 = "SPV_AMD_gpu_shader_int16"; - -// SPV_AMD_shader_image_load_store_lod -static const char* const E_SPV_AMD_shader_image_load_store_lod = "SPV_AMD_shader_image_load_store_lod"; - -// SPV_AMD_shader_fragment_mask -static const char* const E_SPV_AMD_shader_fragment_mask = "SPV_AMD_shader_fragment_mask"; - -#endif // #ifndef GLSLextAMD_H diff --git a/third_party/glslang-spirv/GLSL.ext.EXT.h b/third_party/glslang-spirv/GLSL.ext.EXT.h deleted file mode 100644 index e879714d0..000000000 --- a/third_party/glslang-spirv/GLSL.ext.EXT.h +++ /dev/null @@ -1,39 +0,0 @@ -/* -** Copyright (c) 2014-2016 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. -*/ - -#ifndef GLSLextEXT_H -#define GLSLextEXT_H - -enum BuiltIn; -enum Op; -enum Capability; - -static const int GLSLextEXTVersion = 100; -static const int GLSLextEXTRevision = 1; - -static const char* const E_SPV_EXT_fragment_fully_covered = "SPV_EXT_fragment_fully_covered"; - -#endif // #ifndef GLSLextEXT_H diff --git a/third_party/glslang-spirv/GLSL.ext.KHR.h b/third_party/glslang-spirv/GLSL.ext.KHR.h deleted file mode 100644 index 2eb10ae62..000000000 --- a/third_party/glslang-spirv/GLSL.ext.KHR.h +++ /dev/null @@ -1,48 +0,0 @@ -/* -** Copyright (c) 2014-2016 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. -*/ - -#ifndef GLSLextKHR_H -#define GLSLextKHR_H - -enum BuiltIn; -enum Op; -enum Capability; - -static const int GLSLextKHRVersion = 100; -static const int GLSLextKHRRevision = 2; - -static const char* const E_SPV_KHR_shader_ballot = "SPV_KHR_shader_ballot"; -static const char* const E_SPV_KHR_subgroup_vote = "SPV_KHR_subgroup_vote"; -static const char* const E_SPV_KHR_device_group = "SPV_KHR_device_group"; -static const char* const E_SPV_KHR_multiview = "SPV_KHR_multiview"; -static const char* const E_SPV_KHR_shader_draw_parameters = "SPV_KHR_shader_draw_parameters"; -static const char* const E_SPV_KHR_16bit_storage = "SPV_KHR_16bit_storage"; -static const char* const E_SPV_KHR_storage_buffer_storage_class = "SPV_KHR_storage_buffer_storage_class"; -static const char* const E_SPV_KHR_post_depth_coverage = "SPV_KHR_post_depth_coverage"; -static const char* const E_SPV_EXT_shader_stencil_export = "SPV_EXT_shader_stencil_export"; -static const char* const E_SPV_EXT_shader_viewport_index_layer = "SPV_EXT_shader_viewport_index_layer"; - -#endif // #ifndef GLSLextKHR_H diff --git a/third_party/glslang-spirv/GLSL.ext.NV.h b/third_party/glslang-spirv/GLSL.ext.NV.h deleted file mode 100644 index c01858be4..000000000 --- a/third_party/glslang-spirv/GLSL.ext.NV.h +++ /dev/null @@ -1,54 +0,0 @@ -/* -** Copyright (c) 2014-2017 The Khronos Group Inc. 
-** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. 
-*/ - -#ifndef GLSLextNV_H -#define GLSLextNV_H - -enum BuiltIn; -enum Decoration; -enum Op; -enum Capability; - -static const int GLSLextNVVersion = 100; -static const int GLSLextNVRevision = 5; - -//SPV_NV_sample_mask_override_coverage -const char* const E_SPV_NV_sample_mask_override_coverage = "SPV_NV_sample_mask_override_coverage"; - -//SPV_NV_geometry_shader_passthrough -const char* const E_SPV_NV_geometry_shader_passthrough = "SPV_NV_geometry_shader_passthrough"; - -//SPV_NV_viewport_array2 -const char* const E_SPV_NV_viewport_array2 = "SPV_NV_viewport_array2"; -const char* const E_ARB_shader_viewport_layer_array = "SPV_ARB_shader_viewport_layer_array"; - -//SPV_NV_stereo_view_rendering -const char* const E_SPV_NV_stereo_view_rendering = "SPV_NV_stereo_view_rendering"; - -//SPV_NVX_multiview_per_view_attributes -const char* const E_SPV_NVX_multiview_per_view_attributes = "SPV_NVX_multiview_per_view_attributes"; - -#endif // #ifndef GLSLextNV_H \ No newline at end of file diff --git a/third_party/glslang-spirv/GLSL.std.450.h b/third_party/glslang-spirv/GLSL.std.450.h deleted file mode 100644 index df31092be..000000000 --- a/third_party/glslang-spirv/GLSL.std.450.h +++ /dev/null @@ -1,131 +0,0 @@ -/* -** Copyright (c) 2014-2016 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. -*/ - -#ifndef GLSLstd450_H -#define GLSLstd450_H - -static const int GLSLstd450Version = 100; -static const int GLSLstd450Revision = 1; - -enum GLSLstd450 { - GLSLstd450Bad = 0, // Don't use - - GLSLstd450Round = 1, - GLSLstd450RoundEven = 2, - GLSLstd450Trunc = 3, - GLSLstd450FAbs = 4, - GLSLstd450SAbs = 5, - GLSLstd450FSign = 6, - GLSLstd450SSign = 7, - GLSLstd450Floor = 8, - GLSLstd450Ceil = 9, - GLSLstd450Fract = 10, - - GLSLstd450Radians = 11, - GLSLstd450Degrees = 12, - GLSLstd450Sin = 13, - GLSLstd450Cos = 14, - GLSLstd450Tan = 15, - GLSLstd450Asin = 16, - GLSLstd450Acos = 17, - GLSLstd450Atan = 18, - GLSLstd450Sinh = 19, - GLSLstd450Cosh = 20, - GLSLstd450Tanh = 21, - GLSLstd450Asinh = 22, - GLSLstd450Acosh = 23, - GLSLstd450Atanh = 24, - GLSLstd450Atan2 = 25, - - GLSLstd450Pow = 26, - GLSLstd450Exp = 27, - GLSLstd450Log = 28, - GLSLstd450Exp2 = 29, - GLSLstd450Log2 = 30, - GLSLstd450Sqrt = 31, - GLSLstd450InverseSqrt = 32, - - GLSLstd450Determinant = 33, - GLSLstd450MatrixInverse = 34, - - GLSLstd450Modf = 35, // second operand needs an OpVariable to write to - GLSLstd450ModfStruct = 36, // no OpVariable operand - GLSLstd450FMin = 37, - GLSLstd450UMin = 38, - GLSLstd450SMin = 39, - GLSLstd450FMax = 40, - GLSLstd450UMax = 41, - GLSLstd450SMax = 42, - GLSLstd450FClamp = 43, - GLSLstd450UClamp = 44, - GLSLstd450SClamp = 45, - 
GLSLstd450FMix = 46, - GLSLstd450IMix = 47, // Reserved - GLSLstd450Step = 48, - GLSLstd450SmoothStep = 49, - - GLSLstd450Fma = 50, - GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to - GLSLstd450FrexpStruct = 52, // no OpVariable operand - GLSLstd450Ldexp = 53, - - GLSLstd450PackSnorm4x8 = 54, - GLSLstd450PackUnorm4x8 = 55, - GLSLstd450PackSnorm2x16 = 56, - GLSLstd450PackUnorm2x16 = 57, - GLSLstd450PackHalf2x16 = 58, - GLSLstd450PackDouble2x32 = 59, - GLSLstd450UnpackSnorm2x16 = 60, - GLSLstd450UnpackUnorm2x16 = 61, - GLSLstd450UnpackHalf2x16 = 62, - GLSLstd450UnpackSnorm4x8 = 63, - GLSLstd450UnpackUnorm4x8 = 64, - GLSLstd450UnpackDouble2x32 = 65, - - GLSLstd450Length = 66, - GLSLstd450Distance = 67, - GLSLstd450Cross = 68, - GLSLstd450Normalize = 69, - GLSLstd450FaceForward = 70, - GLSLstd450Reflect = 71, - GLSLstd450Refract = 72, - - GLSLstd450FindILsb = 73, - GLSLstd450FindSMsb = 74, - GLSLstd450FindUMsb = 75, - - GLSLstd450InterpolateAtCentroid = 76, - GLSLstd450InterpolateAtSample = 77, - GLSLstd450InterpolateAtOffset = 78, - - GLSLstd450NMin = 79, - GLSLstd450NMax = 80, - GLSLstd450NClamp = 81, - - GLSLstd450Count -}; - -#endif // #ifndef GLSLstd450_H diff --git a/third_party/glslang-spirv/GlslangToSpv.cpp b/third_party/glslang-spirv/GlslangToSpv.cpp deleted file mode 100644 index 6e9fb38bb..000000000 --- a/third_party/glslang-spirv/GlslangToSpv.cpp +++ /dev/null @@ -1,6146 +0,0 @@ -// -// Copyright (C) 2014-2016 LunarG, Inc. -// Copyright (C) 2015-2016 Google, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -// -// Visit the nodes in the glslang intermediate tree representation to -// translate them to SPIR-V. 
-// - -#include "spirv.hpp" -#include "GlslangToSpv.h" -#include "SpvBuilder.h" -namespace spv { - #include "GLSL.std.450.h" - #include "GLSL.ext.KHR.h" - #include "GLSL.ext.EXT.h" -#ifdef AMD_EXTENSIONS - #include "GLSL.ext.AMD.h" -#endif -#ifdef NV_EXTENSIONS - #include "GLSL.ext.NV.h" -#endif -} - -#ifdef ENABLE_OPT - #include "spirv-tools/optimizer.hpp" - #include "message.h" - #include "SPVRemapper.h" -#endif - -#ifdef ENABLE_OPT -using namespace spvtools; -#endif - -// Glslang includes -#include "../glslang/MachineIndependent/localintermediate.h" -#include "../glslang/MachineIndependent/SymbolTable.h" -#include "../glslang/Include/Common.h" -#include "../glslang/Include/revision.h" - -#include -#include -#include -#include -#include -#include -#include - -namespace { - -namespace { -class SpecConstantOpModeGuard { -public: - SpecConstantOpModeGuard(spv::Builder* builder) - : builder_(builder) { - previous_flag_ = builder->isInSpecConstCodeGenMode(); - } - ~SpecConstantOpModeGuard() { - previous_flag_ ? builder_->setToSpecConstCodeGenMode() - : builder_->setToNormalCodeGenMode(); - } - void turnOnSpecConstantOpMode() { - builder_->setToSpecConstCodeGenMode(); - } - -private: - spv::Builder* builder_; - bool previous_flag_; -}; -} - -// -// The main holder of information for translating glslang to SPIR-V. -// -// Derives from the AST walking base class. 
-// -class TGlslangToSpvTraverser : public glslang::TIntermTraverser { -public: - TGlslangToSpvTraverser(unsigned int spvVersion, const glslang::TIntermediate*, spv::SpvBuildLogger* logger, - glslang::SpvOptions& options); - virtual ~TGlslangToSpvTraverser() { } - - bool visitAggregate(glslang::TVisit, glslang::TIntermAggregate*); - bool visitBinary(glslang::TVisit, glslang::TIntermBinary*); - void visitConstantUnion(glslang::TIntermConstantUnion*); - bool visitSelection(glslang::TVisit, glslang::TIntermSelection*); - bool visitSwitch(glslang::TVisit, glslang::TIntermSwitch*); - void visitSymbol(glslang::TIntermSymbol* symbol); - bool visitUnary(glslang::TVisit, glslang::TIntermUnary*); - bool visitLoop(glslang::TVisit, glslang::TIntermLoop*); - bool visitBranch(glslang::TVisit visit, glslang::TIntermBranch*); - - void finishSpv(); - void dumpSpv(std::vector& out); - -protected: - spv::Decoration TranslateInterpolationDecoration(const glslang::TQualifier& qualifier); - spv::Decoration TranslateAuxiliaryStorageDecoration(const glslang::TQualifier& qualifier); - spv::BuiltIn TranslateBuiltInDecoration(glslang::TBuiltInVariable, bool memberDeclaration); - spv::ImageFormat TranslateImageFormat(const glslang::TType& type); - spv::SelectionControlMask TranslateSelectionControl(const glslang::TIntermSelection&) const; - spv::SelectionControlMask TranslateSwitchControl(const glslang::TIntermSwitch&) const; - spv::LoopControlMask TranslateLoopControl(const glslang::TIntermLoop&, unsigned int& dependencyLength) const; - spv::StorageClass TranslateStorageClass(const glslang::TType&); - spv::Id createSpvVariable(const glslang::TIntermSymbol*); - spv::Id getSampledType(const glslang::TSampler&); - spv::Id getInvertedSwizzleType(const glslang::TIntermTyped&); - spv::Id createInvertedSwizzle(spv::Decoration precision, const glslang::TIntermTyped&, spv::Id parentResult); - void convertSwizzle(const glslang::TIntermAggregate&, std::vector& swizzle); - spv::Id 
convertGlslangToSpvType(const glslang::TType& type); - spv::Id convertGlslangToSpvType(const glslang::TType& type, glslang::TLayoutPacking, const glslang::TQualifier&); - bool filterMember(const glslang::TType& member); - spv::Id convertGlslangStructToSpvType(const glslang::TType&, const glslang::TTypeList* glslangStruct, - glslang::TLayoutPacking, const glslang::TQualifier&); - void decorateStructType(const glslang::TType&, const glslang::TTypeList* glslangStruct, glslang::TLayoutPacking, - const glslang::TQualifier&, spv::Id); - spv::Id makeArraySizeId(const glslang::TArraySizes&, int dim); - spv::Id accessChainLoad(const glslang::TType& type); - void accessChainStore(const glslang::TType& type, spv::Id rvalue); - void multiTypeStore(const glslang::TType&, spv::Id rValue); - glslang::TLayoutPacking getExplicitLayout(const glslang::TType& type) const; - int getArrayStride(const glslang::TType& arrayType, glslang::TLayoutPacking, glslang::TLayoutMatrix); - int getMatrixStride(const glslang::TType& matrixType, glslang::TLayoutPacking, glslang::TLayoutMatrix); - void updateMemberOffset(const glslang::TType& structType, const glslang::TType& memberType, int& currentOffset, int& nextOffset, glslang::TLayoutPacking, glslang::TLayoutMatrix); - void declareUseOfStructMember(const glslang::TTypeList& members, int glslangMember); - - bool isShaderEntryPoint(const glslang::TIntermAggregate* node); - bool writableParam(glslang::TStorageQualifier); - bool originalParam(glslang::TStorageQualifier, const glslang::TType&, bool implicitThisParam); - void makeFunctions(const glslang::TIntermSequence&); - void makeGlobalInitializers(const glslang::TIntermSequence&); - void visitFunctions(const glslang::TIntermSequence&); - void handleFunctionEntry(const glslang::TIntermAggregate* node); - void translateArguments(const glslang::TIntermAggregate& node, std::vector& arguments); - void translateArguments(glslang::TIntermUnary& node, std::vector& arguments); - spv::Id 
createImageTextureFunctionCall(glslang::TIntermOperator* node); - spv::Id handleUserFunctionCall(const glslang::TIntermAggregate*); - - spv::Id createBinaryOperation(glslang::TOperator op, spv::Decoration precision, spv::Decoration noContraction, spv::Id typeId, spv::Id left, spv::Id right, glslang::TBasicType typeProxy, bool reduceComparison = true); - spv::Id createBinaryMatrixOperation(spv::Op, spv::Decoration precision, spv::Decoration noContraction, spv::Id typeId, spv::Id left, spv::Id right); - spv::Id createUnaryOperation(glslang::TOperator op, spv::Decoration precision, spv::Decoration noContraction, spv::Id typeId, spv::Id operand,glslang::TBasicType typeProxy); - spv::Id createUnaryMatrixOperation(spv::Op op, spv::Decoration precision, spv::Decoration noContraction, spv::Id typeId, spv::Id operand,glslang::TBasicType typeProxy); - spv::Id createConversion(glslang::TOperator op, spv::Decoration precision, spv::Decoration noContraction, spv::Id destTypeId, spv::Id operand, glslang::TBasicType typeProxy); - spv::Id makeSmearedConstant(spv::Id constant, int vectorSize); - spv::Id createAtomicOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId, std::vector& operands, glslang::TBasicType typeProxy); - spv::Id createInvocationsOperation(glslang::TOperator op, spv::Id typeId, std::vector& operands, glslang::TBasicType typeProxy); - spv::Id CreateInvocationsVectorOperation(spv::Op op, spv::GroupOperation groupOperation, spv::Id typeId, std::vector& operands); - spv::Id createMiscOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId, std::vector& operands, glslang::TBasicType typeProxy); - spv::Id createNoArgOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId); - spv::Id getSymbolId(const glslang::TIntermSymbol* node); - void addDecoration(spv::Id id, spv::Decoration dec); - void addDecoration(spv::Id id, spv::Decoration dec, unsigned value); - void addMemberDecoration(spv::Id id, int member, 
spv::Decoration dec); - void addMemberDecoration(spv::Id id, int member, spv::Decoration dec, unsigned value); - spv::Id createSpvConstant(const glslang::TIntermTyped&); - spv::Id createSpvConstantFromConstUnionArray(const glslang::TType& type, const glslang::TConstUnionArray&, int& nextConst, bool specConstant); - bool isTrivialLeaf(const glslang::TIntermTyped* node); - bool isTrivial(const glslang::TIntermTyped* node); - spv::Id createShortCircuit(glslang::TOperator, glslang::TIntermTyped& left, glslang::TIntermTyped& right); -#ifdef AMD_EXTENSIONS - spv::Id getExtBuiltins(const char* name); -#endif - - glslang::SpvOptions& options; - spv::Function* shaderEntry; - spv::Function* currentFunction; - spv::Instruction* entryPoint; - int sequenceDepth; - - spv::SpvBuildLogger* logger; - - // There is a 1:1 mapping between a spv builder and a module; this is thread safe - spv::Builder builder; - bool inEntryPoint; - bool entryPointTerminated; - bool linkageOnly; // true when visiting the set of objects in the AST present only for establishing interface, whether or not they were statically used - std::set iOSet; // all input/output variables from either static use or declaration of interface - const glslang::TIntermediate* glslangIntermediate; - spv::Id stdBuiltins; - std::unordered_map extBuiltinMap; - - std::unordered_map symbolValues; - std::unordered_set rValueParameters; // set of formal function parameters passed as rValues, rather than a pointer - std::unordered_map functionMap; - std::unordered_map structMap[glslang::ElpCount][glslang::ElmCount]; - std::unordered_map > memberRemapper; // for mapping glslang block indices to spv indices (e.g., due to hidden members) - std::stack breakForLoop; // false means break for switch -}; - -// -// Helper functions for translating glslang representations to SPIR-V enumerants. -// - -// Translate glslang profile to SPIR-V source language. 
-spv::SourceLanguage TranslateSourceLanguage(glslang::EShSource source, EProfile profile) -{ - switch (source) { - case glslang::EShSourceGlsl: - switch (profile) { - case ENoProfile: - case ECoreProfile: - case ECompatibilityProfile: - return spv::SourceLanguageGLSL; - case EEsProfile: - return spv::SourceLanguageESSL; - default: - return spv::SourceLanguageUnknown; - } - case glslang::EShSourceHlsl: - return spv::SourceLanguageHLSL; - default: - return spv::SourceLanguageUnknown; - } -} - -// Translate glslang language (stage) to SPIR-V execution model. -spv::ExecutionModel TranslateExecutionModel(EShLanguage stage) -{ - switch (stage) { - case EShLangVertex: return spv::ExecutionModelVertex; - case EShLangTessControl: return spv::ExecutionModelTessellationControl; - case EShLangTessEvaluation: return spv::ExecutionModelTessellationEvaluation; - case EShLangGeometry: return spv::ExecutionModelGeometry; - case EShLangFragment: return spv::ExecutionModelFragment; - case EShLangCompute: return spv::ExecutionModelGLCompute; - default: - assert(0); - return spv::ExecutionModelFragment; - } -} - -// Translate glslang sampler type to SPIR-V dimensionality. -spv::Dim TranslateDimensionality(const glslang::TSampler& sampler) -{ - switch (sampler.dim) { - case glslang::Esd1D: return spv::Dim1D; - case glslang::Esd2D: return spv::Dim2D; - case glslang::Esd3D: return spv::Dim3D; - case glslang::EsdCube: return spv::DimCube; - case glslang::EsdRect: return spv::DimRect; - case glslang::EsdBuffer: return spv::DimBuffer; - case glslang::EsdSubpass: return spv::DimSubpassData; - default: - assert(0); - return spv::Dim2D; - } -} - -// Translate glslang precision to SPIR-V precision decorations. 
-spv::Decoration TranslatePrecisionDecoration(glslang::TPrecisionQualifier glslangPrecision) -{ - switch (glslangPrecision) { - case glslang::EpqLow: return spv::DecorationRelaxedPrecision; - case glslang::EpqMedium: return spv::DecorationRelaxedPrecision; - default: - return spv::NoPrecision; - } -} - -// Translate glslang type to SPIR-V precision decorations. -spv::Decoration TranslatePrecisionDecoration(const glslang::TType& type) -{ - return TranslatePrecisionDecoration(type.getQualifier().precision); -} - -// Translate glslang type to SPIR-V block decorations. -spv::Decoration TranslateBlockDecoration(const glslang::TType& type, bool useStorageBuffer) -{ - if (type.getBasicType() == glslang::EbtBlock) { - switch (type.getQualifier().storage) { - case glslang::EvqUniform: return spv::DecorationBlock; - case glslang::EvqBuffer: return useStorageBuffer ? spv::DecorationBlock : spv::DecorationBufferBlock; - case glslang::EvqVaryingIn: return spv::DecorationBlock; - case glslang::EvqVaryingOut: return spv::DecorationBlock; - default: - assert(0); - break; - } - } - - return spv::DecorationMax; -} - -// Translate glslang type to SPIR-V memory decorations. -void TranslateMemoryDecoration(const glslang::TQualifier& qualifier, std::vector& memory) -{ - if (qualifier.coherent) - memory.push_back(spv::DecorationCoherent); - if (qualifier.volatil) - memory.push_back(spv::DecorationVolatile); - if (qualifier.restrict) - memory.push_back(spv::DecorationRestrict); - if (qualifier.readonly) - memory.push_back(spv::DecorationNonWritable); - if (qualifier.writeonly) - memory.push_back(spv::DecorationNonReadable); -} - -// Translate glslang type to SPIR-V layout decorations. 
-spv::Decoration TranslateLayoutDecoration(const glslang::TType& type, glslang::TLayoutMatrix matrixLayout) -{ - if (type.isMatrix()) { - switch (matrixLayout) { - case glslang::ElmRowMajor: - return spv::DecorationRowMajor; - case glslang::ElmColumnMajor: - return spv::DecorationColMajor; - default: - // opaque layouts don't need a majorness - return spv::DecorationMax; - } - } else { - switch (type.getBasicType()) { - default: - return spv::DecorationMax; - break; - case glslang::EbtBlock: - switch (type.getQualifier().storage) { - case glslang::EvqUniform: - case glslang::EvqBuffer: - switch (type.getQualifier().layoutPacking) { - case glslang::ElpShared: return spv::DecorationGLSLShared; - case glslang::ElpPacked: return spv::DecorationGLSLPacked; - default: - return spv::DecorationMax; - } - case glslang::EvqVaryingIn: - case glslang::EvqVaryingOut: - assert(type.getQualifier().layoutPacking == glslang::ElpNone); - return spv::DecorationMax; - default: - assert(0); - return spv::DecorationMax; - } - } - } -} - -// Translate glslang type to SPIR-V interpolation decorations. -// Returns spv::DecorationMax when no decoration -// should be applied. -spv::Decoration TGlslangToSpvTraverser::TranslateInterpolationDecoration(const glslang::TQualifier& qualifier) -{ - if (qualifier.smooth) - // Smooth decoration doesn't exist in SPIR-V 1.0 - return spv::DecorationMax; - else if (qualifier.nopersp) - return spv::DecorationNoPerspective; - else if (qualifier.flat) - return spv::DecorationFlat; -#ifdef AMD_EXTENSIONS - else if (qualifier.explicitInterp) { - builder.addExtension(spv::E_SPV_AMD_shader_explicit_vertex_parameter); - return spv::DecorationExplicitInterpAMD; - } -#endif - else - return spv::DecorationMax; -} - -// Translate glslang type to SPIR-V auxiliary storage decorations. -// Returns spv::DecorationMax when no decoration -// should be applied. 
-spv::Decoration TGlslangToSpvTraverser::TranslateAuxiliaryStorageDecoration(const glslang::TQualifier& qualifier) -{ - if (qualifier.patch) - return spv::DecorationPatch; - else if (qualifier.centroid) - return spv::DecorationCentroid; - else if (qualifier.sample) { - builder.addCapability(spv::CapabilitySampleRateShading); - return spv::DecorationSample; - } else - return spv::DecorationMax; -} - -// If glslang type is invariant, return SPIR-V invariant decoration. -spv::Decoration TranslateInvariantDecoration(const glslang::TQualifier& qualifier) -{ - if (qualifier.invariant) - return spv::DecorationInvariant; - else - return spv::DecorationMax; -} - -// If glslang type is noContraction, return SPIR-V NoContraction decoration. -spv::Decoration TranslateNoContractionDecoration(const glslang::TQualifier& qualifier) -{ - if (qualifier.noContraction) - return spv::DecorationNoContraction; - else - return spv::DecorationMax; -} - -// Translate a glslang built-in variable to a SPIR-V built in decoration. Also generate -// associated capabilities when required. For some built-in variables, a capability -// is generated only when using the variable in an executable instruction, but not when -// just declaring a struct member variable with it. This is true for PointSize, -// ClipDistance, and CullDistance. -spv::BuiltIn TGlslangToSpvTraverser::TranslateBuiltInDecoration(glslang::TBuiltInVariable builtIn, bool memberDeclaration) -{ - switch (builtIn) { - case glslang::EbvPointSize: - // Defer adding the capability until the built-in is actually used. - if (! 
memberDeclaration) { - switch (glslangIntermediate->getStage()) { - case EShLangGeometry: - builder.addCapability(spv::CapabilityGeometryPointSize); - break; - case EShLangTessControl: - case EShLangTessEvaluation: - builder.addCapability(spv::CapabilityTessellationPointSize); - break; - default: - break; - } - } - return spv::BuiltInPointSize; - - // These *Distance capabilities logically belong here, but if the member is declared and - // then never used, consumers of SPIR-V prefer the capability not be declared. - // They are now generated when used, rather than here when declared. - // Potentially, the specification should be more clear what the minimum - // use needed is to trigger the capability. - // - case glslang::EbvClipDistance: - if (!memberDeclaration) - builder.addCapability(spv::CapabilityClipDistance); - return spv::BuiltInClipDistance; - - case glslang::EbvCullDistance: - if (!memberDeclaration) - builder.addCapability(spv::CapabilityCullDistance); - return spv::BuiltInCullDistance; - - case glslang::EbvViewportIndex: - builder.addCapability(spv::CapabilityMultiViewport); - if (glslangIntermediate->getStage() == EShLangVertex || - glslangIntermediate->getStage() == EShLangTessControl || - glslangIntermediate->getStage() == EShLangTessEvaluation) { - - builder.addExtension(spv::E_SPV_EXT_shader_viewport_index_layer); - builder.addCapability(spv::CapabilityShaderViewportIndexLayerEXT); - } - return spv::BuiltInViewportIndex; - - case glslang::EbvSampleId: - builder.addCapability(spv::CapabilitySampleRateShading); - return spv::BuiltInSampleId; - - case glslang::EbvSamplePosition: - builder.addCapability(spv::CapabilitySampleRateShading); - return spv::BuiltInSamplePosition; - - case glslang::EbvSampleMask: - return spv::BuiltInSampleMask; - - case glslang::EbvLayer: - builder.addCapability(spv::CapabilityGeometry); - if (glslangIntermediate->getStage() == EShLangVertex || - glslangIntermediate->getStage() == EShLangTessControl || - 
glslangIntermediate->getStage() == EShLangTessEvaluation) { - - builder.addExtension(spv::E_SPV_EXT_shader_viewport_index_layer); - builder.addCapability(spv::CapabilityShaderViewportIndexLayerEXT); - } - return spv::BuiltInLayer; - - case glslang::EbvPosition: return spv::BuiltInPosition; - case glslang::EbvVertexId: return spv::BuiltInVertexId; - case glslang::EbvInstanceId: return spv::BuiltInInstanceId; - case glslang::EbvVertexIndex: return spv::BuiltInVertexIndex; - case glslang::EbvInstanceIndex: return spv::BuiltInInstanceIndex; - - case glslang::EbvBaseVertex: - builder.addExtension(spv::E_SPV_KHR_shader_draw_parameters); - builder.addCapability(spv::CapabilityDrawParameters); - return spv::BuiltInBaseVertex; - - case glslang::EbvBaseInstance: - builder.addExtension(spv::E_SPV_KHR_shader_draw_parameters); - builder.addCapability(spv::CapabilityDrawParameters); - return spv::BuiltInBaseInstance; - - case glslang::EbvDrawId: - builder.addExtension(spv::E_SPV_KHR_shader_draw_parameters); - builder.addCapability(spv::CapabilityDrawParameters); - return spv::BuiltInDrawIndex; - - case glslang::EbvPrimitiveId: - if (glslangIntermediate->getStage() == EShLangFragment) - builder.addCapability(spv::CapabilityGeometry); - return spv::BuiltInPrimitiveId; - - case glslang::EbvFragStencilRef: - builder.addExtension(spv::E_SPV_EXT_shader_stencil_export); - builder.addCapability(spv::CapabilityStencilExportEXT); - return spv::BuiltInFragStencilRefEXT; - - case glslang::EbvInvocationId: return spv::BuiltInInvocationId; - case glslang::EbvTessLevelInner: return spv::BuiltInTessLevelInner; - case glslang::EbvTessLevelOuter: return spv::BuiltInTessLevelOuter; - case glslang::EbvTessCoord: return spv::BuiltInTessCoord; - case glslang::EbvPatchVertices: return spv::BuiltInPatchVertices; - case glslang::EbvFragCoord: return spv::BuiltInFragCoord; - case glslang::EbvPointCoord: return spv::BuiltInPointCoord; - case glslang::EbvFace: return spv::BuiltInFrontFacing; - case 
glslang::EbvFragDepth: return spv::BuiltInFragDepth; - case glslang::EbvHelperInvocation: return spv::BuiltInHelperInvocation; - case glslang::EbvNumWorkGroups: return spv::BuiltInNumWorkgroups; - case glslang::EbvWorkGroupSize: return spv::BuiltInWorkgroupSize; - case glslang::EbvWorkGroupId: return spv::BuiltInWorkgroupId; - case glslang::EbvLocalInvocationId: return spv::BuiltInLocalInvocationId; - case glslang::EbvLocalInvocationIndex: return spv::BuiltInLocalInvocationIndex; - case glslang::EbvGlobalInvocationId: return spv::BuiltInGlobalInvocationId; - - case glslang::EbvSubGroupSize: - builder.addExtension(spv::E_SPV_KHR_shader_ballot); - builder.addCapability(spv::CapabilitySubgroupBallotKHR); - return spv::BuiltInSubgroupSize; - - case glslang::EbvSubGroupInvocation: - builder.addExtension(spv::E_SPV_KHR_shader_ballot); - builder.addCapability(spv::CapabilitySubgroupBallotKHR); - return spv::BuiltInSubgroupLocalInvocationId; - - case glslang::EbvSubGroupEqMask: - builder.addExtension(spv::E_SPV_KHR_shader_ballot); - builder.addCapability(spv::CapabilitySubgroupBallotKHR); - return spv::BuiltInSubgroupEqMaskKHR; - - case glslang::EbvSubGroupGeMask: - builder.addExtension(spv::E_SPV_KHR_shader_ballot); - builder.addCapability(spv::CapabilitySubgroupBallotKHR); - return spv::BuiltInSubgroupGeMaskKHR; - - case glslang::EbvSubGroupGtMask: - builder.addExtension(spv::E_SPV_KHR_shader_ballot); - builder.addCapability(spv::CapabilitySubgroupBallotKHR); - return spv::BuiltInSubgroupGtMaskKHR; - - case glslang::EbvSubGroupLeMask: - builder.addExtension(spv::E_SPV_KHR_shader_ballot); - builder.addCapability(spv::CapabilitySubgroupBallotKHR); - return spv::BuiltInSubgroupLeMaskKHR; - - case glslang::EbvSubGroupLtMask: - builder.addExtension(spv::E_SPV_KHR_shader_ballot); - builder.addCapability(spv::CapabilitySubgroupBallotKHR); - return spv::BuiltInSubgroupLtMaskKHR; - -#ifdef AMD_EXTENSIONS - case glslang::EbvBaryCoordNoPersp: - 
builder.addExtension(spv::E_SPV_AMD_shader_explicit_vertex_parameter); - return spv::BuiltInBaryCoordNoPerspAMD; - - case glslang::EbvBaryCoordNoPerspCentroid: - builder.addExtension(spv::E_SPV_AMD_shader_explicit_vertex_parameter); - return spv::BuiltInBaryCoordNoPerspCentroidAMD; - - case glslang::EbvBaryCoordNoPerspSample: - builder.addExtension(spv::E_SPV_AMD_shader_explicit_vertex_parameter); - return spv::BuiltInBaryCoordNoPerspSampleAMD; - - case glslang::EbvBaryCoordSmooth: - builder.addExtension(spv::E_SPV_AMD_shader_explicit_vertex_parameter); - return spv::BuiltInBaryCoordSmoothAMD; - - case glslang::EbvBaryCoordSmoothCentroid: - builder.addExtension(spv::E_SPV_AMD_shader_explicit_vertex_parameter); - return spv::BuiltInBaryCoordSmoothCentroidAMD; - - case glslang::EbvBaryCoordSmoothSample: - builder.addExtension(spv::E_SPV_AMD_shader_explicit_vertex_parameter); - return spv::BuiltInBaryCoordSmoothSampleAMD; - - case glslang::EbvBaryCoordPullModel: - builder.addExtension(spv::E_SPV_AMD_shader_explicit_vertex_parameter); - return spv::BuiltInBaryCoordPullModelAMD; -#endif - - case glslang::EbvDeviceIndex: - builder.addExtension(spv::E_SPV_KHR_device_group); - builder.addCapability(spv::CapabilityDeviceGroup); - return spv::BuiltInDeviceIndex; - - case glslang::EbvViewIndex: - builder.addExtension(spv::E_SPV_KHR_multiview); - builder.addCapability(spv::CapabilityMultiView); - return spv::BuiltInViewIndex; - -#ifdef NV_EXTENSIONS - case glslang::EbvViewportMaskNV: - if (!memberDeclaration) { - builder.addExtension(spv::E_SPV_NV_viewport_array2); - builder.addCapability(spv::CapabilityShaderViewportMaskNV); - } - return spv::BuiltInViewportMaskNV; - case glslang::EbvSecondaryPositionNV: - if (!memberDeclaration) { - builder.addExtension(spv::E_SPV_NV_stereo_view_rendering); - builder.addCapability(spv::CapabilityShaderStereoViewNV); - } - return spv::BuiltInSecondaryPositionNV; - case glslang::EbvSecondaryViewportMaskNV: - if (!memberDeclaration) { - 
builder.addExtension(spv::E_SPV_NV_stereo_view_rendering); - builder.addCapability(spv::CapabilityShaderStereoViewNV); - } - return spv::BuiltInSecondaryViewportMaskNV; - case glslang::EbvPositionPerViewNV: - if (!memberDeclaration) { - builder.addExtension(spv::E_SPV_NVX_multiview_per_view_attributes); - builder.addCapability(spv::CapabilityPerViewAttributesNV); - } - return spv::BuiltInPositionPerViewNV; - case glslang::EbvViewportMaskPerViewNV: - if (!memberDeclaration) { - builder.addExtension(spv::E_SPV_NVX_multiview_per_view_attributes); - builder.addCapability(spv::CapabilityPerViewAttributesNV); - } - return spv::BuiltInViewportMaskPerViewNV; - case glslang::EbvFragFullyCoveredNV: - builder.addExtension(spv::E_SPV_EXT_fragment_fully_covered); - builder.addCapability(spv::CapabilityFragmentFullyCoveredEXT); - return spv::BuiltInFullyCoveredEXT; -#endif - default: - return spv::BuiltInMax; - } -} - -// Translate glslang image layout format to SPIR-V image format. -spv::ImageFormat TGlslangToSpvTraverser::TranslateImageFormat(const glslang::TType& type) -{ - assert(type.getBasicType() == glslang::EbtSampler); - - // Check for capabilities - switch (type.getQualifier().layoutFormat) { - case glslang::ElfRg32f: - case glslang::ElfRg16f: - case glslang::ElfR11fG11fB10f: - case glslang::ElfR16f: - case glslang::ElfRgba16: - case glslang::ElfRgb10A2: - case glslang::ElfRg16: - case glslang::ElfRg8: - case glslang::ElfR16: - case glslang::ElfR8: - case glslang::ElfRgba16Snorm: - case glslang::ElfRg16Snorm: - case glslang::ElfRg8Snorm: - case glslang::ElfR16Snorm: - case glslang::ElfR8Snorm: - - case glslang::ElfRg32i: - case glslang::ElfRg16i: - case glslang::ElfRg8i: - case glslang::ElfR16i: - case glslang::ElfR8i: - - case glslang::ElfRgb10a2ui: - case glslang::ElfRg32ui: - case glslang::ElfRg16ui: - case glslang::ElfRg8ui: - case glslang::ElfR16ui: - case glslang::ElfR8ui: - builder.addCapability(spv::CapabilityStorageImageExtendedFormats); - break; - - default: 
- break; - } - - // do the translation - switch (type.getQualifier().layoutFormat) { - case glslang::ElfNone: return spv::ImageFormatUnknown; - case glslang::ElfRgba32f: return spv::ImageFormatRgba32f; - case glslang::ElfRgba16f: return spv::ImageFormatRgba16f; - case glslang::ElfR32f: return spv::ImageFormatR32f; - case glslang::ElfRgba8: return spv::ImageFormatRgba8; - case glslang::ElfRgba8Snorm: return spv::ImageFormatRgba8Snorm; - case glslang::ElfRg32f: return spv::ImageFormatRg32f; - case glslang::ElfRg16f: return spv::ImageFormatRg16f; - case glslang::ElfR11fG11fB10f: return spv::ImageFormatR11fG11fB10f; - case glslang::ElfR16f: return spv::ImageFormatR16f; - case glslang::ElfRgba16: return spv::ImageFormatRgba16; - case glslang::ElfRgb10A2: return spv::ImageFormatRgb10A2; - case glslang::ElfRg16: return spv::ImageFormatRg16; - case glslang::ElfRg8: return spv::ImageFormatRg8; - case glslang::ElfR16: return spv::ImageFormatR16; - case glslang::ElfR8: return spv::ImageFormatR8; - case glslang::ElfRgba16Snorm: return spv::ImageFormatRgba16Snorm; - case glslang::ElfRg16Snorm: return spv::ImageFormatRg16Snorm; - case glslang::ElfRg8Snorm: return spv::ImageFormatRg8Snorm; - case glslang::ElfR16Snorm: return spv::ImageFormatR16Snorm; - case glslang::ElfR8Snorm: return spv::ImageFormatR8Snorm; - case glslang::ElfRgba32i: return spv::ImageFormatRgba32i; - case glslang::ElfRgba16i: return spv::ImageFormatRgba16i; - case glslang::ElfRgba8i: return spv::ImageFormatRgba8i; - case glslang::ElfR32i: return spv::ImageFormatR32i; - case glslang::ElfRg32i: return spv::ImageFormatRg32i; - case glslang::ElfRg16i: return spv::ImageFormatRg16i; - case glslang::ElfRg8i: return spv::ImageFormatRg8i; - case glslang::ElfR16i: return spv::ImageFormatR16i; - case glslang::ElfR8i: return spv::ImageFormatR8i; - case glslang::ElfRgba32ui: return spv::ImageFormatRgba32ui; - case glslang::ElfRgba16ui: return spv::ImageFormatRgba16ui; - case glslang::ElfRgba8ui: return 
spv::ImageFormatRgba8ui; - case glslang::ElfR32ui: return spv::ImageFormatR32ui; - case glslang::ElfRg32ui: return spv::ImageFormatRg32ui; - case glslang::ElfRg16ui: return spv::ImageFormatRg16ui; - case glslang::ElfRgb10a2ui: return spv::ImageFormatRgb10a2ui; - case glslang::ElfRg8ui: return spv::ImageFormatRg8ui; - case glslang::ElfR16ui: return spv::ImageFormatR16ui; - case glslang::ElfR8ui: return spv::ImageFormatR8ui; - default: return spv::ImageFormatMax; - } -} - -spv::SelectionControlMask TGlslangToSpvTraverser::TranslateSelectionControl(const glslang::TIntermSelection& selectionNode) const -{ - if (selectionNode.getFlatten()) - return spv::SelectionControlFlattenMask; - if (selectionNode.getDontFlatten()) - return spv::SelectionControlDontFlattenMask; - return spv::SelectionControlMaskNone; -} - -spv::SelectionControlMask TGlslangToSpvTraverser::TranslateSwitchControl(const glslang::TIntermSwitch& switchNode) const -{ - if (switchNode.getFlatten()) - return spv::SelectionControlFlattenMask; - if (switchNode.getDontFlatten()) - return spv::SelectionControlDontFlattenMask; - return spv::SelectionControlMaskNone; -} - -// return a non-0 dependency if the dependency argument must be set -spv::LoopControlMask TGlslangToSpvTraverser::TranslateLoopControl(const glslang::TIntermLoop& loopNode, - unsigned int& dependencyLength) const -{ - spv::LoopControlMask control = spv::LoopControlMaskNone; - - if (loopNode.getDontUnroll()) - control = control | spv::LoopControlDontUnrollMask; - if (loopNode.getUnroll()) - control = control | spv::LoopControlUnrollMask; - if (loopNode.getLoopDependency() == glslang::TIntermLoop::dependencyInfinite) - control = control | spv::LoopControlDependencyInfiniteMask; - else if (loopNode.getLoopDependency() > 0) { - control = control | spv::LoopControlDependencyLengthMask; - dependencyLength = loopNode.getLoopDependency(); - } - - return control; -} - -// Translate glslang type to SPIR-V storage class. 
-spv::StorageClass TGlslangToSpvTraverser::TranslateStorageClass(const glslang::TType& type) -{ - if (type.getQualifier().isPipeInput()) - return spv::StorageClassInput; - if (type.getQualifier().isPipeOutput()) - return spv::StorageClassOutput; - - if (glslangIntermediate->getSource() != glslang::EShSourceHlsl || - type.getQualifier().storage == glslang::EvqUniform) { - if (type.getBasicType() == glslang::EbtAtomicUint) - return spv::StorageClassAtomicCounter; - if (type.containsOpaque()) - return spv::StorageClassUniformConstant; - } - - if (glslangIntermediate->usingStorageBuffer() && type.getQualifier().storage == glslang::EvqBuffer) { - builder.addExtension(spv::E_SPV_KHR_storage_buffer_storage_class); - return spv::StorageClassStorageBuffer; - } - - if (type.getQualifier().isUniformOrBuffer()) { - if (type.getQualifier().layoutPushConstant) - return spv::StorageClassPushConstant; - if (type.getBasicType() == glslang::EbtBlock) - return spv::StorageClassUniform; - return spv::StorageClassUniformConstant; - } - - switch (type.getQualifier().storage) { - case glslang::EvqShared: return spv::StorageClassWorkgroup; - case glslang::EvqGlobal: return spv::StorageClassPrivate; - case glslang::EvqConstReadOnly: return spv::StorageClassFunction; - case glslang::EvqTemporary: return spv::StorageClassFunction; - default: - assert(0); - break; - } - - return spv::StorageClassFunction; -} - -// Return whether or not the given type is something that should be tied to a -// descriptor set. -bool IsDescriptorResource(const glslang::TType& type) -{ - // uniform and buffer blocks are included, unless it is a push_constant - if (type.getBasicType() == glslang::EbtBlock) - return type.getQualifier().isUniformOrBuffer() && ! type.getQualifier().layoutPushConstant; - - // non block... - // basically samplerXXX/subpass/sampler/texture are all included - // if they are the global-scope-class, not the function parameter - // (or local, if they ever exist) class. 
- if (type.getBasicType() == glslang::EbtSampler) - return type.getQualifier().isUniformOrBuffer(); - - // None of the above. - return false; -} - -void InheritQualifiers(glslang::TQualifier& child, const glslang::TQualifier& parent) -{ - if (child.layoutMatrix == glslang::ElmNone) - child.layoutMatrix = parent.layoutMatrix; - - if (parent.invariant) - child.invariant = true; - if (parent.nopersp) - child.nopersp = true; -#ifdef AMD_EXTENSIONS - if (parent.explicitInterp) - child.explicitInterp = true; -#endif - if (parent.flat) - child.flat = true; - if (parent.centroid) - child.centroid = true; - if (parent.patch) - child.patch = true; - if (parent.sample) - child.sample = true; - if (parent.coherent) - child.coherent = true; - if (parent.volatil) - child.volatil = true; - if (parent.restrict) - child.restrict = true; - if (parent.readonly) - child.readonly = true; - if (parent.writeonly) - child.writeonly = true; -} - -bool HasNonLayoutQualifiers(const glslang::TType& type, const glslang::TQualifier& qualifier) -{ - // This should list qualifiers that simultaneous satisfy: - // - struct members might inherit from a struct declaration - // (note that non-block structs don't explicitly inherit, - // only implicitly, meaning no decoration involved) - // - affect decorations on the struct members - // (note smooth does not, and expecting something like volatile - // to effect the whole object) - // - are not part of the offset/st430/etc or row/column-major layout - return qualifier.invariant || (qualifier.hasLocation() && type.getBasicType() == glslang::EbtBlock); -} - -// -// Implement the TGlslangToSpvTraverser class. 
-// - -TGlslangToSpvTraverser::TGlslangToSpvTraverser(unsigned int spvVersion, const glslang::TIntermediate* glslangIntermediate, - spv::SpvBuildLogger* buildLogger, glslang::SpvOptions& options) - : TIntermTraverser(true, false, true), - options(options), - shaderEntry(nullptr), currentFunction(nullptr), - sequenceDepth(0), logger(buildLogger), - builder(spvVersion, (glslang::GetKhronosToolId() << 16) | glslang::GetSpirvGeneratorVersion(), logger), - inEntryPoint(false), entryPointTerminated(false), linkageOnly(false), - glslangIntermediate(glslangIntermediate) -{ - spv::ExecutionModel executionModel = TranslateExecutionModel(glslangIntermediate->getStage()); - - builder.clearAccessChain(); - builder.setSource(TranslateSourceLanguage(glslangIntermediate->getSource(), glslangIntermediate->getProfile()), - glslangIntermediate->getVersion()); - - if (options.generateDebugInfo) { - builder.setEmitOpLines(); - builder.setSourceFile(glslangIntermediate->getSourceFile()); - - // Set the source shader's text. If for SPV version 1.0, include - // a preamble in comments stating the OpModuleProcessed instructions. - // Otherwise, emit those as actual instructions. 
- std::string text; - const std::vector& processes = glslangIntermediate->getProcesses(); - for (int p = 0; p < (int)processes.size(); ++p) { - if (glslangIntermediate->getSpv().spv < 0x00010100) { - text.append("// OpModuleProcessed "); - text.append(processes[p]); - text.append("\n"); - } else - builder.addModuleProcessed(processes[p]); - } - if (glslangIntermediate->getSpv().spv < 0x00010100 && (int)processes.size() > 0) - text.append("#line 1\n"); - text.append(glslangIntermediate->getSourceText()); - builder.setSourceText(text); - } - stdBuiltins = builder.import("GLSL.std.450"); - builder.setMemoryModel(spv::AddressingModelLogical, spv::MemoryModelGLSL450); - shaderEntry = builder.makeEntryPoint(glslangIntermediate->getEntryPointName().c_str()); - entryPoint = builder.addEntryPoint(executionModel, shaderEntry, glslangIntermediate->getEntryPointName().c_str()); - - // Add the source extensions - const auto& sourceExtensions = glslangIntermediate->getRequestedExtensions(); - for (auto it = sourceExtensions.begin(); it != sourceExtensions.end(); ++it) - builder.addSourceExtension(it->c_str()); - - // Add the top-level modes for this shader. 
- - if (glslangIntermediate->getXfbMode()) { - builder.addCapability(spv::CapabilityTransformFeedback); - builder.addExecutionMode(shaderEntry, spv::ExecutionModeXfb); - } - - unsigned int mode; - switch (glslangIntermediate->getStage()) { - case EShLangVertex: - builder.addCapability(spv::CapabilityShader); - break; - - case EShLangTessEvaluation: - case EShLangTessControl: - builder.addCapability(spv::CapabilityTessellation); - - glslang::TLayoutGeometry primitive; - - if (glslangIntermediate->getStage() == EShLangTessControl) { - builder.addExecutionMode(shaderEntry, spv::ExecutionModeOutputVertices, glslangIntermediate->getVertices()); - primitive = glslangIntermediate->getOutputPrimitive(); - } else { - primitive = glslangIntermediate->getInputPrimitive(); - } - - switch (primitive) { - case glslang::ElgTriangles: mode = spv::ExecutionModeTriangles; break; - case glslang::ElgQuads: mode = spv::ExecutionModeQuads; break; - case glslang::ElgIsolines: mode = spv::ExecutionModeIsolines; break; - default: mode = spv::ExecutionModeMax; break; - } - if (mode != spv::ExecutionModeMax) - builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode); - - switch (glslangIntermediate->getVertexSpacing()) { - case glslang::EvsEqual: mode = spv::ExecutionModeSpacingEqual; break; - case glslang::EvsFractionalEven: mode = spv::ExecutionModeSpacingFractionalEven; break; - case glslang::EvsFractionalOdd: mode = spv::ExecutionModeSpacingFractionalOdd; break; - default: mode = spv::ExecutionModeMax; break; - } - if (mode != spv::ExecutionModeMax) - builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode); - - switch (glslangIntermediate->getVertexOrder()) { - case glslang::EvoCw: mode = spv::ExecutionModeVertexOrderCw; break; - case glslang::EvoCcw: mode = spv::ExecutionModeVertexOrderCcw; break; - default: mode = spv::ExecutionModeMax; break; - } - if (mode != spv::ExecutionModeMax) - builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode); - - if 
(glslangIntermediate->getPointMode()) - builder.addExecutionMode(shaderEntry, spv::ExecutionModePointMode); - break; - - case EShLangGeometry: - builder.addCapability(spv::CapabilityGeometry); - switch (glslangIntermediate->getInputPrimitive()) { - case glslang::ElgPoints: mode = spv::ExecutionModeInputPoints; break; - case glslang::ElgLines: mode = spv::ExecutionModeInputLines; break; - case glslang::ElgLinesAdjacency: mode = spv::ExecutionModeInputLinesAdjacency; break; - case glslang::ElgTriangles: mode = spv::ExecutionModeTriangles; break; - case glslang::ElgTrianglesAdjacency: mode = spv::ExecutionModeInputTrianglesAdjacency; break; - default: mode = spv::ExecutionModeMax; break; - } - if (mode != spv::ExecutionModeMax) - builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode); - - builder.addExecutionMode(shaderEntry, spv::ExecutionModeInvocations, glslangIntermediate->getInvocations()); - - switch (glslangIntermediate->getOutputPrimitive()) { - case glslang::ElgPoints: mode = spv::ExecutionModeOutputPoints; break; - case glslang::ElgLineStrip: mode = spv::ExecutionModeOutputLineStrip; break; - case glslang::ElgTriangleStrip: mode = spv::ExecutionModeOutputTriangleStrip; break; - default: mode = spv::ExecutionModeMax; break; - } - if (mode != spv::ExecutionModeMax) - builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode); - builder.addExecutionMode(shaderEntry, spv::ExecutionModeOutputVertices, glslangIntermediate->getVertices()); - break; - - case EShLangFragment: - builder.addCapability(spv::CapabilityShader); - if (glslangIntermediate->getPixelCenterInteger()) - builder.addExecutionMode(shaderEntry, spv::ExecutionModePixelCenterInteger); - - if (glslangIntermediate->getOriginUpperLeft()) - builder.addExecutionMode(shaderEntry, spv::ExecutionModeOriginUpperLeft); - else - builder.addExecutionMode(shaderEntry, spv::ExecutionModeOriginLowerLeft); - - if (glslangIntermediate->getEarlyFragmentTests()) - builder.addExecutionMode(shaderEntry, 
spv::ExecutionModeEarlyFragmentTests); - - if (glslangIntermediate->getPostDepthCoverage()) { - builder.addCapability(spv::CapabilitySampleMaskPostDepthCoverage); - builder.addExecutionMode(shaderEntry, spv::ExecutionModePostDepthCoverage); - builder.addExtension(spv::E_SPV_KHR_post_depth_coverage); - } - - switch(glslangIntermediate->getDepth()) { - case glslang::EldGreater: mode = spv::ExecutionModeDepthGreater; break; - case glslang::EldLess: mode = spv::ExecutionModeDepthLess; break; - default: mode = spv::ExecutionModeMax; break; - } - if (mode != spv::ExecutionModeMax) - builder.addExecutionMode(shaderEntry, (spv::ExecutionMode)mode); - - if (glslangIntermediate->getDepth() != glslang::EldUnchanged && glslangIntermediate->isDepthReplacing()) - builder.addExecutionMode(shaderEntry, spv::ExecutionModeDepthReplacing); - break; - - case EShLangCompute: - builder.addCapability(spv::CapabilityShader); - builder.addExecutionMode(shaderEntry, spv::ExecutionModeLocalSize, glslangIntermediate->getLocalSize(0), - glslangIntermediate->getLocalSize(1), - glslangIntermediate->getLocalSize(2)); - break; - - default: - break; - } -} - -// Finish creating SPV, after the traversal is complete. -void TGlslangToSpvTraverser::finishSpv() -{ - if (! entryPointTerminated) { - builder.setBuildPoint(shaderEntry->getLastBlock()); - builder.leaveFunction(); - } - - // finish off the entry-point SPV instruction by adding the Input/Output - for (auto it = iOSet.cbegin(); it != iOSet.cend(); ++it) - entryPoint->addIdOperand(*it); - - builder.eliminateDeadDecorations(); -} - -// Write the SPV into 'out'. -void TGlslangToSpvTraverser::dumpSpv(std::vector& out) -{ - builder.dump(out); -} - -// -// Implement the traversal functions. -// -// Return true from interior nodes to have the external traversal -// continue on to children. Return false if children were -// already processed. 
-// - -// -// Symbols can turn into -// - uniform/input reads -// - output writes -// - complex lvalue base setups: foo.bar[3].... , where we see foo and start up an access chain -// - something simple that degenerates into the last bullet -// -void TGlslangToSpvTraverser::visitSymbol(glslang::TIntermSymbol* symbol) -{ - SpecConstantOpModeGuard spec_constant_op_mode_setter(&builder); - if (symbol->getType().getQualifier().isSpecConstant()) - spec_constant_op_mode_setter.turnOnSpecConstantOpMode(); - - // getSymbolId() will set up all the IO decorations on the first call. - // Formal function parameters were mapped during makeFunctions(). - spv::Id id = getSymbolId(symbol); - - // Include all "static use" and "linkage only" interface variables on the OpEntryPoint instruction - if (builder.isPointer(id)) { - spv::StorageClass sc = builder.getStorageClass(id); - if (sc == spv::StorageClassInput || sc == spv::StorageClassOutput) { - if (!symbol->getType().isStruct() || symbol->getType().getStruct()->size() > 0) - iOSet.insert(id); - } - } - - // Only process non-linkage-only nodes for generating actual static uses - if (! linkageOnly || symbol->getQualifier().isSpecConstant()) { - // Prepare to generate code for the access - - // L-value chains will be computed left to right. We're on the symbol now, - // which is the left-most part of the access chain, so now is "clear" time, - // followed by setting the base. - builder.clearAccessChain(); - - // For now, we consider all user variables as being in memory, so they are pointers, - // except for - // A) R-Value arguments to a function, which are an intermediate object. - // See comments in handleUserFunctionCall(). - // B) Specialization constants (normal constants don't even come in as a variable), - // These are also pure R-values. 
- glslang::TQualifier qualifier = symbol->getQualifier(); - if (qualifier.isSpecConstant() || rValueParameters.find(symbol->getId()) != rValueParameters.end()) - builder.setAccessChainRValue(id); - else - builder.setAccessChainLValue(id); - } -} - -bool TGlslangToSpvTraverser::visitBinary(glslang::TVisit /* visit */, glslang::TIntermBinary* node) -{ - builder.setLine(node->getLoc().line); - - SpecConstantOpModeGuard spec_constant_op_mode_setter(&builder); - if (node->getType().getQualifier().isSpecConstant()) - spec_constant_op_mode_setter.turnOnSpecConstantOpMode(); - - // First, handle special cases - switch (node->getOp()) { - case glslang::EOpAssign: - case glslang::EOpAddAssign: - case glslang::EOpSubAssign: - case glslang::EOpMulAssign: - case glslang::EOpVectorTimesMatrixAssign: - case glslang::EOpVectorTimesScalarAssign: - case glslang::EOpMatrixTimesScalarAssign: - case glslang::EOpMatrixTimesMatrixAssign: - case glslang::EOpDivAssign: - case glslang::EOpModAssign: - case glslang::EOpAndAssign: - case glslang::EOpInclusiveOrAssign: - case glslang::EOpExclusiveOrAssign: - case glslang::EOpLeftShiftAssign: - case glslang::EOpRightShiftAssign: - // A bin-op assign "a += b" means the same thing as "a = a + b" - // where a is evaluated before b. For a simple assignment, GLSL - // says to evaluate the left before the right. So, always, left - // node then right node. 
- { - // get the left l-value, save it away - builder.clearAccessChain(); - node->getLeft()->traverse(this); - spv::Builder::AccessChain lValue = builder.getAccessChain(); - - // evaluate the right - builder.clearAccessChain(); - node->getRight()->traverse(this); - spv::Id rValue = accessChainLoad(node->getRight()->getType()); - - if (node->getOp() != glslang::EOpAssign) { - // the left is also an r-value - builder.setAccessChain(lValue); - spv::Id leftRValue = accessChainLoad(node->getLeft()->getType()); - - // do the operation - rValue = createBinaryOperation(node->getOp(), TranslatePrecisionDecoration(node->getOperationPrecision()), - TranslateNoContractionDecoration(node->getType().getQualifier()), - convertGlslangToSpvType(node->getType()), leftRValue, rValue, - node->getType().getBasicType()); - - // these all need their counterparts in createBinaryOperation() - assert(rValue != spv::NoResult); - } - - // store the result - builder.setAccessChain(lValue); - multiTypeStore(node->getType(), rValue); - - // assignments are expressions having an rValue after they are evaluated... - builder.clearAccessChain(); - builder.setAccessChainRValue(rValue); - } - return false; - case glslang::EOpIndexDirect: - case glslang::EOpIndexDirectStruct: - { - // Get the left part of the access chain. - node->getLeft()->traverse(this); - - // Add the next element in the chain - - const int glslangIndex = node->getRight()->getAsConstantUnion()->getConstArray()[0].getIConst(); - if (! node->getLeft()->getType().isArray() && - node->getLeft()->getType().isVector() && - node->getOp() == glslang::EOpIndexDirect) { - // This is essentially a hard-coded vector swizzle of size 1, - // so short circuit the access-chain stuff with a swizzle. 
- std::vector swizzle; - swizzle.push_back(glslangIndex); - builder.accessChainPushSwizzle(swizzle, convertGlslangToSpvType(node->getLeft()->getType())); - } else { - int spvIndex = glslangIndex; - if (node->getLeft()->getBasicType() == glslang::EbtBlock && - node->getOp() == glslang::EOpIndexDirectStruct) - { - // This may be, e.g., an anonymous block-member selection, which generally need - // index remapping due to hidden members in anonymous blocks. - std::vector& remapper = memberRemapper[node->getLeft()->getType().getStruct()]; - assert(remapper.size() > 0); - spvIndex = remapper[glslangIndex]; - } - - // normal case for indexing array or structure or block - builder.accessChainPush(builder.makeIntConstant(spvIndex)); - - // Add capabilities here for accessing PointSize and clip/cull distance. - // We have deferred generation of associated capabilities until now. - if (node->getLeft()->getType().isStruct() && ! node->getLeft()->getType().isArray()) - declareUseOfStructMember(*(node->getLeft()->getType().getStruct()), glslangIndex); - } - } - return false; - case glslang::EOpIndexIndirect: - { - // Structure or array or vector indirection. - // Will use native SPIR-V access-chain for struct and array indirection; - // matrices are arrays of vectors, so will also work for a matrix. - // Will use the access chain's 'component' for variable index into a vector. - - // This adapter is building access chains left to right. - // Set up the access chain to the left. - node->getLeft()->traverse(this); - - // save it so that computing the right side doesn't trash it - spv::Builder::AccessChain partial = builder.getAccessChain(); - - // compute the next index in the chain - builder.clearAccessChain(); - node->getRight()->traverse(this); - spv::Id index = accessChainLoad(node->getRight()->getType()); - - // restore the saved access chain - builder.setAccessChain(partial); - - if (! 
node->getLeft()->getType().isArray() && node->getLeft()->getType().isVector()) - builder.accessChainPushComponent(index, convertGlslangToSpvType(node->getLeft()->getType())); - else - builder.accessChainPush(index); - } - return false; - case glslang::EOpVectorSwizzle: - { - node->getLeft()->traverse(this); - std::vector swizzle; - convertSwizzle(*node->getRight()->getAsAggregate(), swizzle); - builder.accessChainPushSwizzle(swizzle, convertGlslangToSpvType(node->getLeft()->getType())); - } - return false; - case glslang::EOpMatrixSwizzle: - logger->missingFunctionality("matrix swizzle"); - return true; - case glslang::EOpLogicalOr: - case glslang::EOpLogicalAnd: - { - - // These may require short circuiting, but can sometimes be done as straight - // binary operations. The right operand must be short circuited if it has - // side effects, and should probably be if it is complex. - if (isTrivial(node->getRight()->getAsTyped())) - break; // handle below as a normal binary operation - // otherwise, we need to do dynamic short circuiting on the right operand - spv::Id result = createShortCircuit(node->getOp(), *node->getLeft()->getAsTyped(), *node->getRight()->getAsTyped()); - builder.clearAccessChain(); - builder.setAccessChainRValue(result); - } - return false; - default: - break; - } - - // Assume generic binary op... - - // get right operand - builder.clearAccessChain(); - node->getLeft()->traverse(this); - spv::Id left = accessChainLoad(node->getLeft()->getType()); - - // get left operand - builder.clearAccessChain(); - node->getRight()->traverse(this); - spv::Id right = accessChainLoad(node->getRight()->getType()); - - // get result - spv::Id result = createBinaryOperation(node->getOp(), TranslatePrecisionDecoration(node->getOperationPrecision()), - TranslateNoContractionDecoration(node->getType().getQualifier()), - convertGlslangToSpvType(node->getType()), left, right, - node->getLeft()->getType().getBasicType()); - - builder.clearAccessChain(); - if (! 
result) { - logger->missingFunctionality("unknown glslang binary operation"); - return true; // pick up a child as the place-holder result - } else { - builder.setAccessChainRValue(result); - return false; - } -} - -bool TGlslangToSpvTraverser::visitUnary(glslang::TVisit /* visit */, glslang::TIntermUnary* node) -{ - builder.setLine(node->getLoc().line); - - SpecConstantOpModeGuard spec_constant_op_mode_setter(&builder); - if (node->getType().getQualifier().isSpecConstant()) - spec_constant_op_mode_setter.turnOnSpecConstantOpMode(); - - spv::Id result = spv::NoResult; - - // try texturing first - result = createImageTextureFunctionCall(node); - if (result != spv::NoResult) { - builder.clearAccessChain(); - builder.setAccessChainRValue(result); - - return false; // done with this node - } - - // Non-texturing. - - if (node->getOp() == glslang::EOpArrayLength) { - // Quite special; won't want to evaluate the operand. - - // Normal .length() would have been constant folded by the front-end. - // So, this has to be block.lastMember.length(). - // SPV wants "block" and member number as the operands, go get them. - assert(node->getOperand()->getType().isRuntimeSizedArray()); - glslang::TIntermTyped* block = node->getOperand()->getAsBinaryNode()->getLeft(); - block->traverse(this); - unsigned int member = node->getOperand()->getAsBinaryNode()->getRight()->getAsConstantUnion()->getConstArray()[0].getUConst(); - spv::Id length = builder.createArrayLength(builder.accessChainGetLValue(), member); - - builder.clearAccessChain(); - builder.setAccessChainRValue(length); - - return false; - } - - // Start by evaluating the operand - - // Does it need a swizzle inversion? If so, evaluation is inverted; - // operate first on the swizzle base, then apply the swizzle. - spv::Id invertedType = spv::NoType; - auto resultType = [&invertedType, &node, this](){ return invertedType != spv::NoType ? 
invertedType : convertGlslangToSpvType(node->getType()); }; - if (node->getOp() == glslang::EOpInterpolateAtCentroid) - invertedType = getInvertedSwizzleType(*node->getOperand()); - - builder.clearAccessChain(); - if (invertedType != spv::NoType) - node->getOperand()->getAsBinaryNode()->getLeft()->traverse(this); - else - node->getOperand()->traverse(this); - - spv::Id operand = spv::NoResult; - - if (node->getOp() == glslang::EOpAtomicCounterIncrement || - node->getOp() == glslang::EOpAtomicCounterDecrement || - node->getOp() == glslang::EOpAtomicCounter || - node->getOp() == glslang::EOpInterpolateAtCentroid) - operand = builder.accessChainGetLValue(); // Special case l-value operands - else - operand = accessChainLoad(node->getOperand()->getType()); - - spv::Decoration precision = TranslatePrecisionDecoration(node->getOperationPrecision()); - spv::Decoration noContraction = TranslateNoContractionDecoration(node->getType().getQualifier()); - - // it could be a conversion - if (! result) - result = createConversion(node->getOp(), precision, noContraction, resultType(), operand, node->getOperand()->getBasicType()); - - // if not, then possibly an operation - if (! result) - result = createUnaryOperation(node->getOp(), precision, noContraction, resultType(), operand, node->getOperand()->getBasicType()); - - if (result) { - if (invertedType) - result = createInvertedSwizzle(precision, *node->getOperand(), result); - - builder.clearAccessChain(); - builder.setAccessChainRValue(result); - - return false; // done with this node - } - - // it must be a special case, check... 
- switch (node->getOp()) { - case glslang::EOpPostIncrement: - case glslang::EOpPostDecrement: - case glslang::EOpPreIncrement: - case glslang::EOpPreDecrement: - { - // we need the integer value "1" or the floating point "1.0" to add/subtract - spv::Id one = 0; - if (node->getBasicType() == glslang::EbtFloat) - one = builder.makeFloatConstant(1.0F); - else if (node->getBasicType() == glslang::EbtDouble) - one = builder.makeDoubleConstant(1.0); -#ifdef AMD_EXTENSIONS - else if (node->getBasicType() == glslang::EbtFloat16) - one = builder.makeFloat16Constant(1.0F); -#endif - else if (node->getBasicType() == glslang::EbtInt64 || node->getBasicType() == glslang::EbtUint64) - one = builder.makeInt64Constant(1); -#ifdef AMD_EXTENSIONS - else if (node->getBasicType() == glslang::EbtInt16 || node->getBasicType() == glslang::EbtUint16) - one = builder.makeInt16Constant(1); -#endif - else - one = builder.makeIntConstant(1); - glslang::TOperator op; - if (node->getOp() == glslang::EOpPreIncrement || - node->getOp() == glslang::EOpPostIncrement) - op = glslang::EOpAdd; - else - op = glslang::EOpSub; - - spv::Id result = createBinaryOperation(op, precision, - TranslateNoContractionDecoration(node->getType().getQualifier()), - convertGlslangToSpvType(node->getType()), operand, one, - node->getType().getBasicType()); - assert(result != spv::NoResult); - - // The result of operation is always stored, but conditionally the - // consumed result. The consumed result is always an r-value. 
- builder.accessChainStore(result); - builder.clearAccessChain(); - if (node->getOp() == glslang::EOpPreIncrement || - node->getOp() == glslang::EOpPreDecrement) - builder.setAccessChainRValue(result); - else - builder.setAccessChainRValue(operand); - } - - return false; - - case glslang::EOpEmitStreamVertex: - builder.createNoResultOp(spv::OpEmitStreamVertex, operand); - return false; - case glslang::EOpEndStreamPrimitive: - builder.createNoResultOp(spv::OpEndStreamPrimitive, operand); - return false; - - default: - logger->missingFunctionality("unknown glslang unary"); - return true; // pick up operand as placeholder result - } -} - -bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TIntermAggregate* node) -{ - SpecConstantOpModeGuard spec_constant_op_mode_setter(&builder); - if (node->getType().getQualifier().isSpecConstant()) - spec_constant_op_mode_setter.turnOnSpecConstantOpMode(); - - spv::Id result = spv::NoResult; - spv::Id invertedType = spv::NoType; // to use to override the natural type of the node - auto resultType = [&invertedType, &node, this](){ return invertedType != spv::NoType ? 
invertedType : convertGlslangToSpvType(node->getType()); }; - - // try texturing - result = createImageTextureFunctionCall(node); - if (result != spv::NoResult) { - builder.clearAccessChain(); - builder.setAccessChainRValue(result); - - return false; -#ifdef AMD_EXTENSIONS - } else if (node->getOp() == glslang::EOpImageStore || node->getOp() == glslang::EOpImageStoreLod) { -#else - } else if (node->getOp() == glslang::EOpImageStore) { -#endif - // "imageStore" is a special case, which has no result - return false; - } - - glslang::TOperator binOp = glslang::EOpNull; - bool reduceComparison = true; - bool isMatrix = false; - bool noReturnValue = false; - bool atomic = false; - - assert(node->getOp()); - - spv::Decoration precision = TranslatePrecisionDecoration(node->getOperationPrecision()); - - switch (node->getOp()) { - case glslang::EOpSequence: - { - if (preVisit) - ++sequenceDepth; - else - --sequenceDepth; - - if (sequenceDepth == 1) { - // If this is the parent node of all the functions, we want to see them - // early, so all call points have actual SPIR-V functions to reference. - // In all cases, still let the traverser visit the children for us. - makeFunctions(node->getAsAggregate()->getSequence()); - - // Also, we want all globals initializers to go into the beginning of the entry point, before - // anything else gets there, so visit out of order, doing them all now. - makeGlobalInitializers(node->getAsAggregate()->getSequence()); - - // Initializers are done, don't want to visit again, but functions and link objects need to be processed, - // so do them manually. 
- visitFunctions(node->getAsAggregate()->getSequence()); - - return false; - } - - return true; - } - case glslang::EOpLinkerObjects: - { - if (visit == glslang::EvPreVisit) - linkageOnly = true; - else - linkageOnly = false; - - return true; - } - case glslang::EOpComma: - { - // processing from left to right naturally leaves the right-most - // lying around in the access chain - glslang::TIntermSequence& glslangOperands = node->getSequence(); - for (int i = 0; i < (int)glslangOperands.size(); ++i) - glslangOperands[i]->traverse(this); - - return false; - } - case glslang::EOpFunction: - if (visit == glslang::EvPreVisit) { - if (isShaderEntryPoint(node)) { - inEntryPoint = true; - builder.setBuildPoint(shaderEntry->getLastBlock()); - currentFunction = shaderEntry; - } else { - handleFunctionEntry(node); - } - } else { - if (inEntryPoint) - entryPointTerminated = true; - builder.leaveFunction(); - inEntryPoint = false; - } - - return true; - case glslang::EOpParameters: - // Parameters will have been consumed by EOpFunction processing, but not - // the body, so we still visited the function node's children, making this - // child redundant. 
- return false; - case glslang::EOpFunctionCall: - { - builder.setLine(node->getLoc().line); - if (node->isUserDefined()) - result = handleUserFunctionCall(node); - // assert(result); // this can happen for bad shaders because the call graph completeness checking is not yet done - if (result) { - builder.clearAccessChain(); - builder.setAccessChainRValue(result); - } else - logger->missingFunctionality("missing user function; linker needs to catch that"); - - return false; - } - case glslang::EOpConstructMat2x2: - case glslang::EOpConstructMat2x3: - case glslang::EOpConstructMat2x4: - case glslang::EOpConstructMat3x2: - case glslang::EOpConstructMat3x3: - case glslang::EOpConstructMat3x4: - case glslang::EOpConstructMat4x2: - case glslang::EOpConstructMat4x3: - case glslang::EOpConstructMat4x4: - case glslang::EOpConstructDMat2x2: - case glslang::EOpConstructDMat2x3: - case glslang::EOpConstructDMat2x4: - case glslang::EOpConstructDMat3x2: - case glslang::EOpConstructDMat3x3: - case glslang::EOpConstructDMat3x4: - case glslang::EOpConstructDMat4x2: - case glslang::EOpConstructDMat4x3: - case glslang::EOpConstructDMat4x4: - case glslang::EOpConstructIMat2x2: - case glslang::EOpConstructIMat2x3: - case glslang::EOpConstructIMat2x4: - case glslang::EOpConstructIMat3x2: - case glslang::EOpConstructIMat3x3: - case glslang::EOpConstructIMat3x4: - case glslang::EOpConstructIMat4x2: - case glslang::EOpConstructIMat4x3: - case glslang::EOpConstructIMat4x4: - case glslang::EOpConstructUMat2x2: - case glslang::EOpConstructUMat2x3: - case glslang::EOpConstructUMat2x4: - case glslang::EOpConstructUMat3x2: - case glslang::EOpConstructUMat3x3: - case glslang::EOpConstructUMat3x4: - case glslang::EOpConstructUMat4x2: - case glslang::EOpConstructUMat4x3: - case glslang::EOpConstructUMat4x4: - case glslang::EOpConstructBMat2x2: - case glslang::EOpConstructBMat2x3: - case glslang::EOpConstructBMat2x4: - case glslang::EOpConstructBMat3x2: - case glslang::EOpConstructBMat3x3: - case 
glslang::EOpConstructBMat3x4: - case glslang::EOpConstructBMat4x2: - case glslang::EOpConstructBMat4x3: - case glslang::EOpConstructBMat4x4: -#ifdef AMD_EXTENSIONS - case glslang::EOpConstructF16Mat2x2: - case glslang::EOpConstructF16Mat2x3: - case glslang::EOpConstructF16Mat2x4: - case glslang::EOpConstructF16Mat3x2: - case glslang::EOpConstructF16Mat3x3: - case glslang::EOpConstructF16Mat3x4: - case glslang::EOpConstructF16Mat4x2: - case glslang::EOpConstructF16Mat4x3: - case glslang::EOpConstructF16Mat4x4: -#endif - isMatrix = true; - // fall through - case glslang::EOpConstructFloat: - case glslang::EOpConstructVec2: - case glslang::EOpConstructVec3: - case glslang::EOpConstructVec4: - case glslang::EOpConstructDouble: - case glslang::EOpConstructDVec2: - case glslang::EOpConstructDVec3: - case glslang::EOpConstructDVec4: -#ifdef AMD_EXTENSIONS - case glslang::EOpConstructFloat16: - case glslang::EOpConstructF16Vec2: - case glslang::EOpConstructF16Vec3: - case glslang::EOpConstructF16Vec4: -#endif - case glslang::EOpConstructBool: - case glslang::EOpConstructBVec2: - case glslang::EOpConstructBVec3: - case glslang::EOpConstructBVec4: - case glslang::EOpConstructInt: - case glslang::EOpConstructIVec2: - case glslang::EOpConstructIVec3: - case glslang::EOpConstructIVec4: - case glslang::EOpConstructUint: - case glslang::EOpConstructUVec2: - case glslang::EOpConstructUVec3: - case glslang::EOpConstructUVec4: - case glslang::EOpConstructInt64: - case glslang::EOpConstructI64Vec2: - case glslang::EOpConstructI64Vec3: - case glslang::EOpConstructI64Vec4: - case glslang::EOpConstructUint64: - case glslang::EOpConstructU64Vec2: - case glslang::EOpConstructU64Vec3: - case glslang::EOpConstructU64Vec4: -#ifdef AMD_EXTENSIONS - case glslang::EOpConstructInt16: - case glslang::EOpConstructI16Vec2: - case glslang::EOpConstructI16Vec3: - case glslang::EOpConstructI16Vec4: - case glslang::EOpConstructUint16: - case glslang::EOpConstructU16Vec2: - case 
glslang::EOpConstructU16Vec3: - case glslang::EOpConstructU16Vec4: -#endif - case glslang::EOpConstructStruct: - case glslang::EOpConstructTextureSampler: - { - builder.setLine(node->getLoc().line); - std::vector arguments; - translateArguments(*node, arguments); - spv::Id constructed; - if (node->getOp() == glslang::EOpConstructTextureSampler) - constructed = builder.createOp(spv::OpSampledImage, resultType(), arguments); - else if (node->getOp() == glslang::EOpConstructStruct || node->getType().isArray()) { - std::vector constituents; - for (int c = 0; c < (int)arguments.size(); ++c) - constituents.push_back(arguments[c]); - constructed = builder.createCompositeConstruct(resultType(), constituents); - } else if (isMatrix) - constructed = builder.createMatrixConstructor(precision, arguments, resultType()); - else - constructed = builder.createConstructor(precision, arguments, resultType()); - - builder.clearAccessChain(); - builder.setAccessChainRValue(constructed); - - return false; - } - - // These six are component-wise compares with component-wise results. - // Forward on to createBinaryOperation(), requesting a vector result. 
- case glslang::EOpLessThan: - case glslang::EOpGreaterThan: - case glslang::EOpLessThanEqual: - case glslang::EOpGreaterThanEqual: - case glslang::EOpVectorEqual: - case glslang::EOpVectorNotEqual: - { - // Map the operation to a binary - binOp = node->getOp(); - reduceComparison = false; - switch (node->getOp()) { - case glslang::EOpVectorEqual: binOp = glslang::EOpVectorEqual; break; - case glslang::EOpVectorNotEqual: binOp = glslang::EOpVectorNotEqual; break; - default: binOp = node->getOp(); break; - } - - break; - } - case glslang::EOpMul: - // component-wise matrix multiply - binOp = glslang::EOpMul; - break; - case glslang::EOpOuterProduct: - // two vectors multiplied to make a matrix - binOp = glslang::EOpOuterProduct; - break; - case glslang::EOpDot: - { - // for scalar dot product, use multiply - glslang::TIntermSequence& glslangOperands = node->getSequence(); - if (glslangOperands[0]->getAsTyped()->getVectorSize() == 1) - binOp = glslang::EOpMul; - break; - } - case glslang::EOpMod: - // when an aggregate, this is the floating-point mod built-in function, - // which can be emitted by the one in createBinaryOperation() - binOp = glslang::EOpMod; - break; - case glslang::EOpEmitVertex: - case glslang::EOpEndPrimitive: - case glslang::EOpBarrier: - case glslang::EOpMemoryBarrier: - case glslang::EOpMemoryBarrierAtomicCounter: - case glslang::EOpMemoryBarrierBuffer: - case glslang::EOpMemoryBarrierImage: - case glslang::EOpMemoryBarrierShared: - case glslang::EOpGroupMemoryBarrier: - case glslang::EOpDeviceMemoryBarrier: - case glslang::EOpAllMemoryBarrierWithGroupSync: - case glslang::EOpDeviceMemoryBarrierWithGroupSync: - case glslang::EOpWorkgroupMemoryBarrier: - case glslang::EOpWorkgroupMemoryBarrierWithGroupSync: - noReturnValue = true; - // These all have 0 operands and will naturally finish up in the code below for 0 operands - break; - - case glslang::EOpAtomicAdd: - case glslang::EOpAtomicMin: - case glslang::EOpAtomicMax: - case 
glslang::EOpAtomicAnd: - case glslang::EOpAtomicOr: - case glslang::EOpAtomicXor: - case glslang::EOpAtomicExchange: - case glslang::EOpAtomicCompSwap: - atomic = true; - break; - - case glslang::EOpAtomicCounterAdd: - case glslang::EOpAtomicCounterSubtract: - case glslang::EOpAtomicCounterMin: - case glslang::EOpAtomicCounterMax: - case glslang::EOpAtomicCounterAnd: - case glslang::EOpAtomicCounterOr: - case glslang::EOpAtomicCounterXor: - case glslang::EOpAtomicCounterExchange: - case glslang::EOpAtomicCounterCompSwap: - builder.addExtension("SPV_KHR_shader_atomic_counter_ops"); - builder.addCapability(spv::CapabilityAtomicStorageOps); - atomic = true; - break; - - default: - break; - } - - // - // See if it maps to a regular operation. - // - if (binOp != glslang::EOpNull) { - glslang::TIntermTyped* left = node->getSequence()[0]->getAsTyped(); - glslang::TIntermTyped* right = node->getSequence()[1]->getAsTyped(); - assert(left && right); - - builder.clearAccessChain(); - left->traverse(this); - spv::Id leftId = accessChainLoad(left->getType()); - - builder.clearAccessChain(); - right->traverse(this); - spv::Id rightId = accessChainLoad(right->getType()); - - builder.setLine(node->getLoc().line); - result = createBinaryOperation(binOp, precision, TranslateNoContractionDecoration(node->getType().getQualifier()), - resultType(), leftId, rightId, - left->getType().getBasicType(), reduceComparison); - - // code above should only make binOp that exists in createBinaryOperation - assert(result != spv::NoResult); - builder.clearAccessChain(); - builder.setAccessChainRValue(result); - - return false; - } - - // - // Create the list of operands. 
- // - glslang::TIntermSequence& glslangOperands = node->getSequence(); - std::vector operands; - for (int arg = 0; arg < (int)glslangOperands.size(); ++arg) { - // special case l-value operands; there are just a few - bool lvalue = false; - switch (node->getOp()) { - case glslang::EOpFrexp: - case glslang::EOpModf: - if (arg == 1) - lvalue = true; - break; - case glslang::EOpInterpolateAtSample: - case glslang::EOpInterpolateAtOffset: -#ifdef AMD_EXTENSIONS - case glslang::EOpInterpolateAtVertex: -#endif - if (arg == 0) { - lvalue = true; - - // Does it need a swizzle inversion? If so, evaluation is inverted; - // operate first on the swizzle base, then apply the swizzle. - if (glslangOperands[0]->getAsOperator() && - glslangOperands[0]->getAsOperator()->getOp() == glslang::EOpVectorSwizzle) - invertedType = convertGlslangToSpvType(glslangOperands[0]->getAsBinaryNode()->getLeft()->getType()); - } - break; - case glslang::EOpAtomicAdd: - case glslang::EOpAtomicMin: - case glslang::EOpAtomicMax: - case glslang::EOpAtomicAnd: - case glslang::EOpAtomicOr: - case glslang::EOpAtomicXor: - case glslang::EOpAtomicExchange: - case glslang::EOpAtomicCompSwap: - case glslang::EOpAtomicCounterAdd: - case glslang::EOpAtomicCounterSubtract: - case glslang::EOpAtomicCounterMin: - case glslang::EOpAtomicCounterMax: - case glslang::EOpAtomicCounterAnd: - case glslang::EOpAtomicCounterOr: - case glslang::EOpAtomicCounterXor: - case glslang::EOpAtomicCounterExchange: - case glslang::EOpAtomicCounterCompSwap: - if (arg == 0) - lvalue = true; - break; - case glslang::EOpAddCarry: - case glslang::EOpSubBorrow: - if (arg == 2) - lvalue = true; - break; - case glslang::EOpUMulExtended: - case glslang::EOpIMulExtended: - if (arg >= 2) - lvalue = true; - break; - default: - break; - } - builder.clearAccessChain(); - if (invertedType != spv::NoType && arg == 0) - glslangOperands[0]->getAsBinaryNode()->getLeft()->traverse(this); - else - glslangOperands[arg]->traverse(this); - if (lvalue) - 
operands.push_back(builder.accessChainGetLValue()); - else { - builder.setLine(node->getLoc().line); - operands.push_back(accessChainLoad(glslangOperands[arg]->getAsTyped()->getType())); - } - } - - builder.setLine(node->getLoc().line); - if (atomic) { - // Handle all atomics - result = createAtomicOperation(node->getOp(), precision, resultType(), operands, node->getBasicType()); - } else { - // Pass through to generic operations. - switch (glslangOperands.size()) { - case 0: - result = createNoArgOperation(node->getOp(), precision, resultType()); - break; - case 1: - result = createUnaryOperation( - node->getOp(), precision, - TranslateNoContractionDecoration(node->getType().getQualifier()), - resultType(), operands.front(), - glslangOperands[0]->getAsTyped()->getBasicType()); - break; - default: - result = createMiscOperation(node->getOp(), precision, resultType(), operands, node->getBasicType()); - break; - } - if (invertedType) - result = createInvertedSwizzle(precision, *glslangOperands[0]->getAsBinaryNode(), result); - } - - if (noReturnValue) - return false; - - if (! result) { - logger->missingFunctionality("unknown glslang aggregate"); - return true; // pick up a child as a placeholder operand - } else { - builder.clearAccessChain(); - builder.setAccessChainRValue(result); - return false; - } -} - -// This path handles both if-then-else and ?: -// The if-then-else has a node type of void, while -// ?: has either a void or a non-void node type -// -// Leaving the result, when not void: -// GLSL only has r-values as the result of a :?, but -// if we have an l-value, that can be more efficient if it will -// become the base of a complex r-value expression, because the -// next layer copies r-values into memory to use the access-chain mechanism -bool TGlslangToSpvTraverser::visitSelection(glslang::TVisit /* visit */, glslang::TIntermSelection* node) -{ - // See if it simple and safe to generate OpSelect instead of using control flow. 
- // Crucially, side effects must be avoided, and there are performance trade-offs. - // Return true if good idea (and safe) for OpSelect, false otherwise. - const auto selectPolicy = [&]() -> bool { - if ((!node->getType().isScalar() && !node->getType().isVector()) || - node->getBasicType() == glslang::EbtVoid) - return false; - - if (node->getTrueBlock() == nullptr || - node->getFalseBlock() == nullptr) - return false; - - assert(node->getType() == node->getTrueBlock() ->getAsTyped()->getType() && - node->getType() == node->getFalseBlock()->getAsTyped()->getType()); - - // return true if a single operand to ? : is okay for OpSelect - const auto operandOkay = [](glslang::TIntermTyped* node) { - return node->getAsSymbolNode() || node->getType().getQualifier().isConstant(); - }; - - return operandOkay(node->getTrueBlock() ->getAsTyped()) && - operandOkay(node->getFalseBlock()->getAsTyped()); - }; - - // Emit OpSelect for this selection. - const auto handleAsOpSelect = [&]() { - node->getCondition()->traverse(this); - spv::Id condition = accessChainLoad(node->getCondition()->getType()); - node->getTrueBlock()->traverse(this); - spv::Id trueValue = accessChainLoad(node->getTrueBlock()->getAsTyped()->getType()); - node->getFalseBlock()->traverse(this); - spv::Id falseValue = accessChainLoad(node->getTrueBlock()->getAsTyped()->getType()); - - builder.setLine(node->getLoc().line); - - // smear condition to vector, if necessary (AST is always scalar) - if (builder.isVector(trueValue)) - condition = builder.smearScalar(spv::NoPrecision, condition, - builder.makeVectorType(builder.makeBoolType(), - builder.getNumComponents(trueValue))); - - spv::Id select = builder.createTriOp(spv::OpSelect, - convertGlslangToSpvType(node->getType()), condition, - trueValue, falseValue); - builder.clearAccessChain(); - builder.setAccessChainRValue(select); - }; - - // Try for OpSelect - - if (selectPolicy()) { - SpecConstantOpModeGuard spec_constant_op_mode_setter(&builder); - if 
(node->getType().getQualifier().isSpecConstant()) - spec_constant_op_mode_setter.turnOnSpecConstantOpMode(); - - handleAsOpSelect(); - return false; - } - - // Instead, emit control flow... - // Don't handle results as temporaries, because there will be two names - // and better to leave SSA to later passes. - spv::Id result = (node->getBasicType() == glslang::EbtVoid) - ? spv::NoResult - : builder.createVariable(spv::StorageClassFunction, convertGlslangToSpvType(node->getType())); - - // emit the condition before doing anything with selection - node->getCondition()->traverse(this); - - // Selection control: - const spv::SelectionControlMask control = TranslateSelectionControl(*node); - - // make an "if" based on the value created by the condition - spv::Builder::If ifBuilder(accessChainLoad(node->getCondition()->getType()), control, builder); - - // emit the "then" statement - if (node->getTrueBlock() != nullptr) { - node->getTrueBlock()->traverse(this); - if (result != spv::NoResult) - builder.createStore(accessChainLoad(node->getTrueBlock()->getAsTyped()->getType()), result); - } - - if (node->getFalseBlock() != nullptr) { - ifBuilder.makeBeginElse(); - // emit the "else" statement - node->getFalseBlock()->traverse(this); - if (result != spv::NoResult) - builder.createStore(accessChainLoad(node->getFalseBlock()->getAsTyped()->getType()), result); - } - - // finish off the control flow - ifBuilder.makeEndIf(); - - if (result != spv::NoResult) { - // GLSL only has r-values as the result of a :?, but - // if we have an l-value, that can be more efficient if it will - // become the base of a complex r-value expression, because the - // next layer copies r-values into memory to use the access-chain mechanism - builder.clearAccessChain(); - builder.setAccessChainLValue(result); - } - - return false; -} - -bool TGlslangToSpvTraverser::visitSwitch(glslang::TVisit /* visit */, glslang::TIntermSwitch* node) -{ - // emit and get the condition before doing anything with 
switch - node->getCondition()->traverse(this); - spv::Id selector = accessChainLoad(node->getCondition()->getAsTyped()->getType()); - - // Selection control: - const spv::SelectionControlMask control = TranslateSwitchControl(*node); - - // browse the children to sort out code segments - int defaultSegment = -1; - std::vector codeSegments; - glslang::TIntermSequence& sequence = node->getBody()->getSequence(); - std::vector caseValues; - std::vector valueIndexToSegment(sequence.size()); // note: probably not all are used, it is an overestimate - for (glslang::TIntermSequence::iterator c = sequence.begin(); c != sequence.end(); ++c) { - TIntermNode* child = *c; - if (child->getAsBranchNode() && child->getAsBranchNode()->getFlowOp() == glslang::EOpDefault) - defaultSegment = (int)codeSegments.size(); - else if (child->getAsBranchNode() && child->getAsBranchNode()->getFlowOp() == glslang::EOpCase) { - valueIndexToSegment[caseValues.size()] = (int)codeSegments.size(); - caseValues.push_back(child->getAsBranchNode()->getExpression()->getAsConstantUnion()->getConstArray()[0].getIConst()); - } else - codeSegments.push_back(child); - } - - // handle the case where the last code segment is missing, due to no code - // statements between the last case and the end of the switch statement - if ((caseValues.size() && (int)codeSegments.size() == valueIndexToSegment[caseValues.size() - 1]) || - (int)codeSegments.size() == defaultSegment) - codeSegments.push_back(nullptr); - - // make the switch statement - std::vector segmentBlocks; // returned, as the blocks allocated in the call - builder.makeSwitch(selector, control, (int)codeSegments.size(), caseValues, valueIndexToSegment, defaultSegment, segmentBlocks); - - // emit all the code in the segments - breakForLoop.push(false); - for (unsigned int s = 0; s < codeSegments.size(); ++s) { - builder.nextSwitchSegment(segmentBlocks, s); - if (codeSegments[s]) - codeSegments[s]->traverse(this); - else - builder.addSwitchBreak(); - } - 
breakForLoop.pop(); - - builder.endSwitch(segmentBlocks); - - return false; -} - -void TGlslangToSpvTraverser::visitConstantUnion(glslang::TIntermConstantUnion* node) -{ - int nextConst = 0; - spv::Id constant = createSpvConstantFromConstUnionArray(node->getType(), node->getConstArray(), nextConst, false); - - builder.clearAccessChain(); - builder.setAccessChainRValue(constant); -} - -bool TGlslangToSpvTraverser::visitLoop(glslang::TVisit /* visit */, glslang::TIntermLoop* node) -{ - auto blocks = builder.makeNewLoop(); - builder.createBranch(&blocks.head); - - // Loop control: - unsigned int dependencyLength = glslang::TIntermLoop::dependencyInfinite; - const spv::LoopControlMask control = TranslateLoopControl(*node, dependencyLength); - - // Spec requires back edges to target header blocks, and every header block - // must dominate its merge block. Make a header block first to ensure these - // conditions are met. By definition, it will contain OpLoopMerge, followed - // by a block-ending branch. But we don't want to put any other body/test - // instructions in it, since the body/test may have arbitrary instructions, - // including merges of its own. 
- builder.setLine(node->getLoc().line); - builder.setBuildPoint(&blocks.head); - builder.createLoopMerge(&blocks.merge, &blocks.continue_target, control, dependencyLength); - if (node->testFirst() && node->getTest()) { - spv::Block& test = builder.makeNewBlock(); - builder.createBranch(&test); - - builder.setBuildPoint(&test); - node->getTest()->traverse(this); - spv::Id condition = accessChainLoad(node->getTest()->getType()); - builder.createConditionalBranch(condition, &blocks.body, &blocks.merge); - - builder.setBuildPoint(&blocks.body); - breakForLoop.push(true); - if (node->getBody()) - node->getBody()->traverse(this); - builder.createBranch(&blocks.continue_target); - breakForLoop.pop(); - - builder.setBuildPoint(&blocks.continue_target); - if (node->getTerminal()) - node->getTerminal()->traverse(this); - builder.createBranch(&blocks.head); - } else { - builder.setLine(node->getLoc().line); - builder.createBranch(&blocks.body); - - breakForLoop.push(true); - builder.setBuildPoint(&blocks.body); - if (node->getBody()) - node->getBody()->traverse(this); - builder.createBranch(&blocks.continue_target); - breakForLoop.pop(); - - builder.setBuildPoint(&blocks.continue_target); - if (node->getTerminal()) - node->getTerminal()->traverse(this); - if (node->getTest()) { - node->getTest()->traverse(this); - spv::Id condition = - accessChainLoad(node->getTest()->getType()); - builder.createConditionalBranch(condition, &blocks.head, &blocks.merge); - } else { - // TODO: unless there was a break/return/discard instruction - // somewhere in the body, this is an infinite loop, so we should - // issue a warning. 
- builder.createBranch(&blocks.head); - } - } - builder.setBuildPoint(&blocks.merge); - builder.closeLoop(); - return false; -} - -bool TGlslangToSpvTraverser::visitBranch(glslang::TVisit /* visit */, glslang::TIntermBranch* node) -{ - if (node->getExpression()) - node->getExpression()->traverse(this); - - builder.setLine(node->getLoc().line); - - switch (node->getFlowOp()) { - case glslang::EOpKill: - builder.makeDiscard(); - break; - case glslang::EOpBreak: - if (breakForLoop.top()) - builder.createLoopExit(); - else - builder.addSwitchBreak(); - break; - case glslang::EOpContinue: - builder.createLoopContinue(); - break; - case glslang::EOpReturn: - if (node->getExpression()) { - const glslang::TType& glslangReturnType = node->getExpression()->getType(); - spv::Id returnId = accessChainLoad(glslangReturnType); - if (builder.getTypeId(returnId) != currentFunction->getReturnType()) { - builder.clearAccessChain(); - spv::Id copyId = builder.createVariable(spv::StorageClassFunction, currentFunction->getReturnType()); - builder.setAccessChainLValue(copyId); - multiTypeStore(glslangReturnType, returnId); - returnId = builder.createLoad(copyId); - } - builder.makeReturn(false, returnId); - } else - builder.makeReturn(false); - - builder.clearAccessChain(); - break; - - default: - assert(0); - break; - } - - return false; -} - -spv::Id TGlslangToSpvTraverser::createSpvVariable(const glslang::TIntermSymbol* node) -{ - // First, steer off constants, which are not SPIR-V variables, but - // can still have a mapping to a SPIR-V Id. - // This includes specialization constants. 
- if (node->getQualifier().isConstant()) { - return createSpvConstant(*node); - } - - // Now, handle actual variables - spv::StorageClass storageClass = TranslateStorageClass(node->getType()); - spv::Id spvType = convertGlslangToSpvType(node->getType()); - -#ifdef AMD_EXTENSIONS - const bool contains16BitType = node->getType().containsBasicType(glslang::EbtFloat16) || - node->getType().containsBasicType(glslang::EbtInt16) || - node->getType().containsBasicType(glslang::EbtUint16); - if (contains16BitType) { - if (storageClass == spv::StorageClassInput || storageClass == spv::StorageClassOutput) { - builder.addExtension(spv::E_SPV_KHR_16bit_storage); - builder.addCapability(spv::CapabilityStorageInputOutput16); - } else if (storageClass == spv::StorageClassPushConstant) { - builder.addExtension(spv::E_SPV_KHR_16bit_storage); - builder.addCapability(spv::CapabilityStoragePushConstant16); - } else if (storageClass == spv::StorageClassUniform) { - builder.addExtension(spv::E_SPV_KHR_16bit_storage); - builder.addCapability(spv::CapabilityStorageUniform16); - if (node->getType().getQualifier().storage == glslang::EvqBuffer) - builder.addCapability(spv::CapabilityStorageUniformBufferBlock16); - } - } -#endif - - const char* name = node->getName().c_str(); - if (glslang::IsAnonymous(name)) - name = ""; - - return builder.createVariable(storageClass, spvType, name); -} - -// Return type Id of the sampled type. -spv::Id TGlslangToSpvTraverser::getSampledType(const glslang::TSampler& sampler) -{ - switch (sampler.type) { - case glslang::EbtFloat: return builder.makeFloatType(32); - case glslang::EbtInt: return builder.makeIntType(32); - case glslang::EbtUint: return builder.makeUintType(32); - default: - assert(0); - return builder.makeFloatType(32); - } -} - -// If node is a swizzle operation, return the type that should be used if -// the swizzle base is first consumed by another operation, before the swizzle -// is applied. 
-spv::Id TGlslangToSpvTraverser::getInvertedSwizzleType(const glslang::TIntermTyped& node) -{ - if (node.getAsOperator() && - node.getAsOperator()->getOp() == glslang::EOpVectorSwizzle) - return convertGlslangToSpvType(node.getAsBinaryNode()->getLeft()->getType()); - else - return spv::NoType; -} - -// When inverting a swizzle with a parent op, this function -// will apply the swizzle operation to a completed parent operation. -spv::Id TGlslangToSpvTraverser::createInvertedSwizzle(spv::Decoration precision, const glslang::TIntermTyped& node, spv::Id parentResult) -{ - std::vector swizzle; - convertSwizzle(*node.getAsBinaryNode()->getRight()->getAsAggregate(), swizzle); - return builder.createRvalueSwizzle(precision, convertGlslangToSpvType(node.getType()), parentResult, swizzle); -} - -// Convert a glslang AST swizzle node to a swizzle vector for building SPIR-V. -void TGlslangToSpvTraverser::convertSwizzle(const glslang::TIntermAggregate& node, std::vector& swizzle) -{ - const glslang::TIntermSequence& swizzleSequence = node.getSequence(); - for (int i = 0; i < (int)swizzleSequence.size(); ++i) - swizzle.push_back(swizzleSequence[i]->getAsConstantUnion()->getConstArray()[0].getIConst()); -} - -// Convert from a glslang type to an SPV type, by calling into a -// recursive version of this function. This establishes the inherited -// layout state rooted from the top-level type. -spv::Id TGlslangToSpvTraverser::convertGlslangToSpvType(const glslang::TType& type) -{ - return convertGlslangToSpvType(type, getExplicitLayout(type), type.getQualifier()); -} - -// Do full recursive conversion of an arbitrary glslang type to a SPIR-V Id. -// explicitLayout can be kept the same throughout the hierarchical recursive walk. -// Mutually recursive with convertGlslangStructToSpvType(). 
-spv::Id TGlslangToSpvTraverser::convertGlslangToSpvType(const glslang::TType& type, glslang::TLayoutPacking explicitLayout, const glslang::TQualifier& qualifier) -{ - spv::Id spvType = spv::NoResult; - - switch (type.getBasicType()) { - case glslang::EbtVoid: - spvType = builder.makeVoidType(); - assert (! type.isArray()); - break; - case glslang::EbtFloat: - spvType = builder.makeFloatType(32); - break; - case glslang::EbtDouble: - spvType = builder.makeFloatType(64); - break; -#ifdef AMD_EXTENSIONS - case glslang::EbtFloat16: - builder.addExtension(spv::E_SPV_AMD_gpu_shader_half_float); - spvType = builder.makeFloatType(16); - break; -#endif - case glslang::EbtBool: - // "transparent" bool doesn't exist in SPIR-V. The GLSL convention is - // a 32-bit int where non-0 means true. - if (explicitLayout != glslang::ElpNone) - spvType = builder.makeUintType(32); - else - spvType = builder.makeBoolType(); - break; - case glslang::EbtInt: - spvType = builder.makeIntType(32); - break; - case glslang::EbtUint: - spvType = builder.makeUintType(32); - break; - case glslang::EbtInt64: - spvType = builder.makeIntType(64); - break; - case glslang::EbtUint64: - spvType = builder.makeUintType(64); - break; -#ifdef AMD_EXTENSIONS - case glslang::EbtInt16: - builder.addExtension(spv::E_SPV_AMD_gpu_shader_int16); - spvType = builder.makeIntType(16); - break; - case glslang::EbtUint16: - builder.addExtension(spv::E_SPV_AMD_gpu_shader_int16); - spvType = builder.makeUintType(16); - break; -#endif - case glslang::EbtAtomicUint: - builder.addCapability(spv::CapabilityAtomicStorage); - spvType = builder.makeUintType(32); - break; - case glslang::EbtSampler: - { - const glslang::TSampler& sampler = type.getSampler(); - if (sampler.sampler) { - // pure sampler - spvType = builder.makeSamplerType(); - } else { - // an image is present, make its type - spvType = builder.makeImageType(getSampledType(sampler), TranslateDimensionality(sampler), sampler.shadow, sampler.arrayed, sampler.ms, - 
sampler.image ? 2 : 1, TranslateImageFormat(type)); - if (sampler.combined) { - // already has both image and sampler, make the combined type - spvType = builder.makeSampledImageType(spvType); - } - } - } - break; - case glslang::EbtStruct: - case glslang::EbtBlock: - { - // If we've seen this struct type, return it - const glslang::TTypeList* glslangMembers = type.getStruct(); - - // Try to share structs for different layouts, but not yet for other - // kinds of qualification (primarily not yet including interpolant qualification). - if (! HasNonLayoutQualifiers(type, qualifier)) - spvType = structMap[explicitLayout][qualifier.layoutMatrix][glslangMembers]; - if (spvType != spv::NoResult) - break; - - // else, we haven't seen it... - if (type.getBasicType() == glslang::EbtBlock) - memberRemapper[glslangMembers].resize(glslangMembers->size()); - spvType = convertGlslangStructToSpvType(type, glslangMembers, explicitLayout, qualifier); - } - break; - default: - assert(0); - break; - } - - if (type.isMatrix()) - spvType = builder.makeMatrixType(spvType, type.getMatrixCols(), type.getMatrixRows()); - else { - // If this variable has a vector element count greater than 1, create a SPIR-V vector - if (type.getVectorSize() > 1) - spvType = builder.makeVectorType(spvType, type.getVectorSize()); - } - - if (type.isArray()) { - int stride = 0; // keep this 0 unless doing an explicit layout; 0 will mean no decoration, no stride - - // Do all but the outer dimension - if (type.getArraySizes()->getNumDims() > 1) { - // We need to decorate array strides for types needing explicit layout, except blocks. - if (explicitLayout != glslang::ElpNone && type.getBasicType() != glslang::EbtBlock) { - // Use a dummy glslang type for querying internal strides of - // arrays of arrays, but using just a one-dimensional array. 
- glslang::TType simpleArrayType(type, 0); // deference type of the array - while (simpleArrayType.getArraySizes().getNumDims() > 1) - simpleArrayType.getArraySizes().dereference(); - - // Will compute the higher-order strides here, rather than making a whole - // pile of types and doing repetitive recursion on their contents. - stride = getArrayStride(simpleArrayType, explicitLayout, qualifier.layoutMatrix); - } - - // make the arrays - for (int dim = type.getArraySizes()->getNumDims() - 1; dim > 0; --dim) { - spvType = builder.makeArrayType(spvType, makeArraySizeId(*type.getArraySizes(), dim), stride); - if (stride > 0) - builder.addDecoration(spvType, spv::DecorationArrayStride, stride); - stride *= type.getArraySizes()->getDimSize(dim); - } - } else { - // single-dimensional array, and don't yet have stride - - // We need to decorate array strides for types needing explicit layout, except blocks. - if (explicitLayout != glslang::ElpNone && type.getBasicType() != glslang::EbtBlock) - stride = getArrayStride(type, explicitLayout, qualifier.layoutMatrix); - } - - // Do the outer dimension, which might not be known for a runtime-sized array - if (type.isRuntimeSizedArray()) { - spvType = builder.makeRuntimeArray(spvType); - } else { - assert(type.getOuterArraySize() > 0); - spvType = builder.makeArrayType(spvType, makeArraySizeId(*type.getArraySizes(), 0), stride); - } - if (stride > 0) - builder.addDecoration(spvType, spv::DecorationArrayStride, stride); - } - - return spvType; -} - -// TODO: this functionality should exist at a higher level, in creating the AST -// -// Identify interface members that don't have their required extension turned on. 
-// -bool TGlslangToSpvTraverser::filterMember(const glslang::TType& member) -{ - auto& extensions = glslangIntermediate->getRequestedExtensions(); - - if (member.getFieldName() == "gl_ViewportMask" && - extensions.find("GL_NV_viewport_array2") == extensions.end()) - return true; - if (member.getFieldName() == "gl_SecondaryViewportMaskNV" && - extensions.find("GL_NV_stereo_view_rendering") == extensions.end()) - return true; - if (member.getFieldName() == "gl_SecondaryPositionNV" && - extensions.find("GL_NV_stereo_view_rendering") == extensions.end()) - return true; - if (member.getFieldName() == "gl_PositionPerViewNV" && - extensions.find("GL_NVX_multiview_per_view_attributes") == extensions.end()) - return true; - if (member.getFieldName() == "gl_ViewportMaskPerViewNV" && - extensions.find("GL_NVX_multiview_per_view_attributes") == extensions.end()) - return true; - - return false; -}; - -// Do full recursive conversion of a glslang structure (or block) type to a SPIR-V Id. -// explicitLayout can be kept the same throughout the hierarchical recursive walk. -// Mutually recursive with convertGlslangToSpvType(). 
-spv::Id TGlslangToSpvTraverser::convertGlslangStructToSpvType(const glslang::TType& type, - const glslang::TTypeList* glslangMembers, - glslang::TLayoutPacking explicitLayout, - const glslang::TQualifier& qualifier) -{ - // Create a vector of struct types for SPIR-V to consume - std::vector spvMembers; - int memberDelta = 0; // how much the member's index changes from glslang to SPIR-V, normally 0, except sometimes for blocks - for (int i = 0; i < (int)glslangMembers->size(); i++) { - glslang::TType& glslangMember = *(*glslangMembers)[i].type; - if (glslangMember.hiddenMember()) { - ++memberDelta; - if (type.getBasicType() == glslang::EbtBlock) - memberRemapper[glslangMembers][i] = -1; - } else { - if (type.getBasicType() == glslang::EbtBlock) { - memberRemapper[glslangMembers][i] = i - memberDelta; - if (filterMember(glslangMember)) - continue; - } - // modify just this child's view of the qualifier - glslang::TQualifier memberQualifier = glslangMember.getQualifier(); - InheritQualifiers(memberQualifier, qualifier); - - // manually inherit location - if (! memberQualifier.hasLocation() && qualifier.hasLocation()) - memberQualifier.layoutLocation = qualifier.layoutLocation; - - // recurse - spvMembers.push_back(convertGlslangToSpvType(glslangMember, explicitLayout, memberQualifier)); - } - } - - // Make the SPIR-V type - spv::Id spvType = builder.makeStructType(spvMembers, type.getTypeName().c_str()); - if (! 
HasNonLayoutQualifiers(type, qualifier)) - structMap[explicitLayout][qualifier.layoutMatrix][glslangMembers] = spvType; - - // Decorate it - decorateStructType(type, glslangMembers, explicitLayout, qualifier, spvType); - - return spvType; -} - -void TGlslangToSpvTraverser::decorateStructType(const glslang::TType& type, - const glslang::TTypeList* glslangMembers, - glslang::TLayoutPacking explicitLayout, - const glslang::TQualifier& qualifier, - spv::Id spvType) -{ - // Name and decorate the non-hidden members - int offset = -1; - int locationOffset = 0; // for use within the members of this struct - for (int i = 0; i < (int)glslangMembers->size(); i++) { - glslang::TType& glslangMember = *(*glslangMembers)[i].type; - int member = i; - if (type.getBasicType() == glslang::EbtBlock) { - member = memberRemapper[glslangMembers][i]; - if (filterMember(glslangMember)) - continue; - } - - // modify just this child's view of the qualifier - glslang::TQualifier memberQualifier = glslangMember.getQualifier(); - InheritQualifiers(memberQualifier, qualifier); - - // using -1 above to indicate a hidden member - if (member >= 0) { - builder.addMemberName(spvType, member, glslangMember.getFieldName().c_str()); - addMemberDecoration(spvType, member, TranslateLayoutDecoration(glslangMember, memberQualifier.layoutMatrix)); - addMemberDecoration(spvType, member, TranslatePrecisionDecoration(glslangMember)); - // Add interpolation and auxiliary storage decorations only to top-level members of Input and Output storage classes - if (type.getQualifier().storage == glslang::EvqVaryingIn || - type.getQualifier().storage == glslang::EvqVaryingOut) { - if (type.getBasicType() == glslang::EbtBlock || - glslangIntermediate->getSource() == glslang::EShSourceHlsl) { - addMemberDecoration(spvType, member, TranslateInterpolationDecoration(memberQualifier)); - addMemberDecoration(spvType, member, TranslateAuxiliaryStorageDecoration(memberQualifier)); - } - } - addMemberDecoration(spvType, member, 
TranslateInvariantDecoration(memberQualifier)); - - if (type.getBasicType() == glslang::EbtBlock && - qualifier.storage == glslang::EvqBuffer) { - // Add memory decorations only to top-level members of shader storage block - std::vector memory; - TranslateMemoryDecoration(memberQualifier, memory); - for (unsigned int i = 0; i < memory.size(); ++i) - addMemberDecoration(spvType, member, memory[i]); - } - - // Location assignment was already completed correctly by the front end, - // just track whether a member needs to be decorated. - // Ignore member locations if the container is an array, as that's - // ill-specified and decisions have been made to not allow this. - if (! type.isArray() && memberQualifier.hasLocation()) - builder.addMemberDecoration(spvType, member, spv::DecorationLocation, memberQualifier.layoutLocation); - - if (qualifier.hasLocation()) // track for upcoming inheritance - locationOffset += glslangIntermediate->computeTypeLocationSize(glslangMember); - - // component, XFB, others - if (glslangMember.getQualifier().hasComponent()) - builder.addMemberDecoration(spvType, member, spv::DecorationComponent, glslangMember.getQualifier().layoutComponent); - if (glslangMember.getQualifier().hasXfbOffset()) - builder.addMemberDecoration(spvType, member, spv::DecorationOffset, glslangMember.getQualifier().layoutXfbOffset); - else if (explicitLayout != glslang::ElpNone) { - // figure out what to do with offset, which is accumulating - int nextOffset; - updateMemberOffset(type, glslangMember, offset, nextOffset, explicitLayout, memberQualifier.layoutMatrix); - if (offset >= 0) - builder.addMemberDecoration(spvType, member, spv::DecorationOffset, offset); - offset = nextOffset; - } - - if (glslangMember.isMatrix() && explicitLayout != glslang::ElpNone) - builder.addMemberDecoration(spvType, member, spv::DecorationMatrixStride, getMatrixStride(glslangMember, explicitLayout, memberQualifier.layoutMatrix)); - - // built-in variable decorations - spv::BuiltIn 
builtIn = TranslateBuiltInDecoration(glslangMember.getQualifier().builtIn, true); - if (builtIn != spv::BuiltInMax) - addMemberDecoration(spvType, member, spv::DecorationBuiltIn, (int)builtIn); - -#ifdef NV_EXTENSIONS - if (builtIn == spv::BuiltInLayer) { - // SPV_NV_viewport_array2 extension - if (glslangMember.getQualifier().layoutViewportRelative){ - addMemberDecoration(spvType, member, (spv::Decoration)spv::DecorationViewportRelativeNV); - builder.addCapability(spv::CapabilityShaderViewportMaskNV); - builder.addExtension(spv::E_SPV_NV_viewport_array2); - } - if (glslangMember.getQualifier().layoutSecondaryViewportRelativeOffset != -2048){ - addMemberDecoration(spvType, member, (spv::Decoration)spv::DecorationSecondaryViewportRelativeNV, glslangMember.getQualifier().layoutSecondaryViewportRelativeOffset); - builder.addCapability(spv::CapabilityShaderStereoViewNV); - builder.addExtension(spv::E_SPV_NV_stereo_view_rendering); - } - } - if (glslangMember.getQualifier().layoutPassthrough) { - addMemberDecoration(spvType, member, (spv::Decoration)spv::DecorationPassthroughNV); - builder.addCapability(spv::CapabilityGeometryShaderPassthroughNV); - builder.addExtension(spv::E_SPV_NV_geometry_shader_passthrough); - } -#endif - } - } - - // Decorate the structure - addDecoration(spvType, TranslateLayoutDecoration(type, qualifier.layoutMatrix)); - addDecoration(spvType, TranslateBlockDecoration(type, glslangIntermediate->usingStorageBuffer())); - if (type.getQualifier().hasStream() && glslangIntermediate->isMultiStream()) { - builder.addCapability(spv::CapabilityGeometryStreams); - builder.addDecoration(spvType, spv::DecorationStream, type.getQualifier().layoutStream); - } -} - -// Turn the expression forming the array size into an id. -// This is not quite trivial, because of specialization constants. -// Sometimes, a raw constant is turned into an Id, and sometimes -// a specialization constant expression is. 
-spv::Id TGlslangToSpvTraverser::makeArraySizeId(const glslang::TArraySizes& arraySizes, int dim) -{ - // First, see if this is sized with a node, meaning a specialization constant: - glslang::TIntermTyped* specNode = arraySizes.getDimNode(dim); - if (specNode != nullptr) { - builder.clearAccessChain(); - specNode->traverse(this); - return accessChainLoad(specNode->getAsTyped()->getType()); - } - - // Otherwise, need a compile-time (front end) size, get it: - int size = arraySizes.getDimSize(dim); - assert(size > 0); - return builder.makeUintConstant(size); -} - -// Wrap the builder's accessChainLoad to: -// - localize handling of RelaxedPrecision -// - use the SPIR-V inferred type instead of another conversion of the glslang type -// (avoids unnecessary work and possible type punning for structures) -// - do conversion of concrete to abstract type -spv::Id TGlslangToSpvTraverser::accessChainLoad(const glslang::TType& type) -{ - spv::Id nominalTypeId = builder.accessChainGetInferredType(); - spv::Id loadedId = builder.accessChainLoad(TranslatePrecisionDecoration(type), nominalTypeId); - - // Need to convert to abstract types when necessary - if (type.getBasicType() == glslang::EbtBool) { - if (builder.isScalarType(nominalTypeId)) { - // Conversion for bool - spv::Id boolType = builder.makeBoolType(); - if (nominalTypeId != boolType) - loadedId = builder.createBinOp(spv::OpINotEqual, boolType, loadedId, builder.makeUintConstant(0)); - } else if (builder.isVectorType(nominalTypeId)) { - // Conversion for bvec - int vecSize = builder.getNumTypeComponents(nominalTypeId); - spv::Id bvecType = builder.makeVectorType(builder.makeBoolType(), vecSize); - if (nominalTypeId != bvecType) - loadedId = builder.createBinOp(spv::OpINotEqual, bvecType, loadedId, makeSmearedConstant(builder.makeUintConstant(0), vecSize)); - } - } - - return loadedId; -} - -// Wrap the builder's accessChainStore to: -// - do conversion of concrete to abstract type -// -// Implicitly uses the existing 
builder.accessChain as the storage target. -void TGlslangToSpvTraverser::accessChainStore(const glslang::TType& type, spv::Id rvalue) -{ - // Need to convert to abstract types when necessary - if (type.getBasicType() == glslang::EbtBool) { - spv::Id nominalTypeId = builder.accessChainGetInferredType(); - - if (builder.isScalarType(nominalTypeId)) { - // Conversion for bool - spv::Id boolType = builder.makeBoolType(); - if (nominalTypeId != boolType) { - // keep these outside arguments, for determinant order-of-evaluation - spv::Id one = builder.makeUintConstant(1); - spv::Id zero = builder.makeUintConstant(0); - rvalue = builder.createTriOp(spv::OpSelect, nominalTypeId, rvalue, one, zero); - } else if (builder.getTypeId(rvalue) != boolType) - rvalue = builder.createBinOp(spv::OpINotEqual, boolType, rvalue, builder.makeUintConstant(0)); - } else if (builder.isVectorType(nominalTypeId)) { - // Conversion for bvec - int vecSize = builder.getNumTypeComponents(nominalTypeId); - spv::Id bvecType = builder.makeVectorType(builder.makeBoolType(), vecSize); - if (nominalTypeId != bvecType) { - // keep these outside arguments, for determinant order-of-evaluation - spv::Id one = makeSmearedConstant(builder.makeUintConstant(1), vecSize); - spv::Id zero = makeSmearedConstant(builder.makeUintConstant(0), vecSize); - rvalue = builder.createTriOp(spv::OpSelect, nominalTypeId, rvalue, one, zero); - } else if (builder.getTypeId(rvalue) != bvecType) - rvalue = builder.createBinOp(spv::OpINotEqual, bvecType, rvalue, - makeSmearedConstant(builder.makeUintConstant(0), vecSize)); - } - } - - builder.accessChainStore(rvalue); -} - -// For storing when types match at the glslang level, but not might match at the -// SPIR-V level. -// -// This especially happens when a single glslang type expands to multiple -// SPIR-V types, like a struct that is used in a member-undecorated way as well -// as in a member-decorated way. 
-// -// NOTE: This function can handle any store request; if it's not special it -// simplifies to a simple OpStore. -// -// Implicitly uses the existing builder.accessChain as the storage target. -void TGlslangToSpvTraverser::multiTypeStore(const glslang::TType& type, spv::Id rValue) -{ - // we only do the complex path here if it's an aggregate - if (! type.isStruct() && ! type.isArray()) { - accessChainStore(type, rValue); - return; - } - - // and, it has to be a case of type aliasing - spv::Id rType = builder.getTypeId(rValue); - spv::Id lValue = builder.accessChainGetLValue(); - spv::Id lType = builder.getContainedTypeId(builder.getTypeId(lValue)); - if (lType == rType) { - accessChainStore(type, rValue); - return; - } - - // Recursively (as needed) copy an aggregate type to a different aggregate type, - // where the two types were the same type in GLSL. This requires member - // by member copy, recursively. - - // If an array, copy element by element. - if (type.isArray()) { - glslang::TType glslangElementType(type, 0); - spv::Id elementRType = builder.getContainedTypeId(rType); - for (int index = 0; index < type.getOuterArraySize(); ++index) { - // get the source member - spv::Id elementRValue = builder.createCompositeExtract(rValue, elementRType, index); - - // set up the target storage - builder.clearAccessChain(); - builder.setAccessChainLValue(lValue); - builder.accessChainPush(builder.makeIntConstant(index)); - - // store the member - multiTypeStore(glslangElementType, elementRValue); - } - } else { - assert(type.isStruct()); - - // loop over structure members - const glslang::TTypeList& members = *type.getStruct(); - for (int m = 0; m < (int)members.size(); ++m) { - const glslang::TType& glslangMemberType = *members[m].type; - - // get the source member - spv::Id memberRType = builder.getContainedTypeId(rType, m); - spv::Id memberRValue = builder.createCompositeExtract(rValue, memberRType, m); - - // set up the target storage - 
builder.clearAccessChain(); - builder.setAccessChainLValue(lValue); - builder.accessChainPush(builder.makeIntConstant(m)); - - // store the member - multiTypeStore(glslangMemberType, memberRValue); - } - } -} - -// Decide whether or not this type should be -// decorated with offsets and strides, and if so -// whether std140 or std430 rules should be applied. -glslang::TLayoutPacking TGlslangToSpvTraverser::getExplicitLayout(const glslang::TType& type) const -{ - // has to be a block - if (type.getBasicType() != glslang::EbtBlock) - return glslang::ElpNone; - - // has to be a uniform or buffer block - if (type.getQualifier().storage != glslang::EvqUniform && - type.getQualifier().storage != glslang::EvqBuffer) - return glslang::ElpNone; - - // return the layout to use - switch (type.getQualifier().layoutPacking) { - case glslang::ElpStd140: - case glslang::ElpStd430: - return type.getQualifier().layoutPacking; - default: - return glslang::ElpNone; - } -} - -// Given an array type, returns the integer stride required for that array -int TGlslangToSpvTraverser::getArrayStride(const glslang::TType& arrayType, glslang::TLayoutPacking explicitLayout, glslang::TLayoutMatrix matrixLayout) -{ - int size; - int stride; - glslangIntermediate->getBaseAlignment(arrayType, size, stride, explicitLayout == glslang::ElpStd140, matrixLayout == glslang::ElmRowMajor); - - return stride; -} - -// Given a matrix type, or array (of array) of matrixes type, returns the integer stride required for that matrix -// when used as a member of an interface block -int TGlslangToSpvTraverser::getMatrixStride(const glslang::TType& matrixType, glslang::TLayoutPacking explicitLayout, glslang::TLayoutMatrix matrixLayout) -{ - glslang::TType elementType; - elementType.shallowCopy(matrixType); - elementType.clearArraySizes(); - - int size; - int stride; - glslangIntermediate->getBaseAlignment(elementType, size, stride, explicitLayout == glslang::ElpStd140, matrixLayout == glslang::ElmRowMajor); - - 
return stride; -} - -// Given a member type of a struct, realign the current offset for it, and compute -// the next (not yet aligned) offset for the next member, which will get aligned -// on the next call. -// 'currentOffset' should be passed in already initialized, ready to modify, and reflecting -// the migration of data from nextOffset -> currentOffset. It should be -1 on the first call. -// -1 means a non-forced member offset (no decoration needed). -void TGlslangToSpvTraverser::updateMemberOffset(const glslang::TType& structType, const glslang::TType& memberType, int& currentOffset, int& nextOffset, - glslang::TLayoutPacking explicitLayout, glslang::TLayoutMatrix matrixLayout) -{ - // this will get a positive value when deemed necessary - nextOffset = -1; - - // override anything in currentOffset with user-set offset - if (memberType.getQualifier().hasOffset()) - currentOffset = memberType.getQualifier().layoutOffset; - - // It could be that current linker usage in glslang updated all the layoutOffset, - // in which case the following code does not matter. But, that's not quite right - // once cross-compilation unit GLSL validation is done, as the original user - // settings are needed in layoutOffset, and then the following will come into play. - - if (explicitLayout == glslang::ElpNone) { - if (! memberType.getQualifier().hasOffset()) - currentOffset = -1; - - return; - } - - // Getting this far means we need explicit offsets - if (currentOffset < 0) - currentOffset = 0; - - // Now, currentOffset is valid (either 0, or from a previous nextOffset), - // but possibly not yet correctly aligned. 
- - int memberSize; - int dummyStride; - int memberAlignment = glslangIntermediate->getBaseAlignment(memberType, memberSize, dummyStride, explicitLayout == glslang::ElpStd140, matrixLayout == glslang::ElmRowMajor); - - // Adjust alignment for HLSL rules - // TODO: make this consistent in early phases of code: - // adjusting this late means inconsistencies with earlier code, which for reflection is an issue - // Until reflection is brought in sync with these adjustments, don't apply to $Global, - // which is the most likely to rely on reflection, and least likely to rely implicit layouts - if (glslangIntermediate->usingHlslOFfsets() && - ! memberType.isArray() && memberType.isVector() && structType.getTypeName().compare("$Global") != 0) { - int dummySize; - int componentAlignment = glslangIntermediate->getBaseAlignmentScalar(memberType, dummySize); - if (componentAlignment <= 4) - memberAlignment = componentAlignment; - } - - // Bump up to member alignment - glslang::RoundToPow2(currentOffset, memberAlignment); - - // Bump up to vec4 if there is a bad straddle - if (glslangIntermediate->improperStraddle(memberType, memberSize, currentOffset)) - glslang::RoundToPow2(currentOffset, 16); - - nextOffset = currentOffset + memberSize; -} - -void TGlslangToSpvTraverser::declareUseOfStructMember(const glslang::TTypeList& members, int glslangMember) -{ - const glslang::TBuiltInVariable glslangBuiltIn = members[glslangMember].type->getQualifier().builtIn; - switch (glslangBuiltIn) - { - case glslang::EbvClipDistance: - case glslang::EbvCullDistance: - case glslang::EbvPointSize: -#ifdef NV_EXTENSIONS - case glslang::EbvViewportMaskNV: - case glslang::EbvSecondaryPositionNV: - case glslang::EbvSecondaryViewportMaskNV: - case glslang::EbvPositionPerViewNV: - case glslang::EbvViewportMaskPerViewNV: -#endif - // Generate the associated capability. Delegate to TranslateBuiltInDecoration. 
- // Alternately, we could just call this for any glslang built-in, since the - // capability already guards against duplicates. - TranslateBuiltInDecoration(glslangBuiltIn, false); - break; - default: - // Capabilities were already generated when the struct was declared. - break; - } -} - -bool TGlslangToSpvTraverser::isShaderEntryPoint(const glslang::TIntermAggregate* node) -{ - return node->getName().compare(glslangIntermediate->getEntryPointMangledName().c_str()) == 0; -} - -// Does parameter need a place to keep writes, separate from the original? -// Assumes called after originalParam(), which filters out block/buffer/opaque-based -// qualifiers such that we should have only in/out/inout/constreadonly here. -bool TGlslangToSpvTraverser::writableParam(glslang::TStorageQualifier qualifier) -{ - assert(qualifier == glslang::EvqIn || - qualifier == glslang::EvqOut || - qualifier == glslang::EvqInOut || - qualifier == glslang::EvqConstReadOnly); - return qualifier != glslang::EvqConstReadOnly; -} - -// Is parameter pass-by-original? -bool TGlslangToSpvTraverser::originalParam(glslang::TStorageQualifier qualifier, const glslang::TType& paramType, - bool implicitThisParam) -{ - if (implicitThisParam) // implicit this - return true; - if (glslangIntermediate->getSource() == glslang::EShSourceHlsl) - return paramType.getBasicType() == glslang::EbtBlock; - return paramType.containsOpaque() || // sampler, etc. - (paramType.getBasicType() == glslang::EbtBlock && qualifier == glslang::EvqBuffer); // SSBO -} - -// Make all the functions, skeletally, without actually visiting their bodies. 
-void TGlslangToSpvTraverser::makeFunctions(const glslang::TIntermSequence& glslFunctions) -{ - const auto getParamDecorations = [](std::vector& decorations, const glslang::TType& type) { - spv::Decoration paramPrecision = TranslatePrecisionDecoration(type); - if (paramPrecision != spv::NoPrecision) - decorations.push_back(paramPrecision); - TranslateMemoryDecoration(type.getQualifier(), decorations); - }; - - for (int f = 0; f < (int)glslFunctions.size(); ++f) { - glslang::TIntermAggregate* glslFunction = glslFunctions[f]->getAsAggregate(); - if (! glslFunction || glslFunction->getOp() != glslang::EOpFunction || isShaderEntryPoint(glslFunction)) - continue; - - // We're on a user function. Set up the basic interface for the function now, - // so that it's available to call. Translating the body will happen later. - // - // Typically (except for a "const in" parameter), an address will be passed to the - // function. What it is an address of varies: - // - // - "in" parameters not marked as "const" can be written to without modifying the calling - // argument so that write needs to be to a copy, hence the address of a copy works. - // - // - "const in" parameters can just be the r-value, as no writes need occur. - // - // - "out" and "inout" arguments can't be done as pointers to the calling argument, because - // GLSL has copy-in/copy-out semantics. They can be handled though with a pointer to a copy. 
- - std::vector paramTypes; - std::vector> paramDecorations; // list of decorations per parameter - glslang::TIntermSequence& parameters = glslFunction->getSequence()[0]->getAsAggregate()->getSequence(); - - bool implicitThis = (int)parameters.size() > 0 && parameters[0]->getAsSymbolNode()->getName() == - glslangIntermediate->implicitThisName; - - paramDecorations.resize(parameters.size()); - for (int p = 0; p < (int)parameters.size(); ++p) { - const glslang::TType& paramType = parameters[p]->getAsTyped()->getType(); - spv::Id typeId = convertGlslangToSpvType(paramType); - if (originalParam(paramType.getQualifier().storage, paramType, implicitThis && p == 0)) - typeId = builder.makePointer(TranslateStorageClass(paramType), typeId); - else if (writableParam(paramType.getQualifier().storage)) - typeId = builder.makePointer(spv::StorageClassFunction, typeId); - else - rValueParameters.insert(parameters[p]->getAsSymbolNode()->getId()); - getParamDecorations(paramDecorations[p], paramType); - paramTypes.push_back(typeId); - } - - spv::Block* functionBlock; - spv::Function *function = builder.makeFunctionEntry(TranslatePrecisionDecoration(glslFunction->getType()), - convertGlslangToSpvType(glslFunction->getType()), - glslFunction->getName().c_str(), paramTypes, - paramDecorations, &functionBlock); - if (implicitThis) - function->setImplicitThis(); - - // Track function to emit/call later - functionMap[glslFunction->getName().c_str()] = function; - - // Set the parameter id's - for (int p = 0; p < (int)parameters.size(); ++p) { - symbolValues[parameters[p]->getAsSymbolNode()->getId()] = function->getParamId(p); - // give a name too - builder.addName(function->getParamId(p), parameters[p]->getAsSymbolNode()->getName().c_str()); - } - } -} - -// Process all the initializers, while skipping the functions and link objects -void TGlslangToSpvTraverser::makeGlobalInitializers(const glslang::TIntermSequence& initializers) -{ - builder.setBuildPoint(shaderEntry->getLastBlock()); 
- for (int i = 0; i < (int)initializers.size(); ++i) { - glslang::TIntermAggregate* initializer = initializers[i]->getAsAggregate(); - if (initializer && initializer->getOp() != glslang::EOpFunction && initializer->getOp() != glslang::EOpLinkerObjects) { - - // We're on a top-level node that's not a function. Treat as an initializer, whose - // code goes into the beginning of the entry point. - initializer->traverse(this); - } - } -} - -// Process all the functions, while skipping initializers. -void TGlslangToSpvTraverser::visitFunctions(const glslang::TIntermSequence& glslFunctions) -{ - for (int f = 0; f < (int)glslFunctions.size(); ++f) { - glslang::TIntermAggregate* node = glslFunctions[f]->getAsAggregate(); - if (node && (node->getOp() == glslang::EOpFunction || node->getOp() == glslang::EOpLinkerObjects)) - node->traverse(this); - } -} - -void TGlslangToSpvTraverser::handleFunctionEntry(const glslang::TIntermAggregate* node) -{ - // SPIR-V functions should already be in the functionMap from the prepass - // that called makeFunctions(). 
- currentFunction = functionMap[node->getName().c_str()]; - spv::Block* functionBlock = currentFunction->getEntryBlock(); - builder.setBuildPoint(functionBlock); -} - -void TGlslangToSpvTraverser::translateArguments(const glslang::TIntermAggregate& node, std::vector& arguments) -{ - const glslang::TIntermSequence& glslangArguments = node.getSequence(); - - glslang::TSampler sampler = {}; - bool cubeCompare = false; - if (node.isTexture() || node.isImage()) { - sampler = glslangArguments[0]->getAsTyped()->getType().getSampler(); - cubeCompare = sampler.dim == glslang::EsdCube && sampler.arrayed && sampler.shadow; - } - - for (int i = 0; i < (int)glslangArguments.size(); ++i) { - builder.clearAccessChain(); - glslangArguments[i]->traverse(this); - - // Special case l-value operands - bool lvalue = false; - switch (node.getOp()) { - case glslang::EOpImageAtomicAdd: - case glslang::EOpImageAtomicMin: - case glslang::EOpImageAtomicMax: - case glslang::EOpImageAtomicAnd: - case glslang::EOpImageAtomicOr: - case glslang::EOpImageAtomicXor: - case glslang::EOpImageAtomicExchange: - case glslang::EOpImageAtomicCompSwap: - if (i == 0) - lvalue = true; - break; - case glslang::EOpSparseImageLoad: - if ((sampler.ms && i == 3) || (! sampler.ms && i == 2)) - lvalue = true; - break; - case glslang::EOpSparseTexture: - if ((cubeCompare && i == 3) || (! cubeCompare && i == 2)) - lvalue = true; - break; - case glslang::EOpSparseTextureClamp: - if ((cubeCompare && i == 4) || (! 
cubeCompare && i == 3)) - lvalue = true; - break; - case glslang::EOpSparseTextureLod: - case glslang::EOpSparseTextureOffset: - if (i == 3) - lvalue = true; - break; - case glslang::EOpSparseTextureFetch: - if ((sampler.dim != glslang::EsdRect && i == 3) || (sampler.dim == glslang::EsdRect && i == 2)) - lvalue = true; - break; - case glslang::EOpSparseTextureFetchOffset: - if ((sampler.dim != glslang::EsdRect && i == 4) || (sampler.dim == glslang::EsdRect && i == 3)) - lvalue = true; - break; - case glslang::EOpSparseTextureLodOffset: - case glslang::EOpSparseTextureGrad: - case glslang::EOpSparseTextureOffsetClamp: - if (i == 4) - lvalue = true; - break; - case glslang::EOpSparseTextureGradOffset: - case glslang::EOpSparseTextureGradClamp: - if (i == 5) - lvalue = true; - break; - case glslang::EOpSparseTextureGradOffsetClamp: - if (i == 6) - lvalue = true; - break; - case glslang::EOpSparseTextureGather: - if ((sampler.shadow && i == 3) || (! sampler.shadow && i == 2)) - lvalue = true; - break; - case glslang::EOpSparseTextureGatherOffset: - case glslang::EOpSparseTextureGatherOffsets: - if ((sampler.shadow && i == 4) || (! 
sampler.shadow && i == 3)) - lvalue = true; - break; -#ifdef AMD_EXTENSIONS - case glslang::EOpSparseTextureGatherLod: - if (i == 3) - lvalue = true; - break; - case glslang::EOpSparseTextureGatherLodOffset: - case glslang::EOpSparseTextureGatherLodOffsets: - if (i == 4) - lvalue = true; - break; - case glslang::EOpSparseImageLoadLod: - if (i == 3) - lvalue = true; - break; -#endif - default: - break; - } - - if (lvalue) - arguments.push_back(builder.accessChainGetLValue()); - else - arguments.push_back(accessChainLoad(glslangArguments[i]->getAsTyped()->getType())); - } -} - -void TGlslangToSpvTraverser::translateArguments(glslang::TIntermUnary& node, std::vector& arguments) -{ - builder.clearAccessChain(); - node.getOperand()->traverse(this); - arguments.push_back(accessChainLoad(node.getOperand()->getType())); -} - -spv::Id TGlslangToSpvTraverser::createImageTextureFunctionCall(glslang::TIntermOperator* node) -{ - if (! node->isImage() && ! node->isTexture()) - return spv::NoResult; - - builder.setLine(node->getLoc().line); - - auto resultType = [&node,this]{ return convertGlslangToSpvType(node->getType()); }; - - // Process a GLSL texturing op (will be SPV image) - const glslang::TSampler sampler = node->getAsAggregate() ? 
node->getAsAggregate()->getSequence()[0]->getAsTyped()->getType().getSampler() - : node->getAsUnaryNode()->getOperand()->getAsTyped()->getType().getSampler(); - std::vector arguments; - if (node->getAsAggregate()) - translateArguments(*node->getAsAggregate(), arguments); - else - translateArguments(*node->getAsUnaryNode(), arguments); - spv::Decoration precision = TranslatePrecisionDecoration(node->getOperationPrecision()); - - spv::Builder::TextureParameters params = { }; - params.sampler = arguments[0]; - - glslang::TCrackedTextureOp cracked; - node->crackTexture(sampler, cracked); - - const bool isUnsignedResult = node->getType().getBasicType() == glslang::EbtUint; - - // Check for queries - if (cracked.query) { - // OpImageQueryLod works on a sampled image, for other queries the image has to be extracted first - if (node->getOp() != glslang::EOpTextureQueryLod && builder.isSampledImage(params.sampler)) - params.sampler = builder.createUnaryOp(spv::OpImage, builder.getImageType(params.sampler), params.sampler); - - switch (node->getOp()) { - case glslang::EOpImageQuerySize: - case glslang::EOpTextureQuerySize: - if (arguments.size() > 1) { - params.lod = arguments[1]; - return builder.createTextureQueryCall(spv::OpImageQuerySizeLod, params, isUnsignedResult); - } else - return builder.createTextureQueryCall(spv::OpImageQuerySize, params, isUnsignedResult); - case glslang::EOpImageQuerySamples: - case glslang::EOpTextureQuerySamples: - return builder.createTextureQueryCall(spv::OpImageQuerySamples, params, isUnsignedResult); - case glslang::EOpTextureQueryLod: - params.coords = arguments[1]; - return builder.createTextureQueryCall(spv::OpImageQueryLod, params, isUnsignedResult); - case glslang::EOpTextureQueryLevels: - return builder.createTextureQueryCall(spv::OpImageQueryLevels, params, isUnsignedResult); - case glslang::EOpSparseTexelsResident: - return builder.createUnaryOp(spv::OpImageSparseTexelsResident, builder.makeBoolType(), arguments[0]); - default: - 
assert(0); - break; - } - } - - // Check for image functions other than queries - if (node->isImage()) { - std::vector operands; - auto opIt = arguments.begin(); - operands.push_back(*(opIt++)); - - // Handle subpass operations - // TODO: GLSL should change to have the "MS" only on the type rather than the - // built-in function. - if (cracked.subpass) { - // add on the (0,0) coordinate - spv::Id zero = builder.makeIntConstant(0); - std::vector comps; - comps.push_back(zero); - comps.push_back(zero); - operands.push_back(builder.makeCompositeConstant(builder.makeVectorType(builder.makeIntType(32), 2), comps)); - if (sampler.ms) { - operands.push_back(spv::ImageOperandsSampleMask); - operands.push_back(*(opIt++)); - } - spv::Id result = builder.createOp(spv::OpImageRead, resultType(), operands); - builder.setPrecision(result, precision); - return result; - } - - operands.push_back(*(opIt++)); -#ifdef AMD_EXTENSIONS - if (node->getOp() == glslang::EOpImageLoad || node->getOp() == glslang::EOpImageLoadLod) { -#else - if (node->getOp() == glslang::EOpImageLoad) { -#endif - if (sampler.ms) { - operands.push_back(spv::ImageOperandsSampleMask); - operands.push_back(*opIt); -#ifdef AMD_EXTENSIONS - } else if (cracked.lod) { - builder.addExtension(spv::E_SPV_AMD_shader_image_load_store_lod); - builder.addCapability(spv::CapabilityImageReadWriteLodAMD); - - operands.push_back(spv::ImageOperandsLodMask); - operands.push_back(*opIt); -#endif - } - if (builder.getImageTypeFormat(builder.getImageType(operands.front())) == spv::ImageFormatUnknown) - builder.addCapability(spv::CapabilityStorageImageReadWithoutFormat); - - spv::Id result = builder.createOp(spv::OpImageRead, resultType(), operands); - builder.setPrecision(result, precision); - return result; -#ifdef AMD_EXTENSIONS - } else if (node->getOp() == glslang::EOpImageStore || node->getOp() == glslang::EOpImageStoreLod) { -#else - } else if (node->getOp() == glslang::EOpImageStore) { -#endif - if (sampler.ms) { - 
operands.push_back(*(opIt + 1)); - operands.push_back(spv::ImageOperandsSampleMask); - operands.push_back(*opIt); -#ifdef AMD_EXTENSIONS - } else if (cracked.lod) { - builder.addExtension(spv::E_SPV_AMD_shader_image_load_store_lod); - builder.addCapability(spv::CapabilityImageReadWriteLodAMD); - - operands.push_back(*(opIt + 1)); - operands.push_back(spv::ImageOperandsLodMask); - operands.push_back(*opIt); -#endif - } else - operands.push_back(*opIt); - builder.createNoResultOp(spv::OpImageWrite, operands); - if (builder.getImageTypeFormat(builder.getImageType(operands.front())) == spv::ImageFormatUnknown) - builder.addCapability(spv::CapabilityStorageImageWriteWithoutFormat); - return spv::NoResult; -#ifdef AMD_EXTENSIONS - } else if (node->getOp() == glslang::EOpSparseImageLoad || node->getOp() == glslang::EOpSparseImageLoadLod) { -#else - } else if (node->getOp() == glslang::EOpSparseImageLoad) { -#endif - builder.addCapability(spv::CapabilitySparseResidency); - if (builder.getImageTypeFormat(builder.getImageType(operands.front())) == spv::ImageFormatUnknown) - builder.addCapability(spv::CapabilityStorageImageReadWithoutFormat); - - if (sampler.ms) { - operands.push_back(spv::ImageOperandsSampleMask); - operands.push_back(*opIt++); -#ifdef AMD_EXTENSIONS - } else if (cracked.lod) { - builder.addExtension(spv::E_SPV_AMD_shader_image_load_store_lod); - builder.addCapability(spv::CapabilityImageReadWriteLodAMD); - - operands.push_back(spv::ImageOperandsLodMask); - operands.push_back(*opIt++); -#endif - } - - // Create the return type that was a special structure - spv::Id texelOut = *opIt; - spv::Id typeId0 = resultType(); - spv::Id typeId1 = builder.getDerefTypeId(texelOut); - spv::Id resultTypeId = builder.makeStructResultType(typeId0, typeId1); - - spv::Id resultId = builder.createOp(spv::OpImageSparseRead, resultTypeId, operands); - - // Decode the return type - builder.createStore(builder.createCompositeExtract(resultId, typeId1, 1), texelOut); - return 
builder.createCompositeExtract(resultId, typeId0, 0); - } else { - // Process image atomic operations - - // GLSL "IMAGE_PARAMS" will involve in constructing an image texel pointer and this pointer, - // as the first source operand, is required by SPIR-V atomic operations. - operands.push_back(sampler.ms ? *(opIt++) : builder.makeUintConstant(0)); // For non-MS, the value should be 0 - - spv::Id resultTypeId = builder.makePointer(spv::StorageClassImage, resultType()); - spv::Id pointer = builder.createOp(spv::OpImageTexelPointer, resultTypeId, operands); - - std::vector operands; - operands.push_back(pointer); - for (; opIt != arguments.end(); ++opIt) - operands.push_back(*opIt); - - return createAtomicOperation(node->getOp(), precision, resultType(), operands, node->getBasicType()); - } - } - -#ifdef AMD_EXTENSIONS - // Check for fragment mask functions other than queries - if (cracked.fragMask) { - assert(sampler.ms); - - auto opIt = arguments.begin(); - std::vector operands; - - // Extract the image if necessary - if (builder.isSampledImage(params.sampler)) - params.sampler = builder.createUnaryOp(spv::OpImage, builder.getImageType(params.sampler), params.sampler); - - operands.push_back(params.sampler); - ++opIt; - - if (sampler.isSubpass()) { - // add on the (0,0) coordinate - spv::Id zero = builder.makeIntConstant(0); - std::vector comps; - comps.push_back(zero); - comps.push_back(zero); - operands.push_back(builder.makeCompositeConstant(builder.makeVectorType(builder.makeIntType(32), 2), comps)); - } - - for (; opIt != arguments.end(); ++opIt) - operands.push_back(*opIt); - - spv::Op fragMaskOp = spv::OpNop; - if (node->getOp() == glslang::EOpFragmentMaskFetch) - fragMaskOp = spv::OpFragmentMaskFetchAMD; - else if (node->getOp() == glslang::EOpFragmentFetch) - fragMaskOp = spv::OpFragmentFetchAMD; - - builder.addExtension(spv::E_SPV_AMD_shader_fragment_mask); - builder.addCapability(spv::CapabilityFragmentMaskAMD); - return builder.createOp(fragMaskOp, 
resultType(), operands); - } -#endif - - // Check for texture functions other than queries - bool sparse = node->isSparseTexture(); - bool cubeCompare = sampler.dim == glslang::EsdCube && sampler.arrayed && sampler.shadow; - - // check for bias argument - bool bias = false; -#ifdef AMD_EXTENSIONS - if (! cracked.lod && ! cracked.grad && ! cracked.fetch && ! cubeCompare) { -#else - if (! cracked.lod && ! cracked.gather && ! cracked.grad && ! cracked.fetch && ! cubeCompare) { -#endif - int nonBiasArgCount = 2; -#ifdef AMD_EXTENSIONS - if (cracked.gather) - ++nonBiasArgCount; // comp argument should be present when bias argument is present -#endif - if (cracked.offset) - ++nonBiasArgCount; -#ifdef AMD_EXTENSIONS - else if (cracked.offsets) - ++nonBiasArgCount; -#endif - if (cracked.grad) - nonBiasArgCount += 2; - if (cracked.lodClamp) - ++nonBiasArgCount; - if (sparse) - ++nonBiasArgCount; - - if ((int)arguments.size() > nonBiasArgCount) - bias = true; - } - - // See if the sampler param should really be just the SPV image part - if (cracked.fetch) { - // a fetch needs to have the image extracted first - if (builder.isSampledImage(params.sampler)) - params.sampler = builder.createUnaryOp(spv::OpImage, builder.getImageType(params.sampler), params.sampler); - } - -#ifdef AMD_EXTENSIONS - if (cracked.gather) { - const auto& sourceExtensions = glslangIntermediate->getRequestedExtensions(); - if (bias || cracked.lod || - sourceExtensions.find(glslang::E_GL_AMD_texture_gather_bias_lod) != sourceExtensions.end()) { - builder.addExtension(spv::E_SPV_AMD_texture_gather_bias_lod); - builder.addCapability(spv::CapabilityImageGatherBiasLodAMD); - } - } -#endif - - // set the rest of the arguments - - params.coords = arguments[1]; - int extraArgs = 0; - bool noImplicitLod = false; - - // sort out where Dref is coming from - if (cubeCompare) { - params.Dref = arguments[2]; - ++extraArgs; - } else if (sampler.shadow && cracked.gather) { - params.Dref = arguments[2]; - ++extraArgs; - 
} else if (sampler.shadow) { - std::vector indexes; - int dRefComp; - if (cracked.proj) - dRefComp = 2; // "The resulting 3rd component of P in the shadow forms is used as Dref" - else - dRefComp = builder.getNumComponents(params.coords) - 1; - indexes.push_back(dRefComp); - params.Dref = builder.createCompositeExtract(params.coords, builder.getScalarTypeId(builder.getTypeId(params.coords)), indexes); - } - - // lod - if (cracked.lod) { - params.lod = arguments[2 + extraArgs]; - ++extraArgs; - } else if (glslangIntermediate->getStage() != EShLangFragment) { - // we need to invent the default lod for an explicit lod instruction for a non-fragment stage - noImplicitLod = true; - } - - // multisample - if (sampler.ms) { - params.sample = arguments[2 + extraArgs]; // For MS, "sample" should be specified - ++extraArgs; - } - - // gradient - if (cracked.grad) { - params.gradX = arguments[2 + extraArgs]; - params.gradY = arguments[3 + extraArgs]; - extraArgs += 2; - } - - // offset and offsets - if (cracked.offset) { - params.offset = arguments[2 + extraArgs]; - ++extraArgs; - } else if (cracked.offsets) { - params.offsets = arguments[2 + extraArgs]; - ++extraArgs; - } - - // lod clamp - if (cracked.lodClamp) { - params.lodClamp = arguments[2 + extraArgs]; - ++extraArgs; - } - - // sparse - if (sparse) { - params.texelOut = arguments[2 + extraArgs]; - ++extraArgs; - } - - // gather component - if (cracked.gather && ! sampler.shadow) { - // default component is 0, if missing, otherwise an argument - if (2 + extraArgs < (int)arguments.size()) { - params.component = arguments[2 + extraArgs]; - ++extraArgs; - } else - params.component = builder.makeIntConstant(0); - } - - // bias - if (bias) { - params.bias = arguments[2 + extraArgs]; - ++extraArgs; - } - - // projective component (might not to move) - // GLSL: "The texture coordinates consumed from P, not including the last component of P, - // are divided by the last component of P." - // SPIR-V: "... (u [, v] [, w], q)... 
It may be a vector larger than needed, but all - // unused components will appear after all used components." - if (cracked.proj) { - int projSourceComp = builder.getNumComponents(params.coords) - 1; - int projTargetComp; - switch (sampler.dim) { - case glslang::Esd1D: projTargetComp = 1; break; - case glslang::Esd2D: projTargetComp = 2; break; - case glslang::EsdRect: projTargetComp = 2; break; - default: projTargetComp = projSourceComp; break; - } - // copy the projective coordinate if we have to - if (projTargetComp != projSourceComp) { - spv::Id projComp = builder.createCompositeExtract(params.coords, - builder.getScalarTypeId(builder.getTypeId(params.coords)), - projSourceComp); - params.coords = builder.createCompositeInsert(projComp, params.coords, - builder.getTypeId(params.coords), projTargetComp); - } - } - - return builder.createTextureCall(precision, resultType(), sparse, cracked.fetch, cracked.proj, cracked.gather, noImplicitLod, params); -} - -spv::Id TGlslangToSpvTraverser::handleUserFunctionCall(const glslang::TIntermAggregate* node) -{ - // Grab the function's pointer from the previously created function - spv::Function* function = functionMap[node->getName().c_str()]; - if (! function) - return 0; - - const glslang::TIntermSequence& glslangArgs = node->getSequence(); - const glslang::TQualifierList& qualifiers = node->getQualifierList(); - - // See comments in makeFunctions() for details about the semantics for parameter passing. - // - // These imply we need a four step process: - // 1. Evaluate the arguments - // 2. Allocate and make copies of in, out, and inout arguments - // 3. Make the call - // 4. Copy back the results - - // 1. 
Evaluate the arguments - std::vector lValues; - std::vector rValues; - std::vector argTypes; - for (int a = 0; a < (int)glslangArgs.size(); ++a) { - const glslang::TType& paramType = glslangArgs[a]->getAsTyped()->getType(); - // build l-value - builder.clearAccessChain(); - glslangArgs[a]->traverse(this); - argTypes.push_back(¶mType); - // keep outputs and pass-by-originals as l-values, evaluate others as r-values - if (originalParam(qualifiers[a], paramType, function->hasImplicitThis() && a == 0) || - writableParam(qualifiers[a])) { - // save l-value - lValues.push_back(builder.getAccessChain()); - } else { - // process r-value - rValues.push_back(accessChainLoad(*argTypes.back())); - } - } - - // 2. Allocate space for anything needing a copy, and if it's "in" or "inout" - // copy the original into that space. - // - // Also, build up the list of actual arguments to pass in for the call - int lValueCount = 0; - int rValueCount = 0; - std::vector spvArgs; - for (int a = 0; a < (int)glslangArgs.size(); ++a) { - const glslang::TType& paramType = glslangArgs[a]->getAsTyped()->getType(); - spv::Id arg; - if (originalParam(qualifiers[a], paramType, function->hasImplicitThis() && a == 0)) { - builder.setAccessChain(lValues[lValueCount]); - arg = builder.accessChainGetLValue(); - ++lValueCount; - } else if (writableParam(qualifiers[a])) { - // need space to hold the copy - arg = builder.createVariable(spv::StorageClassFunction, convertGlslangToSpvType(paramType), "param"); - if (qualifiers[a] == glslang::EvqIn || qualifiers[a] == glslang::EvqInOut) { - // need to copy the input into output space - builder.setAccessChain(lValues[lValueCount]); - spv::Id copy = accessChainLoad(*argTypes[a]); - builder.clearAccessChain(); - builder.setAccessChainLValue(arg); - multiTypeStore(paramType, copy); - } - ++lValueCount; - } else { - arg = rValues[rValueCount]; - ++rValueCount; - } - spvArgs.push_back(arg); - } - - // 3. Make the call. 
- spv::Id result = builder.createFunctionCall(function, spvArgs); - builder.setPrecision(result, TranslatePrecisionDecoration(node->getType())); - - // 4. Copy back out an "out" arguments. - lValueCount = 0; - for (int a = 0; a < (int)glslangArgs.size(); ++a) { - const glslang::TType& paramType = glslangArgs[a]->getAsTyped()->getType(); - if (originalParam(qualifiers[a], paramType, function->hasImplicitThis() && a == 0)) - ++lValueCount; - else if (writableParam(qualifiers[a])) { - if (qualifiers[a] == glslang::EvqOut || qualifiers[a] == glslang::EvqInOut) { - spv::Id copy = builder.createLoad(spvArgs[a]); - builder.setAccessChain(lValues[lValueCount]); - multiTypeStore(paramType, copy); - } - ++lValueCount; - } - } - - return result; -} - -// Translate AST operation to SPV operation, already having SPV-based operands/types. -spv::Id TGlslangToSpvTraverser::createBinaryOperation(glslang::TOperator op, spv::Decoration precision, - spv::Decoration noContraction, - spv::Id typeId, spv::Id left, spv::Id right, - glslang::TBasicType typeProxy, bool reduceComparison) -{ -#ifdef AMD_EXTENSIONS - bool isUnsigned = typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64 || typeProxy == glslang::EbtUint16; - bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble || typeProxy == glslang::EbtFloat16; -#else - bool isUnsigned = typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64; - bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble; -#endif - bool isBool = typeProxy == glslang::EbtBool; - - spv::Op binOp = spv::OpNop; - bool needMatchingVectors = true; // for non-matrix ops, would a scalar need to smear to match a vector? 
- bool comparison = false; - - switch (op) { - case glslang::EOpAdd: - case glslang::EOpAddAssign: - if (isFloat) - binOp = spv::OpFAdd; - else - binOp = spv::OpIAdd; - break; - case glslang::EOpSub: - case glslang::EOpSubAssign: - if (isFloat) - binOp = spv::OpFSub; - else - binOp = spv::OpISub; - break; - case glslang::EOpMul: - case glslang::EOpMulAssign: - if (isFloat) - binOp = spv::OpFMul; - else - binOp = spv::OpIMul; - break; - case glslang::EOpVectorTimesScalar: - case glslang::EOpVectorTimesScalarAssign: - if (isFloat && (builder.isVector(left) || builder.isVector(right))) { - if (builder.isVector(right)) - std::swap(left, right); - assert(builder.isScalar(right)); - needMatchingVectors = false; - binOp = spv::OpVectorTimesScalar; - } else - binOp = spv::OpIMul; - break; - case glslang::EOpVectorTimesMatrix: - case glslang::EOpVectorTimesMatrixAssign: - binOp = spv::OpVectorTimesMatrix; - break; - case glslang::EOpMatrixTimesVector: - binOp = spv::OpMatrixTimesVector; - break; - case glslang::EOpMatrixTimesScalar: - case glslang::EOpMatrixTimesScalarAssign: - binOp = spv::OpMatrixTimesScalar; - break; - case glslang::EOpMatrixTimesMatrix: - case glslang::EOpMatrixTimesMatrixAssign: - binOp = spv::OpMatrixTimesMatrix; - break; - case glslang::EOpOuterProduct: - binOp = spv::OpOuterProduct; - needMatchingVectors = false; - break; - - case glslang::EOpDiv: - case glslang::EOpDivAssign: - if (isFloat) - binOp = spv::OpFDiv; - else if (isUnsigned) - binOp = spv::OpUDiv; - else - binOp = spv::OpSDiv; - break; - case glslang::EOpMod: - case glslang::EOpModAssign: - if (isFloat) - binOp = spv::OpFMod; - else if (isUnsigned) - binOp = spv::OpUMod; - else - binOp = spv::OpSMod; - break; - case glslang::EOpRightShift: - case glslang::EOpRightShiftAssign: - if (isUnsigned) - binOp = spv::OpShiftRightLogical; - else - binOp = spv::OpShiftRightArithmetic; - break; - case glslang::EOpLeftShift: - case glslang::EOpLeftShiftAssign: - binOp = spv::OpShiftLeftLogical; - 
break; - case glslang::EOpAnd: - case glslang::EOpAndAssign: - binOp = spv::OpBitwiseAnd; - break; - case glslang::EOpLogicalAnd: - needMatchingVectors = false; - binOp = spv::OpLogicalAnd; - break; - case glslang::EOpInclusiveOr: - case glslang::EOpInclusiveOrAssign: - binOp = spv::OpBitwiseOr; - break; - case glslang::EOpLogicalOr: - needMatchingVectors = false; - binOp = spv::OpLogicalOr; - break; - case glslang::EOpExclusiveOr: - case glslang::EOpExclusiveOrAssign: - binOp = spv::OpBitwiseXor; - break; - case glslang::EOpLogicalXor: - needMatchingVectors = false; - binOp = spv::OpLogicalNotEqual; - break; - - case glslang::EOpLessThan: - case glslang::EOpGreaterThan: - case glslang::EOpLessThanEqual: - case glslang::EOpGreaterThanEqual: - case glslang::EOpEqual: - case glslang::EOpNotEqual: - case glslang::EOpVectorEqual: - case glslang::EOpVectorNotEqual: - comparison = true; - break; - default: - break; - } - - // handle mapped binary operations (should be non-comparison) - if (binOp != spv::OpNop) { - assert(comparison == false); - if (builder.isMatrix(left) || builder.isMatrix(right)) - return createBinaryMatrixOperation(binOp, precision, noContraction, typeId, left, right); - - // No matrix involved; make both operands be the same number of components, if needed - if (needMatchingVectors) - builder.promoteScalar(precision, left, right); - - spv::Id result = builder.createBinOp(binOp, typeId, left, right); - addDecoration(result, noContraction); - return builder.setPrecision(result, precision); - } - - if (! 
comparison) - return 0; - - // Handle comparison instructions - - if (reduceComparison && (op == glslang::EOpEqual || op == glslang::EOpNotEqual) - && (builder.isVector(left) || builder.isMatrix(left) || builder.isAggregate(left))) - return builder.createCompositeCompare(precision, left, right, op == glslang::EOpEqual); - - switch (op) { - case glslang::EOpLessThan: - if (isFloat) - binOp = spv::OpFOrdLessThan; - else if (isUnsigned) - binOp = spv::OpULessThan; - else - binOp = spv::OpSLessThan; - break; - case glslang::EOpGreaterThan: - if (isFloat) - binOp = spv::OpFOrdGreaterThan; - else if (isUnsigned) - binOp = spv::OpUGreaterThan; - else - binOp = spv::OpSGreaterThan; - break; - case glslang::EOpLessThanEqual: - if (isFloat) - binOp = spv::OpFOrdLessThanEqual; - else if (isUnsigned) - binOp = spv::OpULessThanEqual; - else - binOp = spv::OpSLessThanEqual; - break; - case glslang::EOpGreaterThanEqual: - if (isFloat) - binOp = spv::OpFOrdGreaterThanEqual; - else if (isUnsigned) - binOp = spv::OpUGreaterThanEqual; - else - binOp = spv::OpSGreaterThanEqual; - break; - case glslang::EOpEqual: - case glslang::EOpVectorEqual: - if (isFloat) - binOp = spv::OpFOrdEqual; - else if (isBool) - binOp = spv::OpLogicalEqual; - else - binOp = spv::OpIEqual; - break; - case glslang::EOpNotEqual: - case glslang::EOpVectorNotEqual: - if (isFloat) - binOp = spv::OpFOrdNotEqual; - else if (isBool) - binOp = spv::OpLogicalNotEqual; - else - binOp = spv::OpINotEqual; - break; - default: - break; - } - - if (binOp != spv::OpNop) { - spv::Id result = builder.createBinOp(binOp, typeId, left, right); - addDecoration(result, noContraction); - return builder.setPrecision(result, precision); - } - - return 0; -} - -// -// Translate AST matrix operation to SPV operation, already having SPV-based operands/types. 
-// These can be any of: -// -// matrix * scalar -// scalar * matrix -// matrix * matrix linear algebraic -// matrix * vector -// vector * matrix -// matrix * matrix componentwise -// matrix op matrix op in {+, -, /} -// matrix op scalar op in {+, -, /} -// scalar op matrix op in {+, -, /} -// -spv::Id TGlslangToSpvTraverser::createBinaryMatrixOperation(spv::Op op, spv::Decoration precision, spv::Decoration noContraction, spv::Id typeId, spv::Id left, spv::Id right) -{ - bool firstClass = true; - - // First, handle first-class matrix operations (* and matrix/scalar) - switch (op) { - case spv::OpFDiv: - if (builder.isMatrix(left) && builder.isScalar(right)) { - // turn matrix / scalar into a multiply... - right = builder.createBinOp(spv::OpFDiv, builder.getTypeId(right), builder.makeFloatConstant(1.0F), right); - op = spv::OpMatrixTimesScalar; - } else - firstClass = false; - break; - case spv::OpMatrixTimesScalar: - if (builder.isMatrix(right)) - std::swap(left, right); - assert(builder.isScalar(right)); - break; - case spv::OpVectorTimesMatrix: - assert(builder.isVector(left)); - assert(builder.isMatrix(right)); - break; - case spv::OpMatrixTimesVector: - assert(builder.isMatrix(left)); - assert(builder.isVector(right)); - break; - case spv::OpMatrixTimesMatrix: - assert(builder.isMatrix(left)); - assert(builder.isMatrix(right)); - break; - default: - firstClass = false; - break; - } - - if (firstClass) { - spv::Id result = builder.createBinOp(op, typeId, left, right); - addDecoration(result, noContraction); - return builder.setPrecision(result, precision); - } - - // Handle component-wise +, -, *, %, and / for all combinations of type. - // The result type of all of them is the same type as the (a) matrix operand. 
- // The algorithm is to: - // - break the matrix(es) into vectors - // - smear any scalar to a vector - // - do vector operations - // - make a matrix out the vector results - switch (op) { - case spv::OpFAdd: - case spv::OpFSub: - case spv::OpFDiv: - case spv::OpFMod: - case spv::OpFMul: - { - // one time set up... - bool leftMat = builder.isMatrix(left); - bool rightMat = builder.isMatrix(right); - unsigned int numCols = leftMat ? builder.getNumColumns(left) : builder.getNumColumns(right); - int numRows = leftMat ? builder.getNumRows(left) : builder.getNumRows(right); - spv::Id scalarType = builder.getScalarTypeId(typeId); - spv::Id vecType = builder.makeVectorType(scalarType, numRows); - std::vector results; - spv::Id smearVec = spv::NoResult; - if (builder.isScalar(left)) - smearVec = builder.smearScalar(precision, left, vecType); - else if (builder.isScalar(right)) - smearVec = builder.smearScalar(precision, right, vecType); - - // do each vector op - for (unsigned int c = 0; c < numCols; ++c) { - std::vector indexes; - indexes.push_back(c); - spv::Id leftVec = leftMat ? builder.createCompositeExtract( left, vecType, indexes) : smearVec; - spv::Id rightVec = rightMat ? 
builder.createCompositeExtract(right, vecType, indexes) : smearVec; - spv::Id result = builder.createBinOp(op, vecType, leftVec, rightVec); - addDecoration(result, noContraction); - results.push_back(builder.setPrecision(result, precision)); - } - - // put the pieces together - return builder.setPrecision(builder.createCompositeConstruct(typeId, results), precision); - } - default: - assert(0); - return spv::NoResult; - } -} - -spv::Id TGlslangToSpvTraverser::createUnaryOperation(glslang::TOperator op, spv::Decoration precision, spv::Decoration noContraction, spv::Id typeId, spv::Id operand, glslang::TBasicType typeProxy) -{ - spv::Op unaryOp = spv::OpNop; - int extBuiltins = -1; - int libCall = -1; -#ifdef AMD_EXTENSIONS - bool isUnsigned = typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64 || typeProxy == glslang::EbtUint16; - bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble || typeProxy == glslang::EbtFloat16; -#else - bool isUnsigned = typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64; - bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble; -#endif - - switch (op) { - case glslang::EOpNegative: - if (isFloat) { - unaryOp = spv::OpFNegate; - if (builder.isMatrixType(typeId)) - return createUnaryMatrixOperation(unaryOp, precision, noContraction, typeId, operand, typeProxy); - } else - unaryOp = spv::OpSNegate; - break; - - case glslang::EOpLogicalNot: - case glslang::EOpVectorLogicalNot: - unaryOp = spv::OpLogicalNot; - break; - case glslang::EOpBitwiseNot: - unaryOp = spv::OpNot; - break; - - case glslang::EOpDeterminant: - libCall = spv::GLSLstd450Determinant; - break; - case glslang::EOpMatrixInverse: - libCall = spv::GLSLstd450MatrixInverse; - break; - case glslang::EOpTranspose: - unaryOp = spv::OpTranspose; - break; - - case glslang::EOpRadians: - libCall = spv::GLSLstd450Radians; - break; - case glslang::EOpDegrees: - libCall = spv::GLSLstd450Degrees; - break; - case 
glslang::EOpSin: - libCall = spv::GLSLstd450Sin; - break; - case glslang::EOpCos: - libCall = spv::GLSLstd450Cos; - break; - case glslang::EOpTan: - libCall = spv::GLSLstd450Tan; - break; - case glslang::EOpAcos: - libCall = spv::GLSLstd450Acos; - break; - case glslang::EOpAsin: - libCall = spv::GLSLstd450Asin; - break; - case glslang::EOpAtan: - libCall = spv::GLSLstd450Atan; - break; - - case glslang::EOpAcosh: - libCall = spv::GLSLstd450Acosh; - break; - case glslang::EOpAsinh: - libCall = spv::GLSLstd450Asinh; - break; - case glslang::EOpAtanh: - libCall = spv::GLSLstd450Atanh; - break; - case glslang::EOpTanh: - libCall = spv::GLSLstd450Tanh; - break; - case glslang::EOpCosh: - libCall = spv::GLSLstd450Cosh; - break; - case glslang::EOpSinh: - libCall = spv::GLSLstd450Sinh; - break; - - case glslang::EOpLength: - libCall = spv::GLSLstd450Length; - break; - case glslang::EOpNormalize: - libCall = spv::GLSLstd450Normalize; - break; - - case glslang::EOpExp: - libCall = spv::GLSLstd450Exp; - break; - case glslang::EOpLog: - libCall = spv::GLSLstd450Log; - break; - case glslang::EOpExp2: - libCall = spv::GLSLstd450Exp2; - break; - case glslang::EOpLog2: - libCall = spv::GLSLstd450Log2; - break; - case glslang::EOpSqrt: - libCall = spv::GLSLstd450Sqrt; - break; - case glslang::EOpInverseSqrt: - libCall = spv::GLSLstd450InverseSqrt; - break; - - case glslang::EOpFloor: - libCall = spv::GLSLstd450Floor; - break; - case glslang::EOpTrunc: - libCall = spv::GLSLstd450Trunc; - break; - case glslang::EOpRound: - libCall = spv::GLSLstd450Round; - break; - case glslang::EOpRoundEven: - libCall = spv::GLSLstd450RoundEven; - break; - case glslang::EOpCeil: - libCall = spv::GLSLstd450Ceil; - break; - case glslang::EOpFract: - libCall = spv::GLSLstd450Fract; - break; - - case glslang::EOpIsNan: - unaryOp = spv::OpIsNan; - break; - case glslang::EOpIsInf: - unaryOp = spv::OpIsInf; - break; - case glslang::EOpIsFinite: - unaryOp = spv::OpIsFinite; - break; - - case 
glslang::EOpFloatBitsToInt: - case glslang::EOpFloatBitsToUint: - case glslang::EOpIntBitsToFloat: - case glslang::EOpUintBitsToFloat: - case glslang::EOpDoubleBitsToInt64: - case glslang::EOpDoubleBitsToUint64: - case glslang::EOpInt64BitsToDouble: - case glslang::EOpUint64BitsToDouble: -#ifdef AMD_EXTENSIONS - case glslang::EOpFloat16BitsToInt16: - case glslang::EOpFloat16BitsToUint16: - case glslang::EOpInt16BitsToFloat16: - case glslang::EOpUint16BitsToFloat16: -#endif - unaryOp = spv::OpBitcast; - break; - - case glslang::EOpPackSnorm2x16: - libCall = spv::GLSLstd450PackSnorm2x16; - break; - case glslang::EOpUnpackSnorm2x16: - libCall = spv::GLSLstd450UnpackSnorm2x16; - break; - case glslang::EOpPackUnorm2x16: - libCall = spv::GLSLstd450PackUnorm2x16; - break; - case glslang::EOpUnpackUnorm2x16: - libCall = spv::GLSLstd450UnpackUnorm2x16; - break; - case glslang::EOpPackHalf2x16: - libCall = spv::GLSLstd450PackHalf2x16; - break; - case glslang::EOpUnpackHalf2x16: - libCall = spv::GLSLstd450UnpackHalf2x16; - break; - case glslang::EOpPackSnorm4x8: - libCall = spv::GLSLstd450PackSnorm4x8; - break; - case glslang::EOpUnpackSnorm4x8: - libCall = spv::GLSLstd450UnpackSnorm4x8; - break; - case glslang::EOpPackUnorm4x8: - libCall = spv::GLSLstd450PackUnorm4x8; - break; - case glslang::EOpUnpackUnorm4x8: - libCall = spv::GLSLstd450UnpackUnorm4x8; - break; - case glslang::EOpPackDouble2x32: - libCall = spv::GLSLstd450PackDouble2x32; - break; - case glslang::EOpUnpackDouble2x32: - libCall = spv::GLSLstd450UnpackDouble2x32; - break; - - case glslang::EOpPackInt2x32: - case glslang::EOpUnpackInt2x32: - case glslang::EOpPackUint2x32: - case glslang::EOpUnpackUint2x32: - unaryOp = spv::OpBitcast; - break; - -#ifdef AMD_EXTENSIONS - case glslang::EOpPackInt2x16: - case glslang::EOpUnpackInt2x16: - case glslang::EOpPackUint2x16: - case glslang::EOpUnpackUint2x16: - case glslang::EOpPackInt4x16: - case glslang::EOpUnpackInt4x16: - case glslang::EOpPackUint4x16: - case 
glslang::EOpUnpackUint4x16: - case glslang::EOpPackFloat2x16: - case glslang::EOpUnpackFloat2x16: - unaryOp = spv::OpBitcast; - break; -#endif - - case glslang::EOpDPdx: - unaryOp = spv::OpDPdx; - break; - case glslang::EOpDPdy: - unaryOp = spv::OpDPdy; - break; - case glslang::EOpFwidth: - unaryOp = spv::OpFwidth; - break; - case glslang::EOpDPdxFine: - builder.addCapability(spv::CapabilityDerivativeControl); - unaryOp = spv::OpDPdxFine; - break; - case glslang::EOpDPdyFine: - builder.addCapability(spv::CapabilityDerivativeControl); - unaryOp = spv::OpDPdyFine; - break; - case glslang::EOpFwidthFine: - builder.addCapability(spv::CapabilityDerivativeControl); - unaryOp = spv::OpFwidthFine; - break; - case glslang::EOpDPdxCoarse: - builder.addCapability(spv::CapabilityDerivativeControl); - unaryOp = spv::OpDPdxCoarse; - break; - case glslang::EOpDPdyCoarse: - builder.addCapability(spv::CapabilityDerivativeControl); - unaryOp = spv::OpDPdyCoarse; - break; - case glslang::EOpFwidthCoarse: - builder.addCapability(spv::CapabilityDerivativeControl); - unaryOp = spv::OpFwidthCoarse; - break; - case glslang::EOpInterpolateAtCentroid: - builder.addCapability(spv::CapabilityInterpolationFunction); - libCall = spv::GLSLstd450InterpolateAtCentroid; - break; - case glslang::EOpAny: - unaryOp = spv::OpAny; - break; - case glslang::EOpAll: - unaryOp = spv::OpAll; - break; - - case glslang::EOpAbs: - if (isFloat) - libCall = spv::GLSLstd450FAbs; - else - libCall = spv::GLSLstd450SAbs; - break; - case glslang::EOpSign: - if (isFloat) - libCall = spv::GLSLstd450FSign; - else - libCall = spv::GLSLstd450SSign; - break; - - case glslang::EOpAtomicCounterIncrement: - case glslang::EOpAtomicCounterDecrement: - case glslang::EOpAtomicCounter: - { - // Handle all of the atomics in one place, in createAtomicOperation() - std::vector operands; - operands.push_back(operand); - return createAtomicOperation(op, precision, typeId, operands, typeProxy); - } - - case glslang::EOpBitFieldReverse: - 
unaryOp = spv::OpBitReverse; - break; - case glslang::EOpBitCount: - unaryOp = spv::OpBitCount; - break; - case glslang::EOpFindLSB: - libCall = spv::GLSLstd450FindILsb; - break; - case glslang::EOpFindMSB: - if (isUnsigned) - libCall = spv::GLSLstd450FindUMsb; - else - libCall = spv::GLSLstd450FindSMsb; - break; - - case glslang::EOpBallot: - case glslang::EOpReadFirstInvocation: - case glslang::EOpAnyInvocation: - case glslang::EOpAllInvocations: - case glslang::EOpAllInvocationsEqual: -#ifdef AMD_EXTENSIONS - case glslang::EOpMinInvocations: - case glslang::EOpMaxInvocations: - case glslang::EOpAddInvocations: - case glslang::EOpMinInvocationsNonUniform: - case glslang::EOpMaxInvocationsNonUniform: - case glslang::EOpAddInvocationsNonUniform: - case glslang::EOpMinInvocationsInclusiveScan: - case glslang::EOpMaxInvocationsInclusiveScan: - case glslang::EOpAddInvocationsInclusiveScan: - case glslang::EOpMinInvocationsInclusiveScanNonUniform: - case glslang::EOpMaxInvocationsInclusiveScanNonUniform: - case glslang::EOpAddInvocationsInclusiveScanNonUniform: - case glslang::EOpMinInvocationsExclusiveScan: - case glslang::EOpMaxInvocationsExclusiveScan: - case glslang::EOpAddInvocationsExclusiveScan: - case glslang::EOpMinInvocationsExclusiveScanNonUniform: - case glslang::EOpMaxInvocationsExclusiveScanNonUniform: - case glslang::EOpAddInvocationsExclusiveScanNonUniform: -#endif - { - std::vector operands; - operands.push_back(operand); - return createInvocationsOperation(op, typeId, operands, typeProxy); - } - -#ifdef AMD_EXTENSIONS - case glslang::EOpMbcnt: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_shader_ballot); - libCall = spv::MbcntAMD; - break; - - case glslang::EOpCubeFaceIndex: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_gcn_shader); - libCall = spv::CubeFaceIndexAMD; - break; - - case glslang::EOpCubeFaceCoord: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_gcn_shader); - libCall = spv::CubeFaceCoordAMD; - break; -#endif - - default: - return 0; - } - 
- spv::Id id; - if (libCall >= 0) { - std::vector args; - args.push_back(operand); - id = builder.createBuiltinCall(typeId, extBuiltins >= 0 ? extBuiltins : stdBuiltins, libCall, args); - } else { - id = builder.createUnaryOp(unaryOp, typeId, operand); - } - - addDecoration(id, noContraction); - return builder.setPrecision(id, precision); -} - -// Create a unary operation on a matrix -spv::Id TGlslangToSpvTraverser::createUnaryMatrixOperation(spv::Op op, spv::Decoration precision, spv::Decoration noContraction, spv::Id typeId, spv::Id operand, glslang::TBasicType /* typeProxy */) -{ - // Handle unary operations vector by vector. - // The result type is the same type as the original type. - // The algorithm is to: - // - break the matrix into vectors - // - apply the operation to each vector - // - make a matrix out the vector results - - // get the types sorted out - int numCols = builder.getNumColumns(operand); - int numRows = builder.getNumRows(operand); - spv::Id srcVecType = builder.makeVectorType(builder.getScalarTypeId(builder.getTypeId(operand)), numRows); - spv::Id destVecType = builder.makeVectorType(builder.getScalarTypeId(typeId), numRows); - std::vector results; - - // do each vector op - for (int c = 0; c < numCols; ++c) { - std::vector indexes; - indexes.push_back(c); - spv::Id srcVec = builder.createCompositeExtract(operand, srcVecType, indexes); - spv::Id destVec = builder.createUnaryOp(op, destVecType, srcVec); - addDecoration(destVec, noContraction); - results.push_back(builder.setPrecision(destVec, precision)); - } - - // put the pieces together - return builder.setPrecision(builder.createCompositeConstruct(typeId, results), precision); -} - -spv::Id TGlslangToSpvTraverser::createConversion(glslang::TOperator op, spv::Decoration precision, spv::Decoration noContraction, spv::Id destType, spv::Id operand, glslang::TBasicType typeProxy) -{ - spv::Op convOp = spv::OpNop; - spv::Id zero = 0; - spv::Id one = 0; - spv::Id type = 0; - - int vectorSize = 
builder.isVectorType(destType) ? builder.getNumTypeComponents(destType) : 0; - - switch (op) { - case glslang::EOpConvIntToBool: - case glslang::EOpConvUintToBool: - case glslang::EOpConvInt64ToBool: - case glslang::EOpConvUint64ToBool: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvInt16ToBool: - case glslang::EOpConvUint16ToBool: -#endif - if (op == glslang::EOpConvInt64ToBool || op == glslang::EOpConvUint64ToBool) - zero = builder.makeUint64Constant(0); -#ifdef AMD_EXTENSIONS - else if (op == glslang::EOpConvInt16ToBool || op == glslang::EOpConvUint16ToBool) - zero = builder.makeUint16Constant(0); -#endif - else - zero = builder.makeUintConstant(0); - zero = makeSmearedConstant(zero, vectorSize); - return builder.createBinOp(spv::OpINotEqual, destType, operand, zero); - - case glslang::EOpConvFloatToBool: - zero = builder.makeFloatConstant(0.0F); - zero = makeSmearedConstant(zero, vectorSize); - return builder.createBinOp(spv::OpFOrdNotEqual, destType, operand, zero); - - case glslang::EOpConvDoubleToBool: - zero = builder.makeDoubleConstant(0.0); - zero = makeSmearedConstant(zero, vectorSize); - return builder.createBinOp(spv::OpFOrdNotEqual, destType, operand, zero); - -#ifdef AMD_EXTENSIONS - case glslang::EOpConvFloat16ToBool: - zero = builder.makeFloat16Constant(0.0F); - zero = makeSmearedConstant(zero, vectorSize); - return builder.createBinOp(spv::OpFOrdNotEqual, destType, operand, zero); -#endif - - case glslang::EOpConvBoolToFloat: - convOp = spv::OpSelect; - zero = builder.makeFloatConstant(0.0F); - one = builder.makeFloatConstant(1.0F); - break; - - case glslang::EOpConvBoolToDouble: - convOp = spv::OpSelect; - zero = builder.makeDoubleConstant(0.0); - one = builder.makeDoubleConstant(1.0); - break; - -#ifdef AMD_EXTENSIONS - case glslang::EOpConvBoolToFloat16: - convOp = spv::OpSelect; - zero = builder.makeFloat16Constant(0.0F); - one = builder.makeFloat16Constant(1.0F); - break; -#endif - - case glslang::EOpConvBoolToInt: - case 
glslang::EOpConvBoolToInt64: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvBoolToInt16: -#endif - if (op == glslang::EOpConvBoolToInt64) - zero = builder.makeInt64Constant(0); -#ifdef AMD_EXTENSIONS - else if (op == glslang::EOpConvBoolToInt16) - zero = builder.makeInt16Constant(0); -#endif - else - zero = builder.makeIntConstant(0); - - if (op == glslang::EOpConvBoolToInt64) - one = builder.makeInt64Constant(1); -#ifdef AMD_EXTENSIONS - else if (op == glslang::EOpConvBoolToInt16) - one = builder.makeInt16Constant(1); -#endif - else - one = builder.makeIntConstant(1); - - convOp = spv::OpSelect; - break; - - case glslang::EOpConvBoolToUint: - case glslang::EOpConvBoolToUint64: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvBoolToUint16: -#endif - if (op == glslang::EOpConvBoolToUint64) - zero = builder.makeUint64Constant(0); -#ifdef AMD_EXTENSIONS - else if (op == glslang::EOpConvBoolToUint16) - zero = builder.makeUint16Constant(0); -#endif - else - zero = builder.makeUintConstant(0); - - if (op == glslang::EOpConvBoolToUint64) - one = builder.makeUint64Constant(1); -#ifdef AMD_EXTENSIONS - else if (op == glslang::EOpConvBoolToUint16) - one = builder.makeUint16Constant(1); -#endif - else - one = builder.makeUintConstant(1); - - convOp = spv::OpSelect; - break; - - case glslang::EOpConvIntToFloat: - case glslang::EOpConvIntToDouble: - case glslang::EOpConvInt64ToFloat: - case glslang::EOpConvInt64ToDouble: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvInt16ToFloat: - case glslang::EOpConvInt16ToDouble: - case glslang::EOpConvInt16ToFloat16: - case glslang::EOpConvIntToFloat16: - case glslang::EOpConvInt64ToFloat16: -#endif - convOp = spv::OpConvertSToF; - break; - - case glslang::EOpConvUintToFloat: - case glslang::EOpConvUintToDouble: - case glslang::EOpConvUint64ToFloat: - case glslang::EOpConvUint64ToDouble: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvUint16ToFloat: - case glslang::EOpConvUint16ToDouble: - case glslang::EOpConvUint16ToFloat16: - case 
glslang::EOpConvUintToFloat16: - case glslang::EOpConvUint64ToFloat16: -#endif - convOp = spv::OpConvertUToF; - break; - - case glslang::EOpConvDoubleToFloat: - case glslang::EOpConvFloatToDouble: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvDoubleToFloat16: - case glslang::EOpConvFloat16ToDouble: - case glslang::EOpConvFloatToFloat16: - case glslang::EOpConvFloat16ToFloat: -#endif - convOp = spv::OpFConvert; - if (builder.isMatrixType(destType)) - return createUnaryMatrixOperation(convOp, precision, noContraction, destType, operand, typeProxy); - break; - - case glslang::EOpConvFloatToInt: - case glslang::EOpConvDoubleToInt: - case glslang::EOpConvFloatToInt64: - case glslang::EOpConvDoubleToInt64: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvFloatToInt16: - case glslang::EOpConvDoubleToInt16: - case glslang::EOpConvFloat16ToInt16: - case glslang::EOpConvFloat16ToInt: - case glslang::EOpConvFloat16ToInt64: -#endif - convOp = spv::OpConvertFToS; - break; - - case glslang::EOpConvUintToInt: - case glslang::EOpConvIntToUint: - case glslang::EOpConvUint64ToInt64: - case glslang::EOpConvInt64ToUint64: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvUint16ToInt16: - case glslang::EOpConvInt16ToUint16: -#endif - if (builder.isInSpecConstCodeGenMode()) { - // Build zero scalar or vector for OpIAdd. - if (op == glslang::EOpConvUint64ToInt64 || op == glslang::EOpConvInt64ToUint64) - zero = builder.makeUint64Constant(0); -#ifdef AMD_EXTENSIONS - else if (op == glslang::EOpConvUint16ToInt16 || op == glslang::EOpConvInt16ToUint16) - zero = builder.makeUint16Constant(0); -#endif - else - zero = builder.makeUintConstant(0); - - zero = makeSmearedConstant(zero, vectorSize); - // Use OpIAdd, instead of OpBitcast to do the conversion when - // generating for OpSpecConstantOp instruction. - return builder.createBinOp(spv::OpIAdd, destType, operand, zero); - } - // For normal run-time conversion instruction, use OpBitcast. 
- convOp = spv::OpBitcast; - break; - - case glslang::EOpConvFloatToUint: - case glslang::EOpConvDoubleToUint: - case glslang::EOpConvFloatToUint64: - case glslang::EOpConvDoubleToUint64: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvFloatToUint16: - case glslang::EOpConvDoubleToUint16: - case glslang::EOpConvFloat16ToUint16: - case glslang::EOpConvFloat16ToUint: - case glslang::EOpConvFloat16ToUint64: -#endif - convOp = spv::OpConvertFToU; - break; - - case glslang::EOpConvIntToInt64: - case glslang::EOpConvInt64ToInt: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvIntToInt16: - case glslang::EOpConvInt16ToInt: - case glslang::EOpConvInt64ToInt16: - case glslang::EOpConvInt16ToInt64: -#endif - convOp = spv::OpSConvert; - break; - - case glslang::EOpConvUintToUint64: - case glslang::EOpConvUint64ToUint: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvUintToUint16: - case glslang::EOpConvUint16ToUint: - case glslang::EOpConvUint64ToUint16: - case glslang::EOpConvUint16ToUint64: -#endif - convOp = spv::OpUConvert; - break; - - case glslang::EOpConvIntToUint64: - case glslang::EOpConvInt64ToUint: - case glslang::EOpConvUint64ToInt: - case glslang::EOpConvUintToInt64: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvInt16ToUint: - case glslang::EOpConvUintToInt16: - case glslang::EOpConvInt16ToUint64: - case glslang::EOpConvUint64ToInt16: - case glslang::EOpConvUint16ToInt: - case glslang::EOpConvIntToUint16: - case glslang::EOpConvUint16ToInt64: - case glslang::EOpConvInt64ToUint16: -#endif - // OpSConvert/OpUConvert + OpBitCast - switch (op) { - case glslang::EOpConvIntToUint64: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvInt16ToUint64: -#endif - convOp = spv::OpSConvert; - type = builder.makeIntType(64); - break; - case glslang::EOpConvInt64ToUint: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvInt16ToUint: -#endif - convOp = spv::OpSConvert; - type = builder.makeIntType(32); - break; - case glslang::EOpConvUint64ToInt: -#ifdef AMD_EXTENSIONS - case 
glslang::EOpConvUint16ToInt: -#endif - convOp = spv::OpUConvert; - type = builder.makeUintType(32); - break; - case glslang::EOpConvUintToInt64: -#ifdef AMD_EXTENSIONS - case glslang::EOpConvUint16ToInt64: -#endif - convOp = spv::OpUConvert; - type = builder.makeUintType(64); - break; -#ifdef AMD_EXTENSIONS - case glslang::EOpConvUintToInt16: - case glslang::EOpConvUint64ToInt16: - convOp = spv::OpUConvert; - type = builder.makeUintType(16); - break; - case glslang::EOpConvIntToUint16: - case glslang::EOpConvInt64ToUint16: - convOp = spv::OpSConvert; - type = builder.makeIntType(16); - break; -#endif - default: - assert(0); - break; - } - - if (vectorSize > 0) - type = builder.makeVectorType(type, vectorSize); - - operand = builder.createUnaryOp(convOp, type, operand); - - if (builder.isInSpecConstCodeGenMode()) { - // Build zero scalar or vector for OpIAdd. -#ifdef AMD_EXTENSIONS - if (op == glslang::EOpConvIntToUint64 || op == glslang::EOpConvUintToInt64 || - op == glslang::EOpConvInt16ToUint64 || op == glslang::EOpConvUint16ToInt64) - zero = builder.makeUint64Constant(0); - else if (op == glslang::EOpConvIntToUint16 || op == glslang::EOpConvUintToInt16 || - op == glslang::EOpConvInt64ToUint16 || op == glslang::EOpConvUint64ToInt16) - zero = builder.makeUint16Constant(0); - else - zero = builder.makeUintConstant(0); -#else - if (op == glslang::EOpConvIntToUint64 || op == glslang::EOpConvUintToInt64) - zero = builder.makeUint64Constant(0); - else - zero = builder.makeUintConstant(0); -#endif - - zero = makeSmearedConstant(zero, vectorSize); - // Use OpIAdd, instead of OpBitcast to do the conversion when - // generating for OpSpecConstantOp instruction. - return builder.createBinOp(spv::OpIAdd, destType, operand, zero); - } - // For normal run-time conversion instruction, use OpBitcast. 
- convOp = spv::OpBitcast; - break; - default: - break; - } - - spv::Id result = 0; - if (convOp == spv::OpNop) - return result; - - if (convOp == spv::OpSelect) { - zero = makeSmearedConstant(zero, vectorSize); - one = makeSmearedConstant(one, vectorSize); - result = builder.createTriOp(convOp, destType, operand, one, zero); - } else - result = builder.createUnaryOp(convOp, destType, operand); - - return builder.setPrecision(result, precision); -} - -spv::Id TGlslangToSpvTraverser::makeSmearedConstant(spv::Id constant, int vectorSize) -{ - if (vectorSize == 0) - return constant; - - spv::Id vectorTypeId = builder.makeVectorType(builder.getTypeId(constant), vectorSize); - std::vector components; - for (int c = 0; c < vectorSize; ++c) - components.push_back(constant); - return builder.makeCompositeConstant(vectorTypeId, components); -} - -// For glslang ops that map to SPV atomic opCodes -spv::Id TGlslangToSpvTraverser::createAtomicOperation(glslang::TOperator op, spv::Decoration /*precision*/, spv::Id typeId, std::vector& operands, glslang::TBasicType typeProxy) -{ - spv::Op opCode = spv::OpNop; - - switch (op) { - case glslang::EOpAtomicAdd: - case glslang::EOpImageAtomicAdd: - case glslang::EOpAtomicCounterAdd: - opCode = spv::OpAtomicIAdd; - break; - case glslang::EOpAtomicCounterSubtract: - opCode = spv::OpAtomicISub; - break; - case glslang::EOpAtomicMin: - case glslang::EOpImageAtomicMin: - case glslang::EOpAtomicCounterMin: - opCode = (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) ? spv::OpAtomicUMin : spv::OpAtomicSMin; - break; - case glslang::EOpAtomicMax: - case glslang::EOpImageAtomicMax: - case glslang::EOpAtomicCounterMax: - opCode = (typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64) ? 
spv::OpAtomicUMax : spv::OpAtomicSMax; - break; - case glslang::EOpAtomicAnd: - case glslang::EOpImageAtomicAnd: - case glslang::EOpAtomicCounterAnd: - opCode = spv::OpAtomicAnd; - break; - case glslang::EOpAtomicOr: - case glslang::EOpImageAtomicOr: - case glslang::EOpAtomicCounterOr: - opCode = spv::OpAtomicOr; - break; - case glslang::EOpAtomicXor: - case glslang::EOpImageAtomicXor: - case glslang::EOpAtomicCounterXor: - opCode = spv::OpAtomicXor; - break; - case glslang::EOpAtomicExchange: - case glslang::EOpImageAtomicExchange: - case glslang::EOpAtomicCounterExchange: - opCode = spv::OpAtomicExchange; - break; - case glslang::EOpAtomicCompSwap: - case glslang::EOpImageAtomicCompSwap: - case glslang::EOpAtomicCounterCompSwap: - opCode = spv::OpAtomicCompareExchange; - break; - case glslang::EOpAtomicCounterIncrement: - opCode = spv::OpAtomicIIncrement; - break; - case glslang::EOpAtomicCounterDecrement: - opCode = spv::OpAtomicIDecrement; - break; - case glslang::EOpAtomicCounter: - opCode = spv::OpAtomicLoad; - break; - default: - assert(0); - break; - } - - if (typeProxy == glslang::EbtInt64 || typeProxy == glslang::EbtUint64) - builder.addCapability(spv::CapabilityInt64Atomics); - - // Sort out the operands - // - mapping from glslang -> SPV - // - there are extra SPV operands with no glslang source - // - compare-exchange swaps the value and comparator - // - compare-exchange has an extra memory semantics - // - EOpAtomicCounterDecrement needs a post decrement - std::vector spvAtomicOperands; // hold the spv operands - auto opIt = operands.begin(); // walk the glslang operands - spvAtomicOperands.push_back(*(opIt++)); - spvAtomicOperands.push_back(builder.makeUintConstant(spv::ScopeDevice)); // TBD: what is the correct scope? - spvAtomicOperands.push_back(builder.makeUintConstant(spv::MemorySemanticsMaskNone)); // TBD: what are the correct memory semantics? 
- if (opCode == spv::OpAtomicCompareExchange) { - // There are 2 memory semantics for compare-exchange. And the operand order of "comparator" and "new value" in GLSL - // differs from that in SPIR-V. Hence, special processing is required. - spvAtomicOperands.push_back(builder.makeUintConstant(spv::MemorySemanticsMaskNone)); - spvAtomicOperands.push_back(*(opIt + 1)); - spvAtomicOperands.push_back(*opIt); - opIt += 2; - } - - // Add the rest of the operands, skipping any that were dealt with above. - for (; opIt != operands.end(); ++opIt) - spvAtomicOperands.push_back(*opIt); - - spv::Id resultId = builder.createOp(opCode, typeId, spvAtomicOperands); - - // GLSL and HLSL atomic-counter decrement return post-decrement value, - // while SPIR-V returns pre-decrement value. Translate between these semantics. - if (op == glslang::EOpAtomicCounterDecrement) - resultId = builder.createBinOp(spv::OpISub, typeId, resultId, builder.makeIntConstant(1)); - - return resultId; -} - -// Create group invocation operations. 
-spv::Id TGlslangToSpvTraverser::createInvocationsOperation(glslang::TOperator op, spv::Id typeId, std::vector& operands, glslang::TBasicType typeProxy) -{ -#ifdef AMD_EXTENSIONS - bool isUnsigned = typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64; - bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble || typeProxy == glslang::EbtFloat16; -#endif - - spv::Op opCode = spv::OpNop; - std::vector spvGroupOperands; - spv::GroupOperation groupOperation = spv::GroupOperationMax; - - if (op == glslang::EOpBallot || op == glslang::EOpReadFirstInvocation || - op == glslang::EOpReadInvocation) { - builder.addExtension(spv::E_SPV_KHR_shader_ballot); - builder.addCapability(spv::CapabilitySubgroupBallotKHR); - } else if (op == glslang::EOpAnyInvocation || - op == glslang::EOpAllInvocations || - op == glslang::EOpAllInvocationsEqual) { - builder.addExtension(spv::E_SPV_KHR_subgroup_vote); - builder.addCapability(spv::CapabilitySubgroupVoteKHR); - } else { - builder.addCapability(spv::CapabilityGroups); -#ifdef AMD_EXTENSIONS - if (op == glslang::EOpMinInvocationsNonUniform || - op == glslang::EOpMaxInvocationsNonUniform || - op == glslang::EOpAddInvocationsNonUniform || - op == glslang::EOpMinInvocationsInclusiveScanNonUniform || - op == glslang::EOpMaxInvocationsInclusiveScanNonUniform || - op == glslang::EOpAddInvocationsInclusiveScanNonUniform || - op == glslang::EOpMinInvocationsExclusiveScanNonUniform || - op == glslang::EOpMaxInvocationsExclusiveScanNonUniform || - op == glslang::EOpAddInvocationsExclusiveScanNonUniform) - builder.addExtension(spv::E_SPV_AMD_shader_ballot); -#endif - - spvGroupOperands.push_back(builder.makeUintConstant(spv::ScopeSubgroup)); -#ifdef AMD_EXTENSIONS - switch (op) { - case glslang::EOpMinInvocations: - case glslang::EOpMaxInvocations: - case glslang::EOpAddInvocations: - case glslang::EOpMinInvocationsNonUniform: - case glslang::EOpMaxInvocationsNonUniform: - case 
glslang::EOpAddInvocationsNonUniform: - groupOperation = spv::GroupOperationReduce; - spvGroupOperands.push_back(groupOperation); - break; - case glslang::EOpMinInvocationsInclusiveScan: - case glslang::EOpMaxInvocationsInclusiveScan: - case glslang::EOpAddInvocationsInclusiveScan: - case glslang::EOpMinInvocationsInclusiveScanNonUniform: - case glslang::EOpMaxInvocationsInclusiveScanNonUniform: - case glslang::EOpAddInvocationsInclusiveScanNonUniform: - groupOperation = spv::GroupOperationInclusiveScan; - spvGroupOperands.push_back(groupOperation); - break; - case glslang::EOpMinInvocationsExclusiveScan: - case glslang::EOpMaxInvocationsExclusiveScan: - case glslang::EOpAddInvocationsExclusiveScan: - case glslang::EOpMinInvocationsExclusiveScanNonUniform: - case glslang::EOpMaxInvocationsExclusiveScanNonUniform: - case glslang::EOpAddInvocationsExclusiveScanNonUniform: - groupOperation = spv::GroupOperationExclusiveScan; - spvGroupOperands.push_back(groupOperation); - break; - default: - break; - } -#endif - } - - for (auto opIt = operands.begin(); opIt != operands.end(); ++opIt) - spvGroupOperands.push_back(*opIt); - - switch (op) { - case glslang::EOpAnyInvocation: - opCode = spv::OpSubgroupAnyKHR; - break; - case glslang::EOpAllInvocations: - opCode = spv::OpSubgroupAllKHR; - break; - case glslang::EOpAllInvocationsEqual: - opCode = spv::OpSubgroupAllEqualKHR; - break; - case glslang::EOpReadInvocation: - opCode = spv::OpSubgroupReadInvocationKHR; - if (builder.isVectorType(typeId)) - return CreateInvocationsVectorOperation(opCode, groupOperation, typeId, operands); - break; - case glslang::EOpReadFirstInvocation: - opCode = spv::OpSubgroupFirstInvocationKHR; - break; - case glslang::EOpBallot: - { - // NOTE: According to the spec, the result type of "OpSubgroupBallotKHR" must be a 4 component vector of 32 - // bit integer types. The GLSL built-in function "ballotARB()" assumes the maximum number of invocations in - // a subgroup is 64. 
Thus, we have to convert uvec4.xy to uint64_t as follow: - // - // result = Bitcast(SubgroupBallotKHR(Predicate).xy) - // - spv::Id uintType = builder.makeUintType(32); - spv::Id uvec4Type = builder.makeVectorType(uintType, 4); - spv::Id result = builder.createOp(spv::OpSubgroupBallotKHR, uvec4Type, spvGroupOperands); - - std::vector components; - components.push_back(builder.createCompositeExtract(result, uintType, 0)); - components.push_back(builder.createCompositeExtract(result, uintType, 1)); - - spv::Id uvec2Type = builder.makeVectorType(uintType, 2); - return builder.createUnaryOp(spv::OpBitcast, typeId, - builder.createCompositeConstruct(uvec2Type, components)); - } - -#ifdef AMD_EXTENSIONS - case glslang::EOpMinInvocations: - case glslang::EOpMaxInvocations: - case glslang::EOpAddInvocations: - case glslang::EOpMinInvocationsInclusiveScan: - case glslang::EOpMaxInvocationsInclusiveScan: - case glslang::EOpAddInvocationsInclusiveScan: - case glslang::EOpMinInvocationsExclusiveScan: - case glslang::EOpMaxInvocationsExclusiveScan: - case glslang::EOpAddInvocationsExclusiveScan: - if (op == glslang::EOpMinInvocations || - op == glslang::EOpMinInvocationsInclusiveScan || - op == glslang::EOpMinInvocationsExclusiveScan) { - if (isFloat) - opCode = spv::OpGroupFMin; - else { - if (isUnsigned) - opCode = spv::OpGroupUMin; - else - opCode = spv::OpGroupSMin; - } - } else if (op == glslang::EOpMaxInvocations || - op == glslang::EOpMaxInvocationsInclusiveScan || - op == glslang::EOpMaxInvocationsExclusiveScan) { - if (isFloat) - opCode = spv::OpGroupFMax; - else { - if (isUnsigned) - opCode = spv::OpGroupUMax; - else - opCode = spv::OpGroupSMax; - } - } else { - if (isFloat) - opCode = spv::OpGroupFAdd; - else - opCode = spv::OpGroupIAdd; - } - - if (builder.isVectorType(typeId)) - return CreateInvocationsVectorOperation(opCode, groupOperation, typeId, operands); - - break; - case glslang::EOpMinInvocationsNonUniform: - case glslang::EOpMaxInvocationsNonUniform: - 
case glslang::EOpAddInvocationsNonUniform: - case glslang::EOpMinInvocationsInclusiveScanNonUniform: - case glslang::EOpMaxInvocationsInclusiveScanNonUniform: - case glslang::EOpAddInvocationsInclusiveScanNonUniform: - case glslang::EOpMinInvocationsExclusiveScanNonUniform: - case glslang::EOpMaxInvocationsExclusiveScanNonUniform: - case glslang::EOpAddInvocationsExclusiveScanNonUniform: - if (op == glslang::EOpMinInvocationsNonUniform || - op == glslang::EOpMinInvocationsInclusiveScanNonUniform || - op == glslang::EOpMinInvocationsExclusiveScanNonUniform) { - if (isFloat) - opCode = spv::OpGroupFMinNonUniformAMD; - else { - if (isUnsigned) - opCode = spv::OpGroupUMinNonUniformAMD; - else - opCode = spv::OpGroupSMinNonUniformAMD; - } - } - else if (op == glslang::EOpMaxInvocationsNonUniform || - op == glslang::EOpMaxInvocationsInclusiveScanNonUniform || - op == glslang::EOpMaxInvocationsExclusiveScanNonUniform) { - if (isFloat) - opCode = spv::OpGroupFMaxNonUniformAMD; - else { - if (isUnsigned) - opCode = spv::OpGroupUMaxNonUniformAMD; - else - opCode = spv::OpGroupSMaxNonUniformAMD; - } - } - else { - if (isFloat) - opCode = spv::OpGroupFAddNonUniformAMD; - else - opCode = spv::OpGroupIAddNonUniformAMD; - } - - if (builder.isVectorType(typeId)) - return CreateInvocationsVectorOperation(opCode, groupOperation, typeId, operands); - - break; -#endif - default: - logger->missingFunctionality("invocation operation"); - return spv::NoResult; - } - - assert(opCode != spv::OpNop); - return builder.createOp(opCode, typeId, spvGroupOperands); -} - -// Create group invocation operations on a vector -spv::Id TGlslangToSpvTraverser::CreateInvocationsVectorOperation(spv::Op op, spv::GroupOperation groupOperation, spv::Id typeId, std::vector& operands) -{ -#ifdef AMD_EXTENSIONS - assert(op == spv::OpGroupFMin || op == spv::OpGroupUMin || op == spv::OpGroupSMin || - op == spv::OpGroupFMax || op == spv::OpGroupUMax || op == spv::OpGroupSMax || - op == spv::OpGroupFAdd || op == 
spv::OpGroupIAdd || op == spv::OpGroupBroadcast || - op == spv::OpSubgroupReadInvocationKHR || - op == spv::OpGroupFMinNonUniformAMD || op == spv::OpGroupUMinNonUniformAMD || op == spv::OpGroupSMinNonUniformAMD || - op == spv::OpGroupFMaxNonUniformAMD || op == spv::OpGroupUMaxNonUniformAMD || op == spv::OpGroupSMaxNonUniformAMD || - op == spv::OpGroupFAddNonUniformAMD || op == spv::OpGroupIAddNonUniformAMD); -#else - assert(op == spv::OpGroupFMin || op == spv::OpGroupUMin || op == spv::OpGroupSMin || - op == spv::OpGroupFMax || op == spv::OpGroupUMax || op == spv::OpGroupSMax || - op == spv::OpGroupFAdd || op == spv::OpGroupIAdd || op == spv::OpGroupBroadcast || - op == spv::OpSubgroupReadInvocationKHR); -#endif - - // Handle group invocation operations scalar by scalar. - // The result type is the same type as the original type. - // The algorithm is to: - // - break the vector into scalars - // - apply the operation to each scalar - // - make a vector out the scalar results - - // get the types sorted out - int numComponents = builder.getNumComponents(operands[0]); - spv::Id scalarType = builder.getScalarTypeId(builder.getTypeId(operands[0])); - std::vector results; - - // do each scalar op - for (int comp = 0; comp < numComponents; ++comp) { - std::vector indexes; - indexes.push_back(comp); - spv::Id scalar = builder.createCompositeExtract(operands[0], scalarType, indexes); - std::vector spvGroupOperands; - if (op == spv::OpSubgroupReadInvocationKHR) { - spvGroupOperands.push_back(scalar); - spvGroupOperands.push_back(operands[1]); - } else if (op == spv::OpGroupBroadcast) { - spvGroupOperands.push_back(builder.makeUintConstant(spv::ScopeSubgroup)); - spvGroupOperands.push_back(scalar); - spvGroupOperands.push_back(operands[1]); - } else { - spvGroupOperands.push_back(builder.makeUintConstant(spv::ScopeSubgroup)); - spvGroupOperands.push_back(groupOperation); - spvGroupOperands.push_back(scalar); - } - - results.push_back(builder.createOp(op, scalarType, 
spvGroupOperands)); - } - - // put the pieces together - return builder.createCompositeConstruct(typeId, results); -} - -spv::Id TGlslangToSpvTraverser::createMiscOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId, std::vector& operands, glslang::TBasicType typeProxy) -{ -#ifdef AMD_EXTENSIONS - bool isUnsigned = typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64 || typeProxy == glslang::EbtUint16; - bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble || typeProxy == glslang::EbtFloat16; -#else - bool isUnsigned = typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64; - bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble; -#endif - - spv::Op opCode = spv::OpNop; - int extBuiltins = -1; - int libCall = -1; - size_t consumedOperands = operands.size(); - spv::Id typeId0 = 0; - if (consumedOperands > 0) - typeId0 = builder.getTypeId(operands[0]); - spv::Id typeId1 = 0; - if (consumedOperands > 1) - typeId1 = builder.getTypeId(operands[1]); - spv::Id frexpIntType = 0; - - switch (op) { - case glslang::EOpMin: - if (isFloat) - libCall = spv::GLSLstd450FMin; - else if (isUnsigned) - libCall = spv::GLSLstd450UMin; - else - libCall = spv::GLSLstd450SMin; - builder.promoteScalar(precision, operands.front(), operands.back()); - break; - case glslang::EOpModf: - libCall = spv::GLSLstd450Modf; - break; - case glslang::EOpMax: - if (isFloat) - libCall = spv::GLSLstd450FMax; - else if (isUnsigned) - libCall = spv::GLSLstd450UMax; - else - libCall = spv::GLSLstd450SMax; - builder.promoteScalar(precision, operands.front(), operands.back()); - break; - case glslang::EOpPow: - libCall = spv::GLSLstd450Pow; - break; - case glslang::EOpDot: - opCode = spv::OpDot; - break; - case glslang::EOpAtan: - libCall = spv::GLSLstd450Atan2; - break; - - case glslang::EOpClamp: - if (isFloat) - libCall = spv::GLSLstd450FClamp; - else if (isUnsigned) - libCall = spv::GLSLstd450UClamp; - else - 
libCall = spv::GLSLstd450SClamp; - builder.promoteScalar(precision, operands.front(), operands[1]); - builder.promoteScalar(precision, operands.front(), operands[2]); - break; - case glslang::EOpMix: - if (! builder.isBoolType(builder.getScalarTypeId(builder.getTypeId(operands.back())))) { - assert(isFloat); - libCall = spv::GLSLstd450FMix; - } else { - opCode = spv::OpSelect; - std::swap(operands.front(), operands.back()); - } - builder.promoteScalar(precision, operands.front(), operands.back()); - break; - case glslang::EOpStep: - libCall = spv::GLSLstd450Step; - builder.promoteScalar(precision, operands.front(), operands.back()); - break; - case glslang::EOpSmoothStep: - libCall = spv::GLSLstd450SmoothStep; - builder.promoteScalar(precision, operands[0], operands[2]); - builder.promoteScalar(precision, operands[1], operands[2]); - break; - - case glslang::EOpDistance: - libCall = spv::GLSLstd450Distance; - break; - case glslang::EOpCross: - libCall = spv::GLSLstd450Cross; - break; - case glslang::EOpFaceForward: - libCall = spv::GLSLstd450FaceForward; - break; - case glslang::EOpReflect: - libCall = spv::GLSLstd450Reflect; - break; - case glslang::EOpRefract: - libCall = spv::GLSLstd450Refract; - break; - case glslang::EOpInterpolateAtSample: - builder.addCapability(spv::CapabilityInterpolationFunction); - libCall = spv::GLSLstd450InterpolateAtSample; - break; - case glslang::EOpInterpolateAtOffset: - builder.addCapability(spv::CapabilityInterpolationFunction); - libCall = spv::GLSLstd450InterpolateAtOffset; - break; - case glslang::EOpAddCarry: - opCode = spv::OpIAddCarry; - typeId = builder.makeStructResultType(typeId0, typeId0); - consumedOperands = 2; - break; - case glslang::EOpSubBorrow: - opCode = spv::OpISubBorrow; - typeId = builder.makeStructResultType(typeId0, typeId0); - consumedOperands = 2; - break; - case glslang::EOpUMulExtended: - opCode = spv::OpUMulExtended; - typeId = builder.makeStructResultType(typeId0, typeId0); - consumedOperands = 2; - 
break; - case glslang::EOpIMulExtended: - opCode = spv::OpSMulExtended; - typeId = builder.makeStructResultType(typeId0, typeId0); - consumedOperands = 2; - break; - case glslang::EOpBitfieldExtract: - if (isUnsigned) - opCode = spv::OpBitFieldUExtract; - else - opCode = spv::OpBitFieldSExtract; - break; - case glslang::EOpBitfieldInsert: - opCode = spv::OpBitFieldInsert; - break; - - case glslang::EOpFma: - libCall = spv::GLSLstd450Fma; - break; - case glslang::EOpFrexp: - { - libCall = spv::GLSLstd450FrexpStruct; - assert(builder.isPointerType(typeId1)); - typeId1 = builder.getContainedTypeId(typeId1); -#ifdef AMD_EXTENSIONS - int width = builder.getScalarTypeWidth(typeId1); -#else - int width = 32; -#endif - if (builder.getNumComponents(operands[0]) == 1) - frexpIntType = builder.makeIntegerType(width, true); - else - frexpIntType = builder.makeVectorType(builder.makeIntegerType(width, true), builder.getNumComponents(operands[0])); - typeId = builder.makeStructResultType(typeId0, frexpIntType); - consumedOperands = 1; - } - break; - case glslang::EOpLdexp: - libCall = spv::GLSLstd450Ldexp; - break; - - case glslang::EOpReadInvocation: - return createInvocationsOperation(op, typeId, operands, typeProxy); - -#ifdef AMD_EXTENSIONS - case glslang::EOpSwizzleInvocations: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_shader_ballot); - libCall = spv::SwizzleInvocationsAMD; - break; - case glslang::EOpSwizzleInvocationsMasked: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_shader_ballot); - libCall = spv::SwizzleInvocationsMaskedAMD; - break; - case glslang::EOpWriteInvocation: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_shader_ballot); - libCall = spv::WriteInvocationAMD; - break; - - case glslang::EOpMin3: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_shader_trinary_minmax); - if (isFloat) - libCall = spv::FMin3AMD; - else { - if (isUnsigned) - libCall = spv::UMin3AMD; - else - libCall = spv::SMin3AMD; - } - break; - case glslang::EOpMax3: - extBuiltins = 
getExtBuiltins(spv::E_SPV_AMD_shader_trinary_minmax); - if (isFloat) - libCall = spv::FMax3AMD; - else { - if (isUnsigned) - libCall = spv::UMax3AMD; - else - libCall = spv::SMax3AMD; - } - break; - case glslang::EOpMid3: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_shader_trinary_minmax); - if (isFloat) - libCall = spv::FMid3AMD; - else { - if (isUnsigned) - libCall = spv::UMid3AMD; - else - libCall = spv::SMid3AMD; - } - break; - - case glslang::EOpInterpolateAtVertex: - extBuiltins = getExtBuiltins(spv::E_SPV_AMD_shader_explicit_vertex_parameter); - libCall = spv::InterpolateAtVertexAMD; - break; -#endif - - default: - return 0; - } - - spv::Id id = 0; - if (libCall >= 0) { - // Use an extended instruction from the standard library. - // Construct the call arguments, without modifying the original operands vector. - // We might need the remaining arguments, e.g. in the EOpFrexp case. - std::vector callArguments(operands.begin(), operands.begin() + consumedOperands); - id = builder.createBuiltinCall(typeId, extBuiltins >= 0 ? 
extBuiltins : stdBuiltins, libCall, callArguments); - } else { - switch (consumedOperands) { - case 0: - // should all be handled by visitAggregate and createNoArgOperation - assert(0); - return 0; - case 1: - // should all be handled by createUnaryOperation - assert(0); - return 0; - case 2: - id = builder.createBinOp(opCode, typeId, operands[0], operands[1]); - break; - default: - // anything 3 or over doesn't have l-value operands, so all should be consumed - assert(consumedOperands == operands.size()); - id = builder.createOp(opCode, typeId, operands); - break; - } - } - - // Decode the return types that were structures - switch (op) { - case glslang::EOpAddCarry: - case glslang::EOpSubBorrow: - builder.createStore(builder.createCompositeExtract(id, typeId0, 1), operands[2]); - id = builder.createCompositeExtract(id, typeId0, 0); - break; - case glslang::EOpUMulExtended: - case glslang::EOpIMulExtended: - builder.createStore(builder.createCompositeExtract(id, typeId0, 0), operands[3]); - builder.createStore(builder.createCompositeExtract(id, typeId0, 1), operands[2]); - break; - case glslang::EOpFrexp: - { - assert(operands.size() == 2); - if (builder.isFloatType(builder.getScalarTypeId(typeId1))) { - // "exp" is floating-point type (from HLSL intrinsic) - spv::Id member1 = builder.createCompositeExtract(id, frexpIntType, 1); - member1 = builder.createUnaryOp(spv::OpConvertSToF, typeId1, member1); - builder.createStore(member1, operands[1]); - } else - // "exp" is integer type (from GLSL built-in function) - builder.createStore(builder.createCompositeExtract(id, frexpIntType, 1), operands[1]); - id = builder.createCompositeExtract(id, typeId0, 0); - } - break; - default: - break; - } - - return builder.setPrecision(id, precision); -} - -// Intrinsics with no arguments (or no return value, and no precision). 
-spv::Id TGlslangToSpvTraverser::createNoArgOperation(glslang::TOperator op, spv::Decoration precision, spv::Id typeId) -{ - // TODO: get the barrier operands correct - - switch (op) { - case glslang::EOpEmitVertex: - builder.createNoResultOp(spv::OpEmitVertex); - return 0; - case glslang::EOpEndPrimitive: - builder.createNoResultOp(spv::OpEndPrimitive); - return 0; - case glslang::EOpBarrier: - if (glslangIntermediate->getStage() == EShLangTessControl) { - builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeInvocation, spv::MemorySemanticsMaskNone); - // TODO: prefer the following, when available: - // builder.createControlBarrier(spv::ScopePatch, spv::ScopePatch, - // spv::MemorySemanticsPatchMask | - // spv::MemorySemanticsAcquireReleaseMask); - } else { - builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeWorkgroup, - spv::MemorySemanticsWorkgroupMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - } - return 0; - case glslang::EOpMemoryBarrier: - builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsAllMemory | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpMemoryBarrierAtomicCounter: - builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsAtomicCounterMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpMemoryBarrierBuffer: - builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsUniformMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpMemoryBarrierImage: - builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsImageMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpMemoryBarrierShared: - builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsWorkgroupMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpGroupMemoryBarrier: - builder.createMemoryBarrier(spv::ScopeWorkgroup, spv::MemorySemanticsAllMemory | - 
spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpAllMemoryBarrierWithGroupSync: - builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeDevice, - spv::MemorySemanticsAllMemory | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpDeviceMemoryBarrier: - builder.createMemoryBarrier(spv::ScopeDevice, spv::MemorySemanticsUniformMemoryMask | - spv::MemorySemanticsImageMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpDeviceMemoryBarrierWithGroupSync: - builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeDevice, spv::MemorySemanticsUniformMemoryMask | - spv::MemorySemanticsImageMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpWorkgroupMemoryBarrier: - builder.createMemoryBarrier(spv::ScopeWorkgroup, spv::MemorySemanticsWorkgroupMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - return 0; - case glslang::EOpWorkgroupMemoryBarrierWithGroupSync: - builder.createControlBarrier(spv::ScopeWorkgroup, spv::ScopeWorkgroup, - spv::MemorySemanticsWorkgroupMemoryMask | - spv::MemorySemanticsAcquireReleaseMask); - return 0; -#ifdef AMD_EXTENSIONS - case glslang::EOpTime: - { - std::vector args; // Dummy arguments - spv::Id id = builder.createBuiltinCall(typeId, getExtBuiltins(spv::E_SPV_AMD_gcn_shader), spv::TimeAMD, args); - return builder.setPrecision(id, precision); - } -#endif - default: - logger->missingFunctionality("unknown operation with no arguments"); - return 0; - } -} - -spv::Id TGlslangToSpvTraverser::getSymbolId(const glslang::TIntermSymbol* symbol) -{ - auto iter = symbolValues.find(symbol->getId()); - spv::Id id; - if (symbolValues.end() != iter) { - id = iter->second; - return id; - } - - // it was not found, create it - id = createSpvVariable(symbol); - symbolValues[symbol->getId()] = id; - - if (symbol->getBasicType() != glslang::EbtBlock) { - addDecoration(id, TranslatePrecisionDecoration(symbol->getType())); - 
addDecoration(id, TranslateInterpolationDecoration(symbol->getType().getQualifier())); - addDecoration(id, TranslateAuxiliaryStorageDecoration(symbol->getType().getQualifier())); - if (symbol->getType().getQualifier().hasSpecConstantId()) - addDecoration(id, spv::DecorationSpecId, symbol->getType().getQualifier().layoutSpecConstantId); - if (symbol->getQualifier().hasIndex()) - builder.addDecoration(id, spv::DecorationIndex, symbol->getQualifier().layoutIndex); - if (symbol->getQualifier().hasComponent()) - builder.addDecoration(id, spv::DecorationComponent, symbol->getQualifier().layoutComponent); - // atomic counters use this: - if (symbol->getQualifier().hasOffset()) - builder.addDecoration(id, spv::DecorationOffset, symbol->getQualifier().layoutOffset); - } - - if (symbol->getQualifier().hasLocation()) - builder.addDecoration(id, spv::DecorationLocation, symbol->getQualifier().layoutLocation); - addDecoration(id, TranslateInvariantDecoration(symbol->getType().getQualifier())); - if (symbol->getQualifier().hasStream() && glslangIntermediate->isMultiStream()) { - builder.addCapability(spv::CapabilityGeometryStreams); - builder.addDecoration(id, spv::DecorationStream, symbol->getQualifier().layoutStream); - } - if (symbol->getQualifier().hasSet()) - builder.addDecoration(id, spv::DecorationDescriptorSet, symbol->getQualifier().layoutSet); - else if (IsDescriptorResource(symbol->getType())) { - // default to 0 - builder.addDecoration(id, spv::DecorationDescriptorSet, 0); - } - if (symbol->getQualifier().hasBinding()) - builder.addDecoration(id, spv::DecorationBinding, symbol->getQualifier().layoutBinding); - if (symbol->getQualifier().hasAttachment()) - builder.addDecoration(id, spv::DecorationInputAttachmentIndex, symbol->getQualifier().layoutAttachment); - if (glslangIntermediate->getXfbMode()) { - builder.addCapability(spv::CapabilityTransformFeedback); - if (symbol->getQualifier().hasXfbStride()) - builder.addDecoration(id, spv::DecorationXfbStride, 
symbol->getQualifier().layoutXfbStride); - if (symbol->getQualifier().hasXfbBuffer()) { - builder.addDecoration(id, spv::DecorationXfbBuffer, symbol->getQualifier().layoutXfbBuffer); - unsigned stride = glslangIntermediate->getXfbStride(symbol->getQualifier().layoutXfbBuffer); - if (stride != glslang::TQualifier::layoutXfbStrideEnd) - builder.addDecoration(id, spv::DecorationXfbStride, stride); - } - if (symbol->getQualifier().hasXfbOffset()) - builder.addDecoration(id, spv::DecorationOffset, symbol->getQualifier().layoutXfbOffset); - } - - if (symbol->getType().isImage()) { - std::vector memory; - TranslateMemoryDecoration(symbol->getType().getQualifier(), memory); - for (unsigned int i = 0; i < memory.size(); ++i) - addDecoration(id, memory[i]); - } - - // built-in variable decorations - spv::BuiltIn builtIn = TranslateBuiltInDecoration(symbol->getQualifier().builtIn, false); - if (builtIn != spv::BuiltInMax) - addDecoration(id, spv::DecorationBuiltIn, (int)builtIn); - -#ifdef NV_EXTENSIONS - if (builtIn == spv::BuiltInSampleMask) { - spv::Decoration decoration; - // GL_NV_sample_mask_override_coverage extension - if (glslangIntermediate->getLayoutOverrideCoverage()) - decoration = (spv::Decoration)spv::DecorationOverrideCoverageNV; - else - decoration = (spv::Decoration)spv::DecorationMax; - addDecoration(id, decoration); - if (decoration != spv::DecorationMax) { - builder.addExtension(spv::E_SPV_NV_sample_mask_override_coverage); - } - } - else if (builtIn == spv::BuiltInLayer) { - // SPV_NV_viewport_array2 extension - if (symbol->getQualifier().layoutViewportRelative) { - addDecoration(id, (spv::Decoration)spv::DecorationViewportRelativeNV); - builder.addCapability(spv::CapabilityShaderViewportMaskNV); - builder.addExtension(spv::E_SPV_NV_viewport_array2); - } - if (symbol->getQualifier().layoutSecondaryViewportRelativeOffset != -2048) { - addDecoration(id, (spv::Decoration)spv::DecorationSecondaryViewportRelativeNV, 
symbol->getQualifier().layoutSecondaryViewportRelativeOffset); - builder.addCapability(spv::CapabilityShaderStereoViewNV); - builder.addExtension(spv::E_SPV_NV_stereo_view_rendering); - } - } - - if (symbol->getQualifier().layoutPassthrough) { - addDecoration(id, spv::DecorationPassthroughNV); - builder.addCapability(spv::CapabilityGeometryShaderPassthroughNV); - builder.addExtension(spv::E_SPV_NV_geometry_shader_passthrough); - } -#endif - - return id; -} - -// If 'dec' is valid, add no-operand decoration to an object -void TGlslangToSpvTraverser::addDecoration(spv::Id id, spv::Decoration dec) -{ - if (dec != spv::DecorationMax) - builder.addDecoration(id, dec); -} - -// If 'dec' is valid, add a one-operand decoration to an object -void TGlslangToSpvTraverser::addDecoration(spv::Id id, spv::Decoration dec, unsigned value) -{ - if (dec != spv::DecorationMax) - builder.addDecoration(id, dec, value); -} - -// If 'dec' is valid, add a no-operand decoration to a struct member -void TGlslangToSpvTraverser::addMemberDecoration(spv::Id id, int member, spv::Decoration dec) -{ - if (dec != spv::DecorationMax) - builder.addMemberDecoration(id, (unsigned)member, dec); -} - -// If 'dec' is valid, add a one-operand decoration to a struct member -void TGlslangToSpvTraverser::addMemberDecoration(spv::Id id, int member, spv::Decoration dec, unsigned value) -{ - if (dec != spv::DecorationMax) - builder.addMemberDecoration(id, (unsigned)member, dec, value); -} - -// Make a full tree of instructions to build a SPIR-V specialization constant, -// or regular constant if possible. -// -// TBD: this is not yet done, nor verified to be the best design, it does do the leaf symbols though -// -// Recursively walk the nodes. The nodes form a tree whose leaves are -// regular constants, which themselves are trees that createSpvConstant() -// recursively walks. 
So, this function walks the "top" of the tree: -// - emit specialization constant-building instructions for specConstant -// - when running into a non-spec-constant, switch to createSpvConstant() -spv::Id TGlslangToSpvTraverser::createSpvConstant(const glslang::TIntermTyped& node) -{ - assert(node.getQualifier().isConstant()); - - // Handle front-end constants first (non-specialization constants). - if (! node.getQualifier().specConstant) { - // hand off to the non-spec-constant path - assert(node.getAsConstantUnion() != nullptr || node.getAsSymbolNode() != nullptr); - int nextConst = 0; - return createSpvConstantFromConstUnionArray(node.getType(), node.getAsConstantUnion() ? node.getAsConstantUnion()->getConstArray() : node.getAsSymbolNode()->getConstArray(), - nextConst, false); - } - - // We now know we have a specialization constant to build - - // gl_WorkGroupSize is a special case until the front-end handles hierarchical specialization constants, - // even then, it's specialization ids are handled by special case syntax in GLSL: layout(local_size_x = ... - if (node.getType().getQualifier().builtIn == glslang::EbvWorkGroupSize) { - std::vector dimConstId; - for (int dim = 0; dim < 3; ++dim) { - bool specConst = (glslangIntermediate->getLocalSizeSpecId(dim) != glslang::TQualifier::layoutNotSet); - dimConstId.push_back(builder.makeUintConstant(glslangIntermediate->getLocalSize(dim), specConst)); - if (specConst) - addDecoration(dimConstId.back(), spv::DecorationSpecId, glslangIntermediate->getLocalSizeSpecId(dim)); - } - return builder.makeCompositeConstant(builder.makeVectorType(builder.makeUintType(32), 3), dimConstId, true); - } - - // An AST node labelled as specialization constant should be a symbol node. - // Its initializer should either be a sub tree with constant nodes, or a constant union array. 
- if (auto* sn = node.getAsSymbolNode()) { - if (auto* sub_tree = sn->getConstSubtree()) { - // Traverse the constant constructor sub tree like generating normal run-time instructions. - // During the AST traversal, if the node is marked as 'specConstant', SpecConstantOpModeGuard - // will set the builder into spec constant op instruction generating mode. - sub_tree->traverse(this); - return accessChainLoad(sub_tree->getType()); - } else if (auto* const_union_array = &sn->getConstArray()){ - int nextConst = 0; - spv::Id id = createSpvConstantFromConstUnionArray(sn->getType(), *const_union_array, nextConst, true); - builder.addName(id, sn->getName().c_str()); - return id; - } - } - - // Neither a front-end constant node, nor a specialization constant node with constant union array or - // constant sub tree as initializer. - logger->missingFunctionality("Neither a front-end constant nor a spec constant."); - exit(1); - return spv::NoResult; -} - -// Use 'consts' as the flattened glslang source of scalar constants to recursively -// build the aggregate SPIR-V constant. -// -// If there are not enough elements present in 'consts', 0 will be substituted; -// an empty 'consts' can be used to create a fully zeroed SPIR-V constant. 
-// -spv::Id TGlslangToSpvTraverser::createSpvConstantFromConstUnionArray(const glslang::TType& glslangType, const glslang::TConstUnionArray& consts, int& nextConst, bool specConstant) -{ - // vector of constants for SPIR-V - std::vector spvConsts; - - // Type is used for struct and array constants - spv::Id typeId = convertGlslangToSpvType(glslangType); - - if (glslangType.isArray()) { - glslang::TType elementType(glslangType, 0); - for (int i = 0; i < glslangType.getOuterArraySize(); ++i) - spvConsts.push_back(createSpvConstantFromConstUnionArray(elementType, consts, nextConst, false)); - } else if (glslangType.isMatrix()) { - glslang::TType vectorType(glslangType, 0); - for (int col = 0; col < glslangType.getMatrixCols(); ++col) - spvConsts.push_back(createSpvConstantFromConstUnionArray(vectorType, consts, nextConst, false)); - } else if (glslangType.getStruct()) { - glslang::TVector::const_iterator iter; - for (iter = glslangType.getStruct()->begin(); iter != glslangType.getStruct()->end(); ++iter) - spvConsts.push_back(createSpvConstantFromConstUnionArray(*iter->type, consts, nextConst, false)); - } else if (glslangType.getVectorSize() > 1) { - for (unsigned int i = 0; i < (unsigned int)glslangType.getVectorSize(); ++i) { - bool zero = nextConst >= consts.size(); - switch (glslangType.getBasicType()) { - case glslang::EbtInt: - spvConsts.push_back(builder.makeIntConstant(zero ? 0 : consts[nextConst].getIConst())); - break; - case glslang::EbtUint: - spvConsts.push_back(builder.makeUintConstant(zero ? 0 : consts[nextConst].getUConst())); - break; - case glslang::EbtInt64: - spvConsts.push_back(builder.makeInt64Constant(zero ? 0 : consts[nextConst].getI64Const())); - break; - case glslang::EbtUint64: - spvConsts.push_back(builder.makeUint64Constant(zero ? 0 : consts[nextConst].getU64Const())); - break; -#ifdef AMD_EXTENSIONS - case glslang::EbtInt16: - spvConsts.push_back(builder.makeInt16Constant(zero ? 
0 : (short)consts[nextConst].getIConst())); - break; - case glslang::EbtUint16: - spvConsts.push_back(builder.makeUint16Constant(zero ? 0 : (unsigned short)consts[nextConst].getUConst())); - break; -#endif - case glslang::EbtFloat: - spvConsts.push_back(builder.makeFloatConstant(zero ? 0.0F : (float)consts[nextConst].getDConst())); - break; - case glslang::EbtDouble: - spvConsts.push_back(builder.makeDoubleConstant(zero ? 0.0 : consts[nextConst].getDConst())); - break; -#ifdef AMD_EXTENSIONS - case glslang::EbtFloat16: - spvConsts.push_back(builder.makeFloat16Constant(zero ? 0.0F : (float)consts[nextConst].getDConst())); - break; -#endif - case glslang::EbtBool: - spvConsts.push_back(builder.makeBoolConstant(zero ? false : consts[nextConst].getBConst())); - break; - default: - assert(0); - break; - } - ++nextConst; - } - } else { - // we have a non-aggregate (scalar) constant - bool zero = nextConst >= consts.size(); - spv::Id scalar = 0; - switch (glslangType.getBasicType()) { - case glslang::EbtInt: - scalar = builder.makeIntConstant(zero ? 0 : consts[nextConst].getIConst(), specConstant); - break; - case glslang::EbtUint: - scalar = builder.makeUintConstant(zero ? 0 : consts[nextConst].getUConst(), specConstant); - break; - case glslang::EbtInt64: - scalar = builder.makeInt64Constant(zero ? 0 : consts[nextConst].getI64Const(), specConstant); - break; - case glslang::EbtUint64: - scalar = builder.makeUint64Constant(zero ? 0 : consts[nextConst].getU64Const(), specConstant); - break; -#ifdef AMD_EXTENSIONS - case glslang::EbtInt16: - scalar = builder.makeInt16Constant(zero ? 0 : (short)consts[nextConst].getIConst(), specConstant); - break; - case glslang::EbtUint16: - scalar = builder.makeUint16Constant(zero ? 0 : (unsigned short)consts[nextConst].getUConst(), specConstant); - break; -#endif - case glslang::EbtFloat: - scalar = builder.makeFloatConstant(zero ? 
0.0F : (float)consts[nextConst].getDConst(), specConstant); - break; - case glslang::EbtDouble: - scalar = builder.makeDoubleConstant(zero ? 0.0 : consts[nextConst].getDConst(), specConstant); - break; -#ifdef AMD_EXTENSIONS - case glslang::EbtFloat16: - scalar = builder.makeFloat16Constant(zero ? 0.0F : (float)consts[nextConst].getDConst(), specConstant); - break; -#endif - case glslang::EbtBool: - scalar = builder.makeBoolConstant(zero ? false : consts[nextConst].getBConst(), specConstant); - break; - default: - assert(0); - break; - } - ++nextConst; - return scalar; - } - - return builder.makeCompositeConstant(typeId, spvConsts); -} - -// Return true if the node is a constant or symbol whose reading has no -// non-trivial observable cost or effect. -bool TGlslangToSpvTraverser::isTrivialLeaf(const glslang::TIntermTyped* node) -{ - // don't know what this is - if (node == nullptr) - return false; - - // a constant is safe - if (node->getAsConstantUnion() != nullptr) - return true; - - // not a symbol means non-trivial - if (node->getAsSymbolNode() == nullptr) - return false; - - // a symbol, depends on what's being read - switch (node->getType().getQualifier().storage) { - case glslang::EvqTemporary: - case glslang::EvqGlobal: - case glslang::EvqIn: - case glslang::EvqInOut: - case glslang::EvqConst: - case glslang::EvqConstReadOnly: - case glslang::EvqUniform: - return true; - default: - return false; - } -} - -// A node is trivial if it is a single operation with no side effects. -// HLSL (and/or vectors) are always trivial, as it does not short circuit. -// Otherwise, error on the side of saying non-trivial. -// Return true if trivial. -bool TGlslangToSpvTraverser::isTrivial(const glslang::TIntermTyped* node) -{ - if (node == nullptr) - return false; - - // count non scalars as trivial, as well as anything coming from HLSL - if (! 
node->getType().isScalarOrVec1() || glslangIntermediate->getSource() == glslang::EShSourceHlsl) - return true; - - // symbols and constants are trivial - if (isTrivialLeaf(node)) - return true; - - // otherwise, it needs to be a simple operation or one or two leaf nodes - - // not a simple operation - const glslang::TIntermBinary* binaryNode = node->getAsBinaryNode(); - const glslang::TIntermUnary* unaryNode = node->getAsUnaryNode(); - if (binaryNode == nullptr && unaryNode == nullptr) - return false; - - // not on leaf nodes - if (binaryNode && (! isTrivialLeaf(binaryNode->getLeft()) || ! isTrivialLeaf(binaryNode->getRight()))) - return false; - - if (unaryNode && ! isTrivialLeaf(unaryNode->getOperand())) { - return false; - } - - switch (node->getAsOperator()->getOp()) { - case glslang::EOpLogicalNot: - case glslang::EOpConvIntToBool: - case glslang::EOpConvUintToBool: - case glslang::EOpConvFloatToBool: - case glslang::EOpConvDoubleToBool: - case glslang::EOpEqual: - case glslang::EOpNotEqual: - case glslang::EOpLessThan: - case glslang::EOpGreaterThan: - case glslang::EOpLessThanEqual: - case glslang::EOpGreaterThanEqual: - case glslang::EOpIndexDirect: - case glslang::EOpIndexDirectStruct: - case glslang::EOpLogicalXor: - case glslang::EOpAny: - case glslang::EOpAll: - return true; - default: - return false; - } -} - -// Emit short-circuiting code, where 'right' is never evaluated unless -// the left side is true (for &&) or false (for ||). 
-spv::Id TGlslangToSpvTraverser::createShortCircuit(glslang::TOperator op, glslang::TIntermTyped& left, glslang::TIntermTyped& right) -{ - spv::Id boolTypeId = builder.makeBoolType(); - - // emit left operand - builder.clearAccessChain(); - left.traverse(this); - spv::Id leftId = accessChainLoad(left.getType()); - - // Operands to accumulate OpPhi operands - std::vector phiOperands; - // accumulate left operand's phi information - phiOperands.push_back(leftId); - phiOperands.push_back(builder.getBuildPoint()->getId()); - - // Make the two kinds of operation symmetric with a "!" - // || => emit "if (! left) result = right" - // && => emit "if ( left) result = right" - // - // TODO: this runtime "not" for || could be avoided by adding functionality - // to 'builder' to have an "else" without an "then" - if (op == glslang::EOpLogicalOr) - leftId = builder.createUnaryOp(spv::OpLogicalNot, boolTypeId, leftId); - - // make an "if" based on the left value - spv::Builder::If ifBuilder(leftId, spv::SelectionControlMaskNone, builder); - - // emit right operand as the "then" part of the "if" - builder.clearAccessChain(); - right.traverse(this); - spv::Id rightId = accessChainLoad(right.getType()); - - // accumulate left operand's phi information - phiOperands.push_back(rightId); - phiOperands.push_back(builder.getBuildPoint()->getId()); - - // finish the "if" - ifBuilder.makeEndIf(); - - // phi together the two results - return builder.createOp(spv::OpPhi, boolTypeId, phiOperands); -} - -#ifdef AMD_EXTENSIONS -// Return type Id of the imported set of extended instructions corresponds to the name. -// Import this set if it has not been imported yet. 
-spv::Id TGlslangToSpvTraverser::getExtBuiltins(const char* name) -{ - if (extBuiltinMap.find(name) != extBuiltinMap.end()) - return extBuiltinMap[name]; - else { - builder.addExtension(name); - spv::Id extBuiltins = builder.import(name); - extBuiltinMap[name] = extBuiltins; - return extBuiltins; - } -} -#endif - -}; // end anonymous namespace - -namespace glslang { - -void GetSpirvVersion(std::string& version) -{ - const int bufSize = 100; - char buf[bufSize]; - snprintf(buf, bufSize, "0x%08x, Revision %d", spv::Version, spv::Revision); - version = buf; -} - -// For low-order part of the generator's magic number. Bump up -// when there is a change in the style (e.g., if SSA form changes, -// or a different instruction sequence to do something gets used). -int GetSpirvGeneratorVersion() -{ - // return 1; // start - // return 2; // EOpAtomicCounterDecrement gets a post decrement, to map between GLSL -> SPIR-V - // return 3; // change/correct barrier-instruction operands, to match memory model group decisions - return 4; // some deeper access chains: for dynamic vector component, and local Boolean component -} - -// Write SPIR-V out to a binary file -void OutputSpvBin(const std::vector& spirv, const char* baseName) -{ - std::ofstream out; - out.open(baseName, std::ios::binary | std::ios::out); - if (out.fail()) - printf("ERROR: Failed to open file: %s\n", baseName); - for (int i = 0; i < (int)spirv.size(); ++i) { - unsigned int word = spirv[i]; - out.write((const char*)&word, 4); - } - out.close(); -} - -// Write SPIR-V out to a text file with 32-bit hexadecimal words -void OutputSpvHex(const std::vector& spirv, const char* baseName, const char* varName) -{ - std::ofstream out; - out.open(baseName, std::ios::binary | std::ios::out); - if (out.fail()) - printf("ERROR: Failed to open file: %s\n", baseName); - out << "\t// " GLSLANG_REVISION " " GLSLANG_DATE << std::endl; - if (varName != nullptr) { - out << "\t #pragma once" << std::endl; - out << "const uint32_t " << 
varName << "[] = {" << std::endl; - } - const int WORDS_PER_LINE = 8; - for (int i = 0; i < (int)spirv.size(); i += WORDS_PER_LINE) { - out << "\t"; - for (int j = 0; j < WORDS_PER_LINE && i + j < (int)spirv.size(); ++j) { - const unsigned int word = spirv[i + j]; - out << "0x" << std::hex << std::setw(8) << std::setfill('0') << word; - if (i + j + 1 < (int)spirv.size()) { - out << ","; - } - } - out << std::endl; - } - if (varName != nullptr) { - out << "};"; - } - out.close(); -} - -#ifdef ENABLE_OPT -void errHandler(const std::string& str) { - std::cerr << str << std::endl; -} -#endif - -// -// Set up the glslang traversal -// -void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector& spirv, SpvOptions* options) -{ - spv::SpvBuildLogger logger; - GlslangToSpv(intermediate, spirv, &logger, options); -} - -void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector& spirv, - spv::SpvBuildLogger* logger, SpvOptions* options) -{ - TIntermNode* root = intermediate.getTreeRoot(); - - if (root == 0) - return; - - glslang::SpvOptions defaultOptions; - if (options == nullptr) - options = &defaultOptions; - - glslang::GetThreadPoolAllocator().push(); - - TGlslangToSpvTraverser it(intermediate.getSpv().spv, &intermediate, logger, *options); - root->traverse(&it); - it.finishSpv(); - it.dumpSpv(spirv); - -#ifdef ENABLE_OPT - // If from HLSL, run spirv-opt to "legalize" the SPIR-V for Vulkan - // eg. forward and remove memory writes of opaque types. 
- if ((intermediate.getSource() == EShSourceHlsl || - options->optimizeSize) && - !options->disableOptimizer) { - spv_target_env target_env = SPV_ENV_UNIVERSAL_1_2; - - spvtools::Optimizer optimizer(target_env); - optimizer.SetMessageConsumer([](spv_message_level_t level, - const char* source, - const spv_position_t& position, - const char* message) { - std::cerr << StringifyMessage(level, source, position, message) - << std::endl; - }); - - optimizer.RegisterPass(CreateInlineExhaustivePass()); - optimizer.RegisterPass(CreateEliminateDeadFunctionsPass()); - optimizer.RegisterPass(CreateScalarReplacementPass()); - optimizer.RegisterPass(CreateLocalAccessChainConvertPass()); - optimizer.RegisterPass(CreateLocalSingleBlockLoadStoreElimPass()); - optimizer.RegisterPass(CreateLocalSingleStoreElimPass()); - optimizer.RegisterPass(CreateInsertExtractElimPass()); - optimizer.RegisterPass(CreateDeadInsertElimPass()); - optimizer.RegisterPass(CreateAggressiveDCEPass()); - optimizer.RegisterPass(CreateDeadBranchElimPass()); - optimizer.RegisterPass(CreateCFGCleanupPass()); - optimizer.RegisterPass(CreateBlockMergePass()); - optimizer.RegisterPass(CreateLocalMultiStoreElimPass()); - optimizer.RegisterPass(CreateInsertExtractElimPass()); - optimizer.RegisterPass(CreateDeadInsertElimPass()); - if (options->optimizeSize) { - optimizer.RegisterPass(CreateRedundancyEliminationPass()); - // TODO(greg-lunarg): Add this when AMD driver issues are resolved - // optimizer.RegisterPass(CreateCommonUniformElimPass()); - } - optimizer.RegisterPass(CreateAggressiveDCEPass()); - - if (!optimizer.Run(spirv.data(), spirv.size(), &spirv)) - return; - - // Remove dead module-level objects: functions, types, vars - // TODO(greg-lunarg): Switch to spirv-opt versions when available - spv::spirvbin_t Remapper(0); - Remapper.registerErrorHandler(errHandler); - Remapper.remap(spirv, spv::spirvbin_t::DCE_ALL); - } -#endif - - glslang::GetThreadPoolAllocator().pop(); -} - -}; // end namespace glslang 
diff --git a/third_party/glslang-spirv/GlslangToSpv.h b/third_party/glslang-spirv/GlslangToSpv.h deleted file mode 100644 index 3a4371375..000000000 --- a/third_party/glslang-spirv/GlslangToSpv.h +++ /dev/null @@ -1,67 +0,0 @@ -// -// Copyright (C) 2014 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -#pragma once - -#if _MSC_VER >= 1900 - #pragma warning(disable : 4464) // relative include path contains '..' 
-#endif - -#include "../glslang/Include/intermediate.h" - -#include -#include - -#include "Logger.h" - -namespace glslang { - -struct SpvOptions { - SpvOptions() : generateDebugInfo(false), disableOptimizer(true), - optimizeSize(false) { } - bool generateDebugInfo; - bool disableOptimizer; - bool optimizeSize; -}; - -void GetSpirvVersion(std::string&); -int GetSpirvGeneratorVersion(); -void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector& spirv, - SpvOptions* options = nullptr); -void GlslangToSpv(const glslang::TIntermediate& intermediate, std::vector& spirv, - spv::SpvBuildLogger* logger, SpvOptions* options = nullptr); -void OutputSpvBin(const std::vector& spirv, const char* baseName); -void OutputSpvHex(const std::vector& spirv, const char* baseName, const char* varName); - -} diff --git a/third_party/glslang-spirv/InReadableOrder.cpp b/third_party/glslang-spirv/InReadableOrder.cpp deleted file mode 100644 index 52b29613a..000000000 --- a/third_party/glslang-spirv/InReadableOrder.cpp +++ /dev/null @@ -1,113 +0,0 @@ -// -// Copyright (C) 2016 Google, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -// The SPIR-V spec requires code blocks to appear in an order satisfying the -// dominator-tree direction (ie, dominator before the dominated). This is, -// actually, easy to achieve: any pre-order CFG traversal algorithm will do it. -// Because such algorithms visit a block only after traversing some path to it -// from the root, they necessarily visit the block's idom first. -// -// But not every graph-traversal algorithm outputs blocks in an order that -// appears logical to human readers. The problem is that unrelated branches may -// be interspersed with each other, and merge blocks may come before some of the -// branches being merged. -// -// A good, human-readable order of blocks may be achieved by performing -// depth-first search but delaying merge nodes until after all their branches -// have been visited. This is implemented below by the inReadableOrder() -// function. - -#include "spvIR.h" - -#include -#include - -using spv::Block; -using spv::Id; - -namespace { -// Traverses CFG in a readable order, invoking a pre-set callback on each block. -// Use by calling visit() on the root block. 
-class ReadableOrderTraverser { -public: - explicit ReadableOrderTraverser(std::function callback) : callback_(callback) {} - // Visits the block if it hasn't been visited already and isn't currently - // being delayed. Invokes callback(block), then descends into its - // successors. Delays merge-block and continue-block processing until all - // the branches have been completed. - void visit(Block* block) - { - assert(block); - if (visited_.count(block) || delayed_.count(block)) - return; - callback_(block); - visited_.insert(block); - Block* mergeBlock = nullptr; - Block* continueBlock = nullptr; - auto mergeInst = block->getMergeInstruction(); - if (mergeInst) { - Id mergeId = mergeInst->getIdOperand(0); - mergeBlock = block->getParent().getParent().getInstruction(mergeId)->getBlock(); - delayed_.insert(mergeBlock); - if (mergeInst->getOpCode() == spv::OpLoopMerge) { - Id continueId = mergeInst->getIdOperand(1); - continueBlock = - block->getParent().getParent().getInstruction(continueId)->getBlock(); - delayed_.insert(continueBlock); - } - } - const auto successors = block->getSuccessors(); - for (auto it = successors.cbegin(); it != successors.cend(); ++it) - visit(*it); - if (continueBlock) { - delayed_.erase(continueBlock); - visit(continueBlock); - } - if (mergeBlock) { - delayed_.erase(mergeBlock); - visit(mergeBlock); - } - } - -private: - std::function callback_; - // Whether a block has already been visited or is being delayed. - std::unordered_set visited_, delayed_; -}; -} - -void spv::inReadableOrder(Block* root, std::function callback) -{ - ReadableOrderTraverser(callback).visit(root); -} diff --git a/third_party/glslang-spirv/Include/BaseTypes.h b/third_party/glslang-spirv/Include/BaseTypes.h deleted file mode 100644 index 050c2c4b9..000000000 --- a/third_party/glslang-spirv/Include/BaseTypes.h +++ /dev/null @@ -1,387 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// Copyright (C) 2012-2013 LunarG, Inc. -// -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#ifndef _BASICTYPES_INCLUDED_ -#define _BASICTYPES_INCLUDED_ - -namespace glslang { - -// -// Basic type. Arrays, vectors, sampler details, etc., are orthogonal to this. 
-// -enum TBasicType { - EbtVoid, - EbtFloat, - EbtDouble, -#ifdef AMD_EXTENSIONS - EbtFloat16, -#endif - EbtInt, - EbtUint, - EbtInt64, - EbtUint64, -#ifdef AMD_EXTENSIONS - EbtInt16, - EbtUint16, -#endif - EbtBool, - EbtAtomicUint, - EbtSampler, - EbtStruct, - EbtBlock, - - // HLSL types that live only temporarily. - EbtString, - - EbtNumTypes -}; - -// -// Storage qualifiers. Should align with different kinds of storage or -// resource or GLSL storage qualifier. Expansion is deprecated. -// -// N.B.: You probably DON'T want to add anything here, but rather just add it -// to the built-in variables. See the comment above TBuiltInVariable. -// -// A new built-in variable will normally be an existing qualifier, like 'in', 'out', etc. -// DO NOT follow the design pattern of, say EvqInstanceId, etc. -// -enum TStorageQualifier { - EvqTemporary, // For temporaries (within a function), read/write - EvqGlobal, // For globals read/write - EvqConst, // User-defined constant values, will be semantically constant and constant folded - EvqVaryingIn, // pipeline input, read only, also supercategory for all built-ins not included in this enum (see TBuiltInVariable) - EvqVaryingOut, // pipeline output, read/write, also supercategory for all built-ins not included in this enum (see TBuiltInVariable) - EvqUniform, // read only, shared with app - EvqBuffer, // read/write, shared with app - EvqShared, // compute shader's read/write 'shared' qualifier - - // parameters - EvqIn, // also, for 'in' in the grammar before we know if it's a pipeline input or an 'in' parameter - EvqOut, // also, for 'out' in the grammar before we know if it's a pipeline output or an 'out' parameter - EvqInOut, - EvqConstReadOnly, // input; also other read-only types having neither a constant value nor constant-value semantics - - // built-ins read by vertex shader - EvqVertexId, - EvqInstanceId, - - // built-ins written by vertex shader - EvqPosition, - EvqPointSize, - EvqClipVertex, - - // built-ins read 
by fragment shader - EvqFace, - EvqFragCoord, - EvqPointCoord, - - // built-ins written by fragment shader - EvqFragColor, - EvqFragDepth, - - // end of list - EvqLast -}; - -// -// Subcategories of the TStorageQualifier, simply to give a direct mapping -// between built-in variable names and an numerical value (the enum). -// -// For backward compatibility, there is some redundancy between the -// TStorageQualifier and these. Existing members should both be maintained accurately. -// However, any new built-in variable (and any existing non-redundant one) -// must follow the pattern that the specific built-in is here, and only its -// general qualifier is in TStorageQualifier. -// -// Something like gl_Position, which is sometimes 'in' and sometimes 'out' -// shows up as two different built-in variables in a single stage, but -// only has a single enum in TBuiltInVariable, so both the -// TStorageQualifier and the TBuitinVariable are needed to distinguish -// between them. -// -enum TBuiltInVariable { - EbvNone, - EbvNumWorkGroups, - EbvWorkGroupSize, - EbvWorkGroupId, - EbvLocalInvocationId, - EbvGlobalInvocationId, - EbvLocalInvocationIndex, - EbvSubGroupSize, - EbvSubGroupInvocation, - EbvSubGroupEqMask, - EbvSubGroupGeMask, - EbvSubGroupGtMask, - EbvSubGroupLeMask, - EbvSubGroupLtMask, - EbvVertexId, - EbvInstanceId, - EbvVertexIndex, - EbvInstanceIndex, - EbvBaseVertex, - EbvBaseInstance, - EbvDrawId, - EbvPosition, - EbvPointSize, - EbvClipVertex, - EbvClipDistance, - EbvCullDistance, - EbvNormal, - EbvVertex, - EbvMultiTexCoord0, - EbvMultiTexCoord1, - EbvMultiTexCoord2, - EbvMultiTexCoord3, - EbvMultiTexCoord4, - EbvMultiTexCoord5, - EbvMultiTexCoord6, - EbvMultiTexCoord7, - EbvFrontColor, - EbvBackColor, - EbvFrontSecondaryColor, - EbvBackSecondaryColor, - EbvTexCoord, - EbvFogFragCoord, - EbvInvocationId, - EbvPrimitiveId, - EbvLayer, - EbvViewportIndex, - EbvPatchVertices, - EbvTessLevelOuter, - EbvTessLevelInner, - EbvBoundingBox, - EbvTessCoord, - 
EbvColor, - EbvSecondaryColor, - EbvFace, - EbvFragCoord, - EbvPointCoord, - EbvFragColor, - EbvFragData, - EbvFragDepth, - EbvFragStencilRef, - EbvSampleId, - EbvSamplePosition, - EbvSampleMask, - EbvHelperInvocation, - -#ifdef AMD_EXTENSIONS - EbvBaryCoordNoPersp, - EbvBaryCoordNoPerspCentroid, - EbvBaryCoordNoPerspSample, - EbvBaryCoordSmooth, - EbvBaryCoordSmoothCentroid, - EbvBaryCoordSmoothSample, - EbvBaryCoordPullModel, -#endif - - EbvViewIndex, - EbvDeviceIndex, - -#ifdef NV_EXTENSIONS - EbvViewportMaskNV, - EbvSecondaryPositionNV, - EbvSecondaryViewportMaskNV, - EbvPositionPerViewNV, - EbvViewportMaskPerViewNV, - EbvFragFullyCoveredNV, -#endif - - // HLSL built-ins that live only temporarily, until they get remapped - // to one of the above. - EbvFragDepthGreater, - EbvFragDepthLesser, - EbvGsOutputStream, - EbvOutputPatch, - EbvInputPatch, - - // structbuffer types - EbvAppendConsume, // no need to differentiate append and consume - EbvRWStructuredBuffer, - EbvStructuredBuffer, - EbvByteAddressBuffer, - EbvRWByteAddressBuffer, - - EbvLast -}; - -// These will show up in error messages -__inline const char* GetStorageQualifierString(TStorageQualifier q) -{ - switch (q) { - case EvqTemporary: return "temp"; break; - case EvqGlobal: return "global"; break; - case EvqConst: return "const"; break; - case EvqConstReadOnly: return "const (read only)"; break; - case EvqVaryingIn: return "in"; break; - case EvqVaryingOut: return "out"; break; - case EvqUniform: return "uniform"; break; - case EvqBuffer: return "buffer"; break; - case EvqShared: return "shared"; break; - case EvqIn: return "in"; break; - case EvqOut: return "out"; break; - case EvqInOut: return "inout"; break; - case EvqVertexId: return "gl_VertexId"; break; - case EvqInstanceId: return "gl_InstanceId"; break; - case EvqPosition: return "gl_Position"; break; - case EvqPointSize: return "gl_PointSize"; break; - case EvqClipVertex: return "gl_ClipVertex"; break; - case EvqFace: return 
"gl_FrontFacing"; break; - case EvqFragCoord: return "gl_FragCoord"; break; - case EvqPointCoord: return "gl_PointCoord"; break; - case EvqFragColor: return "fragColor"; break; - case EvqFragDepth: return "gl_FragDepth"; break; - default: return "unknown qualifier"; - } -} - -__inline const char* GetBuiltInVariableString(TBuiltInVariable v) -{ - switch (v) { - case EbvNone: return ""; - case EbvNumWorkGroups: return "NumWorkGroups"; - case EbvWorkGroupSize: return "WorkGroupSize"; - case EbvWorkGroupId: return "WorkGroupID"; - case EbvLocalInvocationId: return "LocalInvocationID"; - case EbvGlobalInvocationId: return "GlobalInvocationID"; - case EbvLocalInvocationIndex: return "LocalInvocationIndex"; - case EbvSubGroupSize: return "SubGroupSize"; - case EbvSubGroupInvocation: return "SubGroupInvocation"; - case EbvSubGroupEqMask: return "SubGroupEqMask"; - case EbvSubGroupGeMask: return "SubGroupGeMask"; - case EbvSubGroupGtMask: return "SubGroupGtMask"; - case EbvSubGroupLeMask: return "SubGroupLeMask"; - case EbvSubGroupLtMask: return "SubGroupLtMask"; - case EbvVertexId: return "VertexId"; - case EbvInstanceId: return "InstanceId"; - case EbvVertexIndex: return "VertexIndex"; - case EbvInstanceIndex: return "InstanceIndex"; - case EbvBaseVertex: return "BaseVertex"; - case EbvBaseInstance: return "BaseInstance"; - case EbvDrawId: return "DrawId"; - case EbvPosition: return "Position"; - case EbvPointSize: return "PointSize"; - case EbvClipVertex: return "ClipVertex"; - case EbvClipDistance: return "ClipDistance"; - case EbvCullDistance: return "CullDistance"; - case EbvNormal: return "Normal"; - case EbvVertex: return "Vertex"; - case EbvMultiTexCoord0: return "MultiTexCoord0"; - case EbvMultiTexCoord1: return "MultiTexCoord1"; - case EbvMultiTexCoord2: return "MultiTexCoord2"; - case EbvMultiTexCoord3: return "MultiTexCoord3"; - case EbvMultiTexCoord4: return "MultiTexCoord4"; - case EbvMultiTexCoord5: return "MultiTexCoord5"; - case EbvMultiTexCoord6: return 
"MultiTexCoord6"; - case EbvMultiTexCoord7: return "MultiTexCoord7"; - case EbvFrontColor: return "FrontColor"; - case EbvBackColor: return "BackColor"; - case EbvFrontSecondaryColor: return "FrontSecondaryColor"; - case EbvBackSecondaryColor: return "BackSecondaryColor"; - case EbvTexCoord: return "TexCoord"; - case EbvFogFragCoord: return "FogFragCoord"; - case EbvInvocationId: return "InvocationID"; - case EbvPrimitiveId: return "PrimitiveID"; - case EbvLayer: return "Layer"; - case EbvViewportIndex: return "ViewportIndex"; - case EbvPatchVertices: return "PatchVertices"; - case EbvTessLevelOuter: return "TessLevelOuter"; - case EbvTessLevelInner: return "TessLevelInner"; - case EbvBoundingBox: return "BoundingBox"; - case EbvTessCoord: return "TessCoord"; - case EbvColor: return "Color"; - case EbvSecondaryColor: return "SecondaryColor"; - case EbvFace: return "Face"; - case EbvFragCoord: return "FragCoord"; - case EbvPointCoord: return "PointCoord"; - case EbvFragColor: return "FragColor"; - case EbvFragData: return "FragData"; - case EbvFragDepth: return "FragDepth"; - case EbvFragStencilRef: return "FragStencilRef"; - case EbvSampleId: return "SampleId"; - case EbvSamplePosition: return "SamplePosition"; - case EbvSampleMask: return "SampleMaskIn"; - case EbvHelperInvocation: return "HelperInvocation"; - -#ifdef AMD_EXTENSIONS - case EbvBaryCoordNoPersp: return "BaryCoordNoPersp"; - case EbvBaryCoordNoPerspCentroid: return "BaryCoordNoPerspCentroid"; - case EbvBaryCoordNoPerspSample: return "BaryCoordNoPerspSample"; - case EbvBaryCoordSmooth: return "BaryCoordSmooth"; - case EbvBaryCoordSmoothCentroid: return "BaryCoordSmoothCentroid"; - case EbvBaryCoordSmoothSample: return "BaryCoordSmoothSample"; - case EbvBaryCoordPullModel: return "BaryCoordPullModel"; -#endif - - case EbvViewIndex: return "ViewIndex"; - case EbvDeviceIndex: return "DeviceIndex"; - -#ifdef NV_EXTENSIONS - case EbvViewportMaskNV: return "ViewportMaskNV"; - case EbvSecondaryPositionNV: 
return "SecondaryPositionNV"; - case EbvSecondaryViewportMaskNV: return "SecondaryViewportMaskNV"; - case EbvPositionPerViewNV: return "PositionPerViewNV"; - case EbvViewportMaskPerViewNV: return "ViewportMaskPerViewNV"; - case EbvFragFullyCoveredNV: return "FragFullyCoveredNV"; -#endif - default: return "unknown built-in variable"; - } -} - -// In this enum, order matters; users can assume higher precision is a bigger value -// and EpqNone is 0. -enum TPrecisionQualifier { - EpqNone = 0, - EpqLow, - EpqMedium, - EpqHigh -}; - -__inline const char* GetPrecisionQualifierString(TPrecisionQualifier p) -{ - switch(p) { - case EpqNone: return ""; break; - case EpqLow: return "lowp"; break; - case EpqMedium: return "mediump"; break; - case EpqHigh: return "highp"; break; - default: return "unknown precision qualifier"; - } -} - -} // end namespace glslang - -#endif // _BASICTYPES_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/Common.h b/third_party/glslang-spirv/Include/Common.h deleted file mode 100644 index 041701774..000000000 --- a/third_party/glslang-spirv/Include/Common.h +++ /dev/null @@ -1,274 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// Copyright (C) 2012-2013 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#ifndef _COMMON_INCLUDED_ -#define _COMMON_INCLUDED_ - -#if (defined(_MSC_VER) && _MSC_VER < 1900 /*vs2015*/) || defined MINGW_HAS_SECURE_API - #include - #define snprintf sprintf_s - #define safe_vsprintf(buf,max,format,args) vsnprintf_s((buf), (max), (max), (format), (args)) -#elif defined (solaris) - #define safe_vsprintf(buf,max,format,args) vsnprintf((buf), (max), (format), (args)) - #include - #define UINT_PTR uintptr_t -#else - #define safe_vsprintf(buf,max,format,args) vsnprintf((buf), (max), (format), (args)) - #include - #define UINT_PTR uintptr_t -#endif - -#if defined(__ANDROID__) || _MSC_VER < 1700 -#include -namespace std { -template -std::string to_string(const T& val) { - std::ostringstream os; - os << val; - return os.str(); -} -} -#endif - -#if defined(_MSC_VER) && _MSC_VER < 1800 -inline long long int strtoll (const char* str, char** endptr, int base) -{ - return _strtoi64(str, endptr, base); -} -inline unsigned long long int strtoull (const char* str, char** endptr, int base) -{ - return _strtoui64(str, endptr, base); -} -inline long long int atoll (const char* str) -{ - return strtoll(str, NULL, 10); -} -#endif - -#if defined(_MSC_VER) -#define 
strdup _strdup -#endif - -/* windows only pragma */ -#ifdef _MSC_VER - #pragma warning(disable : 4786) // Don't warn about too long identifiers - #pragma warning(disable : 4514) // unused inline method - #pragma warning(disable : 4201) // nameless union -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "PoolAlloc.h" - -// -// Put POOL_ALLOCATOR_NEW_DELETE in base classes to make them use this scheme. -// -#define POOL_ALLOCATOR_NEW_DELETE(A) \ - void* operator new(size_t s) { return (A).allocate(s); } \ - void* operator new(size_t, void *_Where) { return (_Where); } \ - void operator delete(void*) { } \ - void operator delete(void *, void *) { } \ - void* operator new[](size_t s) { return (A).allocate(s); } \ - void* operator new[](size_t, void *_Where) { return (_Where); } \ - void operator delete[](void*) { } \ - void operator delete[](void *, void *) { } - -namespace glslang { - - // - // Pool version of string. - // - typedef pool_allocator TStringAllocator; - typedef std::basic_string , TStringAllocator> TString; - -} // end namespace glslang - -// Repackage the std::hash for use by unordered map/set with a TString key. 
-namespace std { - - template<> struct hash { - std::size_t operator()(const glslang::TString& s) const - { - const unsigned _FNV_offset_basis = 2166136261U; - const unsigned _FNV_prime = 16777619U; - unsigned _Val = _FNV_offset_basis; - size_t _Count = s.size(); - const char* _First = s.c_str(); - for (size_t _Next = 0; _Next < _Count; ++_Next) - { - _Val ^= (unsigned)_First[_Next]; - _Val *= _FNV_prime; - } - - return _Val; - } - }; -} - -namespace glslang { - -inline TString* NewPoolTString(const char* s) -{ - void* memory = GetThreadPoolAllocator().allocate(sizeof(TString)); - return new(memory) TString(s); -} - -template inline T* NewPoolObject(T*) -{ - return new(GetThreadPoolAllocator().allocate(sizeof(T))) T; -} - -template inline T* NewPoolObject(T, int instances) -{ - return new(GetThreadPoolAllocator().allocate(instances * sizeof(T))) T[instances]; -} - -// -// Pool allocator versions of vectors, lists, and maps -// -template class TVector : public std::vector > { -public: - POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) - - typedef typename std::vector >::size_type size_type; - TVector() : std::vector >() {} - TVector(const pool_allocator& a) : std::vector >(a) {} - TVector(size_type i) : std::vector >(i) {} - TVector(size_type i, const T& val) : std::vector >(i, val) {} -}; - -template class TList : public std::list > { -}; - -template > -class TMap : public std::map > > { -}; - -template , class PRED = std::equal_to > -class TUnorderedMap : public std::unordered_map > > { -}; - -// -// Persistent string memory. Should only be used for strings that survive -// across compiles/links. -// -typedef std::basic_string TPersistString; - -// -// templatized min and max functions. -// -template T Min(const T a, const T b) { return a < b ? a : b; } -template T Max(const T a, const T b) { return a > b ? a : b; } - -// -// Create a TString object from an integer. 
-// -#if defined _MSC_VER || defined MINGW_HAS_SECURE_API -inline const TString String(const int i, const int base = 10) -{ - char text[16]; // 32 bit ints are at most 10 digits in base 10 - _itoa_s(i, text, sizeof(text), base); - return text; -} -#else -inline const TString String(const int i, const int /*base*/ = 10) -{ - char text[16]; // 32 bit ints are at most 10 digits in base 10 - - // we assume base 10 for all cases - snprintf(text, sizeof(text), "%d", i); - - return text; -} -#endif - -struct TSourceLoc { - void init() { name = nullptr; string = 0; line = 0; column = 0; } - void init(int stringNum) { init(); string = stringNum; } - // Returns the name if it exists. Otherwise, returns the string number. - std::string getStringNameOrNum(bool quoteStringName = true) const - { - if (name != nullptr) - return quoteStringName ? ("\"" + std::string(name) + "\"") : name; - return std::to_string((long long)string); - } - const char* name; // descriptive name for this string - int string; - int line; - int column; -}; - -class TPragmaTable : public TMap { -public: - POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) -}; - -const int MaxTokenLength = 1024; - -template bool IsPow2(T powerOf2) -{ - if (powerOf2 <= 0) - return false; - - return (powerOf2 & (powerOf2 - 1)) == 0; -} - -// Round number up to a multiple of the given powerOf2, which is not -// a power, just a number that must be a power of 2. -template void RoundToPow2(T& number, int powerOf2) -{ - assert(IsPow2(powerOf2)); - number = (number + powerOf2 - 1) & ~(powerOf2 - 1); -} - -template bool IsMultipleOfPow2(T number, int powerOf2) -{ - assert(IsPow2(powerOf2)); - return ! 
(number & (powerOf2 - 1)); -} - -} // end namespace glslang - -#endif // _COMMON_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/ConstantUnion.h b/third_party/glslang-spirv/Include/ConstantUnion.h deleted file mode 100644 index 58c6094ea..000000000 --- a/third_party/glslang-spirv/Include/ConstantUnion.h +++ /dev/null @@ -1,625 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// Copyright (C) 2013 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// - -#ifndef _CONSTANT_UNION_INCLUDED_ -#define _CONSTANT_UNION_INCLUDED_ - -#include "../Include/Common.h" -#include "../Include/BaseTypes.h" - -namespace glslang { - -class TConstUnion { -public: - POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) - - TConstUnion() : iConst(0), type(EbtInt) { } - - void setIConst(int i) - { - iConst = i; - type = EbtInt; - } - - void setUConst(unsigned int u) - { - uConst = u; - type = EbtUint; - } - - void setI64Const(long long i64) - { - i64Const = i64; - type = EbtInt64; - } - - void setU64Const(unsigned long long u64) - { - u64Const = u64; - type = EbtUint64; - } - - void setDConst(double d) - { - dConst = d; - type = EbtDouble; - } - - void setBConst(bool b) - { - bConst = b; - type = EbtBool; - } - - void setSConst(const TString* s) - { - sConst = s; - type = EbtString; - } - - int getIConst() const { return iConst; } - unsigned int getUConst() const { return uConst; } - long long getI64Const() const { return i64Const; } - unsigned long long getU64Const() const { return u64Const; } - double getDConst() const { return dConst; } - bool getBConst() const { return bConst; } - const TString* getSConst() const { return sConst; } - - bool operator==(const int i) const - { - if (i == iConst) - return true; - - return false; - } - - bool operator==(const unsigned int u) const - { - if (u == uConst) - return true; - - return false; - } - - bool operator==(const long long i64) const - { - if (i64 == i64Const) - return true; - - return false; - } - - bool operator==(const unsigned long long u64) const - { - if (u64 == u64Const) - return true; - - return false; - } - - bool operator==(const double d) const - { - if (d == dConst) - return true; - - return false; - } - - bool operator==(const bool b) const - { - if (b == bConst) - return true; - - return false; - } - - bool operator==(const TConstUnion& constant) const - { - if (constant.type != type) - return false; - - switch (type) { - case EbtInt: - if (constant.iConst == iConst) 
- return true; - - break; - case EbtUint: - if (constant.uConst == uConst) - return true; - - break; - case EbtInt64: - if (constant.i64Const == i64Const) - return true; - - break; - case EbtUint64: - if (constant.u64Const == u64Const) - return true; - - break; - case EbtDouble: - if (constant.dConst == dConst) - return true; - - break; - case EbtBool: - if (constant.bConst == bConst) - return true; - - break; - default: - assert(false && "Default missing"); - } - - return false; - } - - bool operator!=(const int i) const - { - return !operator==(i); - } - - bool operator!=(const unsigned int u) const - { - return !operator==(u); - } - - bool operator!=(const long long i) const - { - return !operator==(i); - } - - bool operator!=(const unsigned long long u) const - { - return !operator==(u); - } - - bool operator!=(const float f) const - { - return !operator==(f); - } - - bool operator!=(const bool b) const - { - return !operator==(b); - } - - bool operator!=(const TConstUnion& constant) const - { - return !operator==(constant); - } - - bool operator>(const TConstUnion& constant) const - { - assert(type == constant.type); - switch (type) { - case EbtInt: - if (iConst > constant.iConst) - return true; - - return false; - case EbtUint: - if (uConst > constant.uConst) - return true; - - return false; - case EbtInt64: - if (i64Const > constant.i64Const) - return true; - - return false; - case EbtUint64: - if (u64Const > constant.u64Const) - return true; - - return false; - case EbtDouble: - if (dConst > constant.dConst) - return true; - - return false; - default: - assert(false && "Default missing"); - return false; - } - } - - bool operator<(const TConstUnion& constant) const - { - assert(type == constant.type); - switch (type) { - case EbtInt: - if (iConst < constant.iConst) - return true; - - return false; - case EbtUint: - if (uConst < constant.uConst) - return true; - - return false; - case EbtInt64: - if (i64Const < constant.i64Const) - return true; - - return 
false; - case EbtUint64: - if (u64Const < constant.u64Const) - return true; - - return false; - case EbtDouble: - if (dConst < constant.dConst) - return true; - - return false; - default: - assert(false && "Default missing"); - return false; - } - } - - TConstUnion operator+(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtInt: returnValue.setIConst(iConst + constant.iConst); break; - case EbtInt64: returnValue.setI64Const(i64Const + constant.i64Const); break; - case EbtUint: returnValue.setUConst(uConst + constant.uConst); break; - case EbtUint64: returnValue.setU64Const(u64Const + constant.u64Const); break; - case EbtDouble: returnValue.setDConst(dConst + constant.dConst); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator-(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtInt: returnValue.setIConst(iConst - constant.iConst); break; - case EbtInt64: returnValue.setI64Const(i64Const - constant.i64Const); break; - case EbtUint: returnValue.setUConst(uConst - constant.uConst); break; - case EbtUint64: returnValue.setU64Const(u64Const - constant.u64Const); break; - case EbtDouble: returnValue.setDConst(dConst - constant.dConst); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator*(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtInt: returnValue.setIConst(iConst * constant.iConst); break; - case EbtInt64: returnValue.setI64Const(i64Const * constant.i64Const); break; - case EbtUint: returnValue.setUConst(uConst * constant.uConst); break; - case EbtUint64: returnValue.setU64Const(u64Const * constant.u64Const); break; - case EbtDouble: returnValue.setDConst(dConst * constant.dConst); break; - default: assert(false && 
"Default missing"); - } - - return returnValue; - } - - TConstUnion operator%(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtInt: returnValue.setIConst(iConst % constant.iConst); break; - case EbtInt64: returnValue.setI64Const(i64Const % constant.i64Const); break; - case EbtUint: returnValue.setUConst(uConst % constant.uConst); break; - case EbtUint64: returnValue.setU64Const(u64Const % constant.u64Const); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator>>(const TConstUnion& constant) const - { - TConstUnion returnValue; - switch (type) { - case EbtInt: - switch (constant.type) { - case EbtInt: returnValue.setIConst(iConst >> constant.iConst); break; - case EbtUint: returnValue.setIConst(iConst >> constant.uConst); break; - case EbtInt64: returnValue.setIConst(iConst >> constant.i64Const); break; - case EbtUint64: returnValue.setIConst(iConst >> constant.u64Const); break; - default: assert(false && "Default missing"); - } - break; - case EbtUint: - switch (constant.type) { - case EbtInt: returnValue.setUConst(uConst >> constant.iConst); break; - case EbtUint: returnValue.setUConst(uConst >> constant.uConst); break; - case EbtInt64: returnValue.setUConst(uConst >> constant.i64Const); break; - case EbtUint64: returnValue.setUConst(uConst >> constant.u64Const); break; - default: assert(false && "Default missing"); - } - break; - case EbtInt64: - switch (constant.type) { - case EbtInt: returnValue.setI64Const(i64Const >> constant.iConst); break; - case EbtUint: returnValue.setI64Const(i64Const >> constant.uConst); break; - case EbtInt64: returnValue.setI64Const(i64Const >> constant.i64Const); break; - case EbtUint64: returnValue.setI64Const(i64Const >> constant.u64Const); break; - default: assert(false && "Default missing"); - } - break; - case EbtUint64: - switch (constant.type) { - case EbtInt: returnValue.setU64Const(u64Const 
>> constant.iConst); break; - case EbtUint: returnValue.setU64Const(u64Const >> constant.uConst); break; - case EbtInt64: returnValue.setU64Const(u64Const >> constant.i64Const); break; - case EbtUint64: returnValue.setU64Const(u64Const >> constant.u64Const); break; - default: assert(false && "Default missing"); - } - break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator<<(const TConstUnion& constant) const - { - TConstUnion returnValue; - switch (type) { - case EbtInt: - switch (constant.type) { - case EbtInt: returnValue.setIConst(iConst << constant.iConst); break; - case EbtUint: returnValue.setIConst(iConst << constant.uConst); break; - case EbtInt64: returnValue.setIConst(iConst << constant.i64Const); break; - case EbtUint64: returnValue.setIConst(iConst << constant.u64Const); break; - default: assert(false && "Default missing"); - } - break; - case EbtUint: - switch (constant.type) { - case EbtInt: returnValue.setUConst(uConst << constant.iConst); break; - case EbtUint: returnValue.setUConst(uConst << constant.uConst); break; - case EbtInt64: returnValue.setUConst(uConst << constant.i64Const); break; - case EbtUint64: returnValue.setUConst(uConst << constant.u64Const); break; - default: assert(false && "Default missing"); - } - break; - case EbtInt64: - switch (constant.type) { - case EbtInt: returnValue.setI64Const(i64Const << constant.iConst); break; - case EbtUint: returnValue.setI64Const(i64Const << constant.uConst); break; - case EbtInt64: returnValue.setI64Const(i64Const << constant.i64Const); break; - case EbtUint64: returnValue.setI64Const(i64Const << constant.u64Const); break; - default: assert(false && "Default missing"); - } - break; - case EbtUint64: - switch (constant.type) { - case EbtInt: returnValue.setU64Const(u64Const << constant.iConst); break; - case EbtUint: returnValue.setU64Const(u64Const << constant.uConst); break; - case EbtInt64: returnValue.setU64Const(u64Const << 
constant.i64Const); break; - case EbtUint64: returnValue.setU64Const(u64Const << constant.u64Const); break; - default: assert(false && "Default missing"); - } - break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator&(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtInt: returnValue.setIConst(iConst & constant.iConst); break; - case EbtUint: returnValue.setUConst(uConst & constant.uConst); break; - case EbtInt64: returnValue.setI64Const(i64Const & constant.i64Const); break; - case EbtUint64: returnValue.setU64Const(u64Const & constant.u64Const); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator|(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtInt: returnValue.setIConst(iConst | constant.iConst); break; - case EbtUint: returnValue.setUConst(uConst | constant.uConst); break; - case EbtInt64: returnValue.setI64Const(i64Const | constant.i64Const); break; - case EbtUint64: returnValue.setU64Const(u64Const | constant.u64Const); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator^(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtInt: returnValue.setIConst(iConst ^ constant.iConst); break; - case EbtUint: returnValue.setUConst(uConst ^ constant.uConst); break; - case EbtInt64: returnValue.setI64Const(i64Const ^ constant.i64Const); break; - case EbtUint64: returnValue.setU64Const(u64Const ^ constant.u64Const); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator~() const - { - TConstUnion returnValue; - switch (type) { - case EbtInt: returnValue.setIConst(~iConst); break; - case EbtUint: 
returnValue.setUConst(~uConst); break; - case EbtInt64: returnValue.setI64Const(~i64Const); break; - case EbtUint64: returnValue.setU64Const(~u64Const); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator&&(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtBool: returnValue.setBConst(bConst && constant.bConst); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TConstUnion operator||(const TConstUnion& constant) const - { - TConstUnion returnValue; - assert(type == constant.type); - switch (type) { - case EbtBool: returnValue.setBConst(bConst || constant.bConst); break; - default: assert(false && "Default missing"); - } - - return returnValue; - } - - TBasicType getType() const { return type; } - -private: - union { - int iConst; // used for ivec, scalar ints - unsigned int uConst; // used for uvec, scalar uints - long long i64Const; // used for i64vec, scalar int64s - unsigned long long u64Const; // used for u64vec, scalar uint64s - bool bConst; // used for bvec, scalar bools - double dConst; // used for vec, dvec, mat, dmat, scalar floats and doubles - const TString* sConst; // string constant - }; - - TBasicType type; -}; - -// Encapsulate having a pointer to an array of TConstUnion, -// which only needs to be allocated if its size is going to be -// bigger than 0. -// -// One convenience is being able to use [] to go inside the array, instead -// of C++ assuming it as an array of pointers to vectors. -// -// General usage is that the size is known up front, and it is -// created once with the proper size. 
-// -class TConstUnionArray { -public: - POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) - - TConstUnionArray() : unionArray(nullptr) { } - virtual ~TConstUnionArray() { } - - explicit TConstUnionArray(int size) - { - if (size == 0) - unionArray = nullptr; - else - unionArray = new TConstUnionVector(size); - } - TConstUnionArray(const TConstUnionArray& a) : unionArray(a.unionArray) { } - TConstUnionArray(const TConstUnionArray& a, int start, int size) - { - unionArray = new TConstUnionVector(size); - for (int i = 0; i < size; ++i) - (*unionArray)[i] = a[start + i]; - } - - // Use this constructor for a smear operation - TConstUnionArray(int size, const TConstUnion& val) - { - unionArray = new TConstUnionVector(size, val); - } - - int size() const { return unionArray ? (int)unionArray->size() : 0; } - TConstUnion& operator[](size_t index) { return (*unionArray)[index]; } - const TConstUnion& operator[](size_t index) const { return (*unionArray)[index]; } - bool operator==(const TConstUnionArray& rhs) const - { - // this includes the case that both are unallocated - if (unionArray == rhs.unionArray) - return true; - - if (! unionArray || ! rhs.unionArray) - return false; - - return *unionArray == *rhs.unionArray; - } - bool operator!=(const TConstUnionArray& rhs) const { return ! 
operator==(rhs); } - - double dot(const TConstUnionArray& rhs) - { - assert(rhs.unionArray->size() == unionArray->size()); - double sum = 0.0; - - for (size_t comp = 0; comp < unionArray->size(); ++comp) - sum += (*this)[comp].getDConst() * rhs[comp].getDConst(); - - return sum; - } - - bool empty() const { return unionArray == nullptr; } - -protected: - typedef TVector TConstUnionVector; - TConstUnionVector* unionArray; -}; - -} // end namespace glslang - -#endif // _CONSTANT_UNION_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/InfoSink.h b/third_party/glslang-spirv/Include/InfoSink.h deleted file mode 100644 index dceb603cf..000000000 --- a/third_party/glslang-spirv/Include/InfoSink.h +++ /dev/null @@ -1,144 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#ifndef _INFOSINK_INCLUDED_ -#define _INFOSINK_INCLUDED_ - -#include "../Include/Common.h" -#include - -namespace glslang { - -// -// TPrefixType is used to centralize how info log messages start. -// See below. -// -enum TPrefixType { - EPrefixNone, - EPrefixWarning, - EPrefixError, - EPrefixInternalError, - EPrefixUnimplemented, - EPrefixNote -}; - -enum TOutputStream { - ENull = 0, - EDebugger = 0x01, - EStdOut = 0x02, - EString = 0x04, -}; -// -// Encapsulate info logs for all objects that have them. -// -// The methods are a general set of tools for getting a variety of -// messages and types inserted into the log. -// -class TInfoSinkBase { -public: - TInfoSinkBase() : outputStream(4) {} - void erase() { sink.erase(); } - TInfoSinkBase& operator<<(const TPersistString& t) { append(t); return *this; } - TInfoSinkBase& operator<<(char c) { append(1, c); return *this; } - TInfoSinkBase& operator<<(const char* s) { append(s); return *this; } - TInfoSinkBase& operator<<(int n) { append(String(n)); return *this; } - TInfoSinkBase& operator<<(unsigned int n) { append(String(n)); return *this; } - TInfoSinkBase& operator<<(float n) { const int size = 40; char buf[size]; - snprintf(buf, size, (fabs(n) > 1e-8 && fabs(n) < 1e8) || n == 0.0f ? 
"%f" : "%g", n); - append(buf); - return *this; } - TInfoSinkBase& operator+(const TPersistString& t) { append(t); return *this; } - TInfoSinkBase& operator+(const TString& t) { append(t); return *this; } - TInfoSinkBase& operator<<(const TString& t) { append(t); return *this; } - TInfoSinkBase& operator+(const char* s) { append(s); return *this; } - const char* c_str() const { return sink.c_str(); } - void prefix(TPrefixType message) { - switch(message) { - case EPrefixNone: break; - case EPrefixWarning: append("WARNING: "); break; - case EPrefixError: append("ERROR: "); break; - case EPrefixInternalError: append("INTERNAL ERROR: "); break; - case EPrefixUnimplemented: append("UNIMPLEMENTED: "); break; - case EPrefixNote: append("NOTE: "); break; - default: append("UNKNOWN ERROR: "); break; - } - } - void location(const TSourceLoc& loc) { - const int maxSize = 24; - char locText[maxSize]; - snprintf(locText, maxSize, ":%d", loc.line); - append(loc.getStringNameOrNum(false).c_str()); - append(locText); - append(": "); - } - void message(TPrefixType message, const char* s) { - prefix(message); - append(s); - append("\n"); - } - void message(TPrefixType message, const char* s, const TSourceLoc& loc) { - prefix(message); - location(loc); - append(s); - append("\n"); - } - - void setOutputStream(int output = 4) - { - outputStream = output; - } - -protected: - void append(const char* s); - - void append(int count, char c); - void append(const TPersistString& t); - void append(const TString& t); - - void checkMem(size_t growth) { if (sink.capacity() < sink.size() + growth + 2) - sink.reserve(sink.capacity() + sink.capacity() / 2); } - void appendToStream(const char* s); - TPersistString sink; - int outputStream; -}; - -} // end namespace glslang - -class TInfoSink { -public: - glslang::TInfoSinkBase info; - glslang::TInfoSinkBase debug; -}; - -#endif // _INFOSINK_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/InitializeGlobals.h 
b/third_party/glslang-spirv/Include/InitializeGlobals.h deleted file mode 100644 index 95d0a40e9..000000000 --- a/third_party/glslang-spirv/Include/InitializeGlobals.h +++ /dev/null @@ -1,44 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// - -#ifndef __INITIALIZE_GLOBALS_INCLUDED_ -#define __INITIALIZE_GLOBALS_INCLUDED_ - -namespace glslang { - -bool InitializePoolIndex(); - -} // end namespace glslang - -#endif // __INITIALIZE_GLOBALS_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/PoolAlloc.h b/third_party/glslang-spirv/Include/PoolAlloc.h deleted file mode 100644 index 0e237a6a2..000000000 --- a/third_party/glslang-spirv/Include/PoolAlloc.h +++ /dev/null @@ -1,317 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// Copyright (C) 2012-2013 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#ifndef _POOLALLOC_INCLUDED_ -#define _POOLALLOC_INCLUDED_ - -#ifdef _DEBUG -# define GUARD_BLOCKS // define to enable guard block sanity checking -#endif - -// -// This header defines an allocator that can be used to efficiently -// allocate a large number of small requests for heap memory, with the -// intention that they are not individually deallocated, but rather -// collectively deallocated at one time. -// -// This simultaneously -// -// * Makes each individual allocation much more efficient; the -// typical allocation is trivial. -// * Completely avoids the cost of doing individual deallocation. -// * Saves the trouble of tracking down and plugging a large class of leaks. -// -// Individual classes can use this allocator by supplying their own -// new and delete methods. -// -// STL containers can use this allocator by using the pool_allocator -// class as the allocator (second) template argument. -// - -#include -#include -#include - -namespace glslang { - -// If we are using guard blocks, we must track each individual -// allocation. If we aren't using guard blocks, these -// never get instantiated, so won't have any impact. 
-// - -class TAllocation { -public: - TAllocation(size_t size, unsigned char* mem, TAllocation* prev = 0) : - size(size), mem(mem), prevAlloc(prev) { - // Allocations are bracketed: - // [allocationHeader][initialGuardBlock][userData][finalGuardBlock] - // This would be cleaner with if (guardBlockSize)..., but that - // makes the compiler print warnings about 0 length memsets, - // even with the if() protecting them. -# ifdef GUARD_BLOCKS - memset(preGuard(), guardBlockBeginVal, guardBlockSize); - memset(data(), userDataFill, size); - memset(postGuard(), guardBlockEndVal, guardBlockSize); -# endif - } - - void check() const { - checkGuardBlock(preGuard(), guardBlockBeginVal, "before"); - checkGuardBlock(postGuard(), guardBlockEndVal, "after"); - } - - void checkAllocList() const; - - // Return total size needed to accommodate user buffer of 'size', - // plus our tracking data. - inline static size_t allocationSize(size_t size) { - return size + 2 * guardBlockSize + headerSize(); - } - - // Offset from surrounding buffer to get to user data buffer. 
- inline static unsigned char* offsetAllocation(unsigned char* m) { - return m + guardBlockSize + headerSize(); - } - -private: - void checkGuardBlock(unsigned char* blockMem, unsigned char val, const char* locText) const; - - // Find offsets to pre and post guard blocks, and user data buffer - unsigned char* preGuard() const { return mem + headerSize(); } - unsigned char* data() const { return preGuard() + guardBlockSize; } - unsigned char* postGuard() const { return data() + size; } - - size_t size; // size of the user data area - unsigned char* mem; // beginning of our allocation (pts to header) - TAllocation* prevAlloc; // prior allocation in the chain - - const static unsigned char guardBlockBeginVal; - const static unsigned char guardBlockEndVal; - const static unsigned char userDataFill; - - const static size_t guardBlockSize; -# ifdef GUARD_BLOCKS - inline static size_t headerSize() { return sizeof(TAllocation); } -# else - inline static size_t headerSize() { return 0; } -# endif -}; - -// -// There are several stacks. One is to track the pushing and popping -// of the user, and not yet implemented. The others are simply a -// repositories of free pages or used pages. -// -// Page stacks are linked together with a simple header at the beginning -// of each allocation obtained from the underlying OS. Multi-page allocations -// are returned to the OS. Individual page allocations are kept for future -// re-use. -// -// The "page size" used is not, nor must it match, the underlying OS -// page size. But, having it be about that size or equal to a set of -// pages is likely most optimal. -// -class TPoolAllocator { -public: - TPoolAllocator(int growthIncrement = 8*1024, int allocationAlignment = 16); - - // - // Don't call the destructor just to free up the memory, call pop() - // - ~TPoolAllocator(); - - // - // Call push() to establish a new place to pop memory too. Does not - // have to be called to get things started. 
- // - void push(); - - // - // Call pop() to free all memory allocated since the last call to push(), - // or if no last call to push, frees all memory since first allocation. - // - void pop(); - - // - // Call popAll() to free all memory allocated. - // - void popAll(); - - // - // Call allocate() to actually acquire memory. Returns 0 if no memory - // available, otherwise a properly aligned pointer to 'numBytes' of memory. - // - void* allocate(size_t numBytes); - - // - // There is no deallocate. The point of this class is that - // deallocation can be skipped by the user of it, as the model - // of use is to simultaneously deallocate everything at once - // by calling pop(), and to not have to solve memory leak problems. - // - -protected: - friend struct tHeader; - - struct tHeader { - tHeader(tHeader* nextPage, size_t pageCount) : -#ifdef GUARD_BLOCKS - lastAllocation(0), -#endif - nextPage(nextPage), pageCount(pageCount) { } - - ~tHeader() { -#ifdef GUARD_BLOCKS - if (lastAllocation) - lastAllocation->checkAllocList(); -#endif - } - -#ifdef GUARD_BLOCKS - TAllocation* lastAllocation; -#endif - tHeader* nextPage; - size_t pageCount; - }; - - struct tAllocState { - size_t offset; - tHeader* page; - }; - typedef std::vector tAllocStack; - - // Track allocations if and only if we're using guard blocks -#ifndef GUARD_BLOCKS - void* initializeAllocation(tHeader*, unsigned char* memory, size_t) { -#else - void* initializeAllocation(tHeader* block, unsigned char* memory, size_t numBytes) { - new(memory) TAllocation(numBytes, memory, block->lastAllocation); - block->lastAllocation = reinterpret_cast(memory); -#endif - - // This is optimized entirely away if GUARD_BLOCKS is not defined. 
- return TAllocation::offsetAllocation(memory); - } - - size_t pageSize; // granularity of allocation from the OS - size_t alignment; // all returned allocations will be aligned at - // this granularity, which will be a power of 2 - size_t alignmentMask; - size_t headerSkip; // amount of memory to skip to make room for the - // header (basically, size of header, rounded - // up to make it aligned - size_t currentPageOffset; // next offset in top of inUseList to allocate from - tHeader* freeList; // list of popped memory - tHeader* inUseList; // list of all memory currently being used - tAllocStack stack; // stack of where to allocate from, to partition pool - - int numCalls; // just an interesting statistic - size_t totalBytes; // just an interesting statistic -private: - TPoolAllocator& operator=(const TPoolAllocator&); // don't allow assignment operator - TPoolAllocator(const TPoolAllocator&); // don't allow default copy constructor -}; - -// -// There could potentially be many pools with pops happening at -// different times. But a simple use is to have a global pop -// with everyone using the same global allocator. -// -extern TPoolAllocator& GetThreadPoolAllocator(); -void SetThreadPoolAllocator(TPoolAllocator* poolAllocator); - -// -// This STL compatible allocator is intended to be used as the allocator -// parameter to templatized STL containers, like vector and map. -// -// It will use the pools for allocation, and not -// do any deallocation, but will still do destruction. 
-// -template -class pool_allocator { -public: - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef T *pointer; - typedef const T *const_pointer; - typedef T& reference; - typedef const T& const_reference; - typedef T value_type; - template - struct rebind { - typedef pool_allocator other; - }; - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } - - pool_allocator() : allocator(GetThreadPoolAllocator()) { } - pool_allocator(TPoolAllocator& a) : allocator(a) { } - pool_allocator(const pool_allocator& p) : allocator(p.allocator) { } - - template - pool_allocator(const pool_allocator& p) : allocator(p.getAllocator()) { } - - pointer allocate(size_type n) { - return reinterpret_cast(getAllocator().allocate(n * sizeof(T))); } - pointer allocate(size_type n, const void*) { - return reinterpret_cast(getAllocator().allocate(n * sizeof(T))); } - - void deallocate(void*, size_type) { } - void deallocate(pointer, size_type) { } - - pointer _Charalloc(size_t n) { - return reinterpret_cast(getAllocator().allocate(n)); } - - void construct(pointer p, const T& val) { new ((void *)p) T(val); } - void destroy(pointer p) { p->T::~T(); } - - bool operator==(const pool_allocator& rhs) const { return &getAllocator() == &rhs.getAllocator(); } - bool operator!=(const pool_allocator& rhs) const { return &getAllocator() != &rhs.getAllocator(); } - - size_type max_size() const { return static_cast(-1) / sizeof(T); } - size_type max_size(int size) const { return static_cast(-1) / size; } - - void setAllocator(TPoolAllocator* a) { allocator = *a; } - TPoolAllocator& getAllocator() const { return allocator; } - -protected: - pool_allocator& operator=(const pool_allocator&) { return *this; } - TPoolAllocator& allocator; -}; - -} // end namespace glslang - -#endif // _POOLALLOC_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/ResourceLimits.h b/third_party/glslang-spirv/Include/ResourceLimits.h deleted 
file mode 100644 index 0d07b8c84..000000000 --- a/third_party/glslang-spirv/Include/ResourceLimits.h +++ /dev/null @@ -1,140 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// Copyright (C) 2013 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// - -#ifndef _RESOURCE_LIMITS_INCLUDED_ -#define _RESOURCE_LIMITS_INCLUDED_ - -struct TLimits { - bool nonInductiveForLoops; - bool whileLoops; - bool doWhileLoops; - bool generalUniformIndexing; - bool generalAttributeMatrixVectorIndexing; - bool generalVaryingIndexing; - bool generalSamplerIndexing; - bool generalVariableIndexing; - bool generalConstantMatrixVectorIndexing; -}; - -struct TBuiltInResource { - int maxLights; - int maxClipPlanes; - int maxTextureUnits; - int maxTextureCoords; - int maxVertexAttribs; - int maxVertexUniformComponents; - int maxVaryingFloats; - int maxVertexTextureImageUnits; - int maxCombinedTextureImageUnits; - int maxTextureImageUnits; - int maxFragmentUniformComponents; - int maxDrawBuffers; - int maxVertexUniformVectors; - int maxVaryingVectors; - int maxFragmentUniformVectors; - int maxVertexOutputVectors; - int maxFragmentInputVectors; - int minProgramTexelOffset; - int maxProgramTexelOffset; - int maxClipDistances; - int maxComputeWorkGroupCountX; - int maxComputeWorkGroupCountY; - int maxComputeWorkGroupCountZ; - int maxComputeWorkGroupSizeX; - int maxComputeWorkGroupSizeY; - int maxComputeWorkGroupSizeZ; - int maxComputeUniformComponents; - int maxComputeTextureImageUnits; - int maxComputeImageUniforms; - int maxComputeAtomicCounters; - int maxComputeAtomicCounterBuffers; - int maxVaryingComponents; - int maxVertexOutputComponents; - int maxGeometryInputComponents; - int maxGeometryOutputComponents; - int maxFragmentInputComponents; - int maxImageUnits; - int maxCombinedImageUnitsAndFragmentOutputs; - int maxCombinedShaderOutputResources; - int maxImageSamples; - int maxVertexImageUniforms; - int maxTessControlImageUniforms; - int maxTessEvaluationImageUniforms; - int maxGeometryImageUniforms; - int maxFragmentImageUniforms; - int maxCombinedImageUniforms; - int maxGeometryTextureImageUnits; - int maxGeometryOutputVertices; - int maxGeometryTotalOutputComponents; - int maxGeometryUniformComponents; - int 
maxGeometryVaryingComponents; - int maxTessControlInputComponents; - int maxTessControlOutputComponents; - int maxTessControlTextureImageUnits; - int maxTessControlUniformComponents; - int maxTessControlTotalOutputComponents; - int maxTessEvaluationInputComponents; - int maxTessEvaluationOutputComponents; - int maxTessEvaluationTextureImageUnits; - int maxTessEvaluationUniformComponents; - int maxTessPatchComponents; - int maxPatchVertices; - int maxTessGenLevel; - int maxViewports; - int maxVertexAtomicCounters; - int maxTessControlAtomicCounters; - int maxTessEvaluationAtomicCounters; - int maxGeometryAtomicCounters; - int maxFragmentAtomicCounters; - int maxCombinedAtomicCounters; - int maxAtomicCounterBindings; - int maxVertexAtomicCounterBuffers; - int maxTessControlAtomicCounterBuffers; - int maxTessEvaluationAtomicCounterBuffers; - int maxGeometryAtomicCounterBuffers; - int maxFragmentAtomicCounterBuffers; - int maxCombinedAtomicCounterBuffers; - int maxAtomicCounterBufferSize; - int maxTransformFeedbackBuffers; - int maxTransformFeedbackInterleavedComponents; - int maxCullDistances; - int maxCombinedClipAndCullDistances; - int maxSamples; - - TLimits limits; -}; - -#endif // _RESOURCE_LIMITS_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/ShHandle.h b/third_party/glslang-spirv/Include/ShHandle.h deleted file mode 100644 index df07bd8ed..000000000 --- a/third_party/glslang-spirv/Include/ShHandle.h +++ /dev/null @@ -1,176 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#ifndef _SHHANDLE_INCLUDED_ -#define _SHHANDLE_INCLUDED_ - -// -// Machine independent part of the compiler private objects -// sent as ShHandle to the driver. -// -// This should not be included by driver code. -// - -#define SH_EXPORTING -#include "../Public/ShaderLang.h" -#include "../MachineIndependent/Versions.h" -#include "InfoSink.h" - -class TCompiler; -class TLinker; -class TUniformMap; - -// -// The base class used to back handles returned to the driver. 
-// -class TShHandleBase { -public: - TShHandleBase() { pool = new glslang::TPoolAllocator; } - virtual ~TShHandleBase() { delete pool; } - virtual TCompiler* getAsCompiler() { return 0; } - virtual TLinker* getAsLinker() { return 0; } - virtual TUniformMap* getAsUniformMap() { return 0; } - virtual glslang::TPoolAllocator* getPool() const { return pool; } -private: - glslang::TPoolAllocator* pool; -}; - -// -// The base class for the machine dependent linker to derive from -// for managing where uniforms live. -// -class TUniformMap : public TShHandleBase { -public: - TUniformMap() { } - virtual ~TUniformMap() { } - virtual TUniformMap* getAsUniformMap() { return this; } - virtual int getLocation(const char* name) = 0; - virtual TInfoSink& getInfoSink() { return infoSink; } - TInfoSink infoSink; -}; - -class TIntermNode; - -// -// The base class for the machine dependent compiler to derive from -// for managing object code from the compile. -// -class TCompiler : public TShHandleBase { -public: - TCompiler(EShLanguage l, TInfoSink& sink) : infoSink(sink) , language(l), haveValidObjectCode(false) { } - virtual ~TCompiler() { } - EShLanguage getLanguage() { return language; } - virtual TInfoSink& getInfoSink() { return infoSink; } - - virtual bool compile(TIntermNode* root, int version = 0, EProfile profile = ENoProfile) = 0; - - virtual TCompiler* getAsCompiler() { return this; } - virtual bool linkable() { return haveValidObjectCode; } - - TInfoSink& infoSink; -protected: - TCompiler& operator=(TCompiler&); - - EShLanguage language; - bool haveValidObjectCode; -}; - -// -// Link operations are based on a list of compile results... -// -typedef glslang::TVector TCompilerList; -typedef glslang::TVector THandleList; - -// -// The base class for the machine dependent linker to derive from -// to manage the resulting executable. 
-// - -class TLinker : public TShHandleBase { -public: - TLinker(EShExecutable e, TInfoSink& iSink) : - infoSink(iSink), - executable(e), - haveReturnableObjectCode(false), - appAttributeBindings(0), - fixedAttributeBindings(0), - excludedAttributes(0), - excludedCount(0), - uniformBindings(0) { } - virtual TLinker* getAsLinker() { return this; } - virtual ~TLinker() { } - virtual bool link(TCompilerList&, TUniformMap*) = 0; - virtual bool link(THandleList&) { return false; } - virtual void setAppAttributeBindings(const ShBindingTable* t) { appAttributeBindings = t; } - virtual void setFixedAttributeBindings(const ShBindingTable* t) { fixedAttributeBindings = t; } - virtual void getAttributeBindings(ShBindingTable const **t) const = 0; - virtual void setExcludedAttributes(const int* attributes, int count) { excludedAttributes = attributes; excludedCount = count; } - virtual ShBindingTable* getUniformBindings() const { return uniformBindings; } - virtual const void* getObjectCode() const { return 0; } // a real compiler would be returning object code here - virtual TInfoSink& getInfoSink() { return infoSink; } - TInfoSink& infoSink; -protected: - TLinker& operator=(TLinker&); - EShExecutable executable; - bool haveReturnableObjectCode; // true when objectCode is acceptable to send to driver - - const ShBindingTable* appAttributeBindings; - const ShBindingTable* fixedAttributeBindings; - const int* excludedAttributes; - int excludedCount; - ShBindingTable* uniformBindings; // created by the linker -}; - -// -// This is the interface between the machine independent code -// and the machine dependent code. -// -// The machine dependent code should derive from the classes -// above. Then Construct*() and Delete*() will create and -// destroy the machine dependent objects, which contain the -// above machine independent information. 
-// -TCompiler* ConstructCompiler(EShLanguage, int); - -TShHandleBase* ConstructLinker(EShExecutable, int); -TShHandleBase* ConstructBindings(); -void DeleteLinker(TShHandleBase*); -void DeleteBindingList(TShHandleBase* bindingList); - -TUniformMap* ConstructUniformMap(); -void DeleteCompiler(TCompiler*); - -void DeleteUniformMap(TUniformMap*); - -#endif // _SHHANDLE_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/Types.h b/third_party/glslang-spirv/Include/Types.h deleted file mode 100644 index cc847b5e7..000000000 --- a/third_party/glslang-spirv/Include/Types.h +++ /dev/null @@ -1,1924 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// Copyright (C) 2012-2016 LunarG, Inc. -// Copyright (C) 2015-2016 Google, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#ifndef _TYPES_INCLUDED -#define _TYPES_INCLUDED - -#include "../Include/Common.h" -#include "../Include/BaseTypes.h" -#include "../Public/ShaderLang.h" -#include "arrays.h" - -#include - -namespace glslang { - -const int GlslangMaxTypeLength = 200; // TODO: need to print block/struct one member per line, so this can stay bounded - -const char* const AnonymousPrefix = "anon@"; // for something like a block whose members can be directly accessed -inline bool IsAnonymous(const TString& name) -{ - return name.compare(0, 5, AnonymousPrefix) == 0; -} - -// -// Details within a sampler type -// -enum TSamplerDim { - EsdNone, - Esd1D, - Esd2D, - Esd3D, - EsdCube, - EsdRect, - EsdBuffer, - EsdSubpass, // goes only with non-sampled image (image is true) - EsdNumDims -}; - -struct TSampler { // misnomer now; includes images, textures without sampler, and textures with sampler - TBasicType type : 8; // type returned by sampler - TSamplerDim dim : 8; - bool arrayed : 1; - bool shadow : 1; - bool ms : 1; - bool image : 1; // image, combined should be false - bool combined : 1; // true means texture is combined with a sampler, false means texture with no sampler - bool sampler : 1; // true means a pure sampler, other fields should be clear() - bool external : 1; // GL_OES_EGL_image_external - unsigned int vectorSize : 3; // vector return type size. - - // Some languages support structures as sample results. 
Storing the whole structure in the - // TSampler is too large, so there is an index to a separate table. - static const unsigned structReturnIndexBits = 4; // number of index bits to use. - static const unsigned structReturnSlots = (1< TTypeList; - -typedef TVector TIdentifierList; - -// -// Following are a series of helper enums for managing layouts and qualifiers, -// used for TPublicType, TType, others. -// - -enum TLayoutPacking { - ElpNone, - ElpShared, // default, but different than saying nothing - ElpStd140, - ElpStd430, - ElpPacked, - ElpCount // If expanding, see bitfield width below -}; - -enum TLayoutMatrix { - ElmNone, - ElmRowMajor, - ElmColumnMajor, // default, but different than saying nothing - ElmCount // If expanding, see bitfield width below -}; - -// Union of geometry shader and tessellation shader geometry types. -// They don't go into TType, but rather have current state per shader or -// active parser type (TPublicType). -enum TLayoutGeometry { - ElgNone, - ElgPoints, - ElgLines, - ElgLinesAdjacency, - ElgLineStrip, - ElgTriangles, - ElgTrianglesAdjacency, - ElgTriangleStrip, - ElgQuads, - ElgIsolines, -}; - -enum TVertexSpacing { - EvsNone, - EvsEqual, - EvsFractionalEven, - EvsFractionalOdd -}; - -enum TVertexOrder { - EvoNone, - EvoCw, - EvoCcw -}; - -// Note: order matters, as type of format is done by comparison. 
-enum TLayoutFormat { - ElfNone, - - // Float image - ElfRgba32f, - ElfRgba16f, - ElfR32f, - ElfRgba8, - ElfRgba8Snorm, - - ElfEsFloatGuard, // to help with comparisons - - ElfRg32f, - ElfRg16f, - ElfR11fG11fB10f, - ElfR16f, - ElfRgba16, - ElfRgb10A2, - ElfRg16, - ElfRg8, - ElfR16, - ElfR8, - ElfRgba16Snorm, - ElfRg16Snorm, - ElfRg8Snorm, - ElfR16Snorm, - ElfR8Snorm, - - ElfFloatGuard, // to help with comparisons - - // Int image - ElfRgba32i, - ElfRgba16i, - ElfRgba8i, - ElfR32i, - - ElfEsIntGuard, // to help with comparisons - - ElfRg32i, - ElfRg16i, - ElfRg8i, - ElfR16i, - ElfR8i, - - ElfIntGuard, // to help with comparisons - - // Uint image - ElfRgba32ui, - ElfRgba16ui, - ElfRgba8ui, - ElfR32ui, - - ElfEsUintGuard, // to help with comparisons - - ElfRg32ui, - ElfRg16ui, - ElfRgb10a2ui, - ElfRg8ui, - ElfR16ui, - ElfR8ui, - - ElfCount -}; - -enum TLayoutDepth { - EldNone, - EldAny, - EldGreater, - EldLess, - EldUnchanged, - - EldCount -}; - -enum TBlendEquationShift { - // No 'EBlendNone': - // These are used as bit-shift amounts. A mask of such shifts will have type 'int', - // and in that space, 0 means no bits set, or none. In this enum, 0 means (1 << 0), a bit is set. 
- EBlendMultiply, - EBlendScreen, - EBlendOverlay, - EBlendDarken, - EBlendLighten, - EBlendColordodge, - EBlendColorburn, - EBlendHardlight, - EBlendSoftlight, - EBlendDifference, - EBlendExclusion, - EBlendHslHue, - EBlendHslSaturation, - EBlendHslColor, - EBlendHslLuminosity, - EBlendAllEquations, - - EBlendCount -}; - -class TQualifier { -public: - static const int layoutNotSet = -1; - - void clear() - { - precision = EpqNone; - invariant = false; - noContraction = false; - makeTemporary(); - declaredBuiltIn = EbvNone; - } - - // drop qualifiers that don't belong in a temporary variable - void makeTemporary() - { - semanticName = nullptr; - storage = EvqTemporary; - builtIn = EbvNone; - clearInterstage(); - clearMemory(); - specConstant = false; - clearLayout(); - } - - void clearInterstage() - { - clearInterpolation(); - patch = false; - sample = false; - } - - void clearInterpolation() - { - centroid = false; - smooth = false; - flat = false; - nopersp = false; -#ifdef AMD_EXTENSIONS - explicitInterp = false; -#endif - } - - void clearMemory() - { - coherent = false; - volatil = false; - restrict = false; - readonly = false; - writeonly = false; - } - - // Drop just the storage qualification, which perhaps should - // never be done, as it is fundamentally inconsistent, but need to - // explore what downstream consumers need. - // E.g., in a deference, it is an inconsistency between: - // A) partially dereferenced resource is still in the storage class it started in - // B) partially dereferenced resource is a new temporary object - // If A, then nothing should change, if B, then everything should change, but this is half way. 
- void makePartialTemporary() - { - storage = EvqTemporary; - specConstant = false; - } - - const char* semanticName; - TStorageQualifier storage : 6; - TBuiltInVariable builtIn : 8; - TBuiltInVariable declaredBuiltIn : 8; - TPrecisionQualifier precision : 3; - bool invariant : 1; // require canonical treatment for cross-shader invariance - bool noContraction: 1; // prevent contraction and reassociation, e.g., for 'precise' keyword, and expressions it affects - bool centroid : 1; - bool smooth : 1; - bool flat : 1; - bool nopersp : 1; -#ifdef AMD_EXTENSIONS - bool explicitInterp : 1; -#endif - bool patch : 1; - bool sample : 1; - bool coherent : 1; - bool volatil : 1; - bool restrict : 1; - bool readonly : 1; - bool writeonly : 1; - bool specConstant : 1; // having a constant_id is not sufficient: expressions have no id, but are still specConstant - - bool isMemory() const - { - return coherent || volatil || restrict || readonly || writeonly; - } - bool isInterpolation() const - { -#ifdef AMD_EXTENSIONS - return flat || smooth || nopersp || explicitInterp; -#else - return flat || smooth || nopersp; -#endif - } - bool isAuxiliary() const - { - return centroid || patch || sample; - } - - bool isPipeInput() const - { - switch (storage) { - case EvqVaryingIn: - case EvqFragCoord: - case EvqPointCoord: - case EvqFace: - case EvqVertexId: - case EvqInstanceId: - return true; - default: - return false; - } - } - - bool isPipeOutput() const - { - switch (storage) { - case EvqPosition: - case EvqPointSize: - case EvqClipVertex: - case EvqVaryingOut: - case EvqFragColor: - case EvqFragDepth: - return true; - default: - return false; - } - } - - bool isParamInput() const - { - switch (storage) { - case EvqIn: - case EvqInOut: - case EvqConstReadOnly: - return true; - default: - return false; - } - } - - bool isParamOutput() const - { - switch (storage) { - case EvqOut: - case EvqInOut: - return true; - default: - return false; - } - } - - bool isUniformOrBuffer() const - { - 
switch (storage) { - case EvqUniform: - case EvqBuffer: - return true; - default: - return false; - } - } - - bool isIo() const - { - switch (storage) { - case EvqUniform: - case EvqBuffer: - case EvqVaryingIn: - case EvqFragCoord: - case EvqPointCoord: - case EvqFace: - case EvqVertexId: - case EvqInstanceId: - case EvqPosition: - case EvqPointSize: - case EvqClipVertex: - case EvqVaryingOut: - case EvqFragColor: - case EvqFragDepth: - return true; - default: - return false; - } - } - - // True if this type of IO is supposed to be arrayed with extra level for per-vertex data - bool isArrayedIo(EShLanguage language) const - { - switch (language) { - case EShLangGeometry: - return isPipeInput(); - case EShLangTessControl: - return ! patch && (isPipeInput() || isPipeOutput()); - case EShLangTessEvaluation: - return ! patch && isPipeInput(); - default: - return false; - } - } - - // Implementing an embedded layout-qualifier class here, since C++ can't have a real class bitfield - void clearLayout() // all layout - { - clearUniformLayout(); - - layoutPushConstant = false; -#ifdef NV_EXTENSIONS - layoutPassthrough = false; - layoutViewportRelative = false; - // -2048 as the default value indicating layoutSecondaryViewportRelative is not set - layoutSecondaryViewportRelativeOffset = -2048; -#endif - - clearInterstageLayout(); - - layoutSpecConstantId = layoutSpecConstantIdEnd; - - layoutFormat = ElfNone; - } - void clearInterstageLayout() - { - layoutLocation = layoutLocationEnd; - layoutComponent = layoutComponentEnd; - layoutIndex = layoutIndexEnd; - clearStreamLayout(); - clearXfbLayout(); - } - void clearStreamLayout() - { - layoutStream = layoutStreamEnd; - } - void clearXfbLayout() - { - layoutXfbBuffer = layoutXfbBufferEnd; - layoutXfbStride = layoutXfbStrideEnd; - layoutXfbOffset = layoutXfbOffsetEnd; - } - - bool hasNonXfbLayout() const - { - return hasUniformLayout() || - hasAnyLocation() || - hasStream() || - hasFormat() || - layoutPushConstant; - } - bool 
hasLayout() const - { - return hasNonXfbLayout() || - hasXfb(); - } - TLayoutMatrix layoutMatrix : 3; - TLayoutPacking layoutPacking : 4; - int layoutOffset; - int layoutAlign; - - unsigned int layoutLocation :12; - static const unsigned int layoutLocationEnd = 0xFFF; - - unsigned int layoutComponent : 3; - static const unsigned int layoutComponentEnd = 4; - - unsigned int layoutSet : 7; - static const unsigned int layoutSetEnd = 0x3F; - - unsigned int layoutBinding : 16; - static const unsigned int layoutBindingEnd = 0xFFFF; - - unsigned int layoutIndex : 8; - static const unsigned int layoutIndexEnd = 0xFF; - - unsigned int layoutStream : 8; - static const unsigned int layoutStreamEnd = 0xFF; - - unsigned int layoutXfbBuffer : 4; - static const unsigned int layoutXfbBufferEnd = 0xF; - - unsigned int layoutXfbStride : 10; - static const unsigned int layoutXfbStrideEnd = 0x3FF; - - unsigned int layoutXfbOffset : 10; - static const unsigned int layoutXfbOffsetEnd = 0x3FF; - - unsigned int layoutAttachment : 8; // for input_attachment_index - static const unsigned int layoutAttachmentEnd = 0XFF; - - unsigned int layoutSpecConstantId : 11; - static const unsigned int layoutSpecConstantIdEnd = 0x7FF; - - TLayoutFormat layoutFormat : 8; - - bool layoutPushConstant; - -#ifdef NV_EXTENSIONS - bool layoutPassthrough; - bool layoutViewportRelative; - int layoutSecondaryViewportRelativeOffset; -#endif - - bool hasUniformLayout() const - { - return hasMatrix() || - hasPacking() || - hasOffset() || - hasBinding() || - hasSet() || - hasAlign(); - } - void clearUniformLayout() // only uniform specific - { - layoutMatrix = ElmNone; - layoutPacking = ElpNone; - layoutOffset = layoutNotSet; - layoutAlign = layoutNotSet; - - layoutSet = layoutSetEnd; - layoutBinding = layoutBindingEnd; - layoutAttachment = layoutAttachmentEnd; - } - - bool hasMatrix() const - { - return layoutMatrix != ElmNone; - } - bool hasPacking() const - { - return layoutPacking != ElpNone; - } - bool 
hasOffset() const - { - return layoutOffset != layoutNotSet; - } - bool hasAlign() const - { - return layoutAlign != layoutNotSet; - } - bool hasAnyLocation() const - { - return hasLocation() || - hasComponent() || - hasIndex(); - } - bool hasLocation() const - { - return layoutLocation != layoutLocationEnd; - } - bool hasComponent() const - { - return layoutComponent != layoutComponentEnd; - } - bool hasIndex() const - { - return layoutIndex != layoutIndexEnd; - } - bool hasSet() const - { - return layoutSet != layoutSetEnd; - } - bool hasBinding() const - { - return layoutBinding != layoutBindingEnd; - } - bool hasStream() const - { - return layoutStream != layoutStreamEnd; - } - bool hasFormat() const - { - return layoutFormat != ElfNone; - } - bool hasXfb() const - { - return hasXfbBuffer() || - hasXfbStride() || - hasXfbOffset(); - } - bool hasXfbBuffer() const - { - return layoutXfbBuffer != layoutXfbBufferEnd; - } - bool hasXfbStride() const - { - return layoutXfbStride != layoutXfbStrideEnd; - } - bool hasXfbOffset() const - { - return layoutXfbOffset != layoutXfbOffsetEnd; - } - bool hasAttachment() const - { - return layoutAttachment != layoutAttachmentEnd; - } - bool hasSpecConstantId() const - { - // Not the same thing as being a specialization constant, this - // is just whether or not it was declared with an ID. - return layoutSpecConstantId != layoutSpecConstantIdEnd; - } - bool isSpecConstant() const - { - // True if type is a specialization constant, whether or not it - // had a specialization-constant ID, and false if it is not a - // true front-end constant. - return specConstant; - } - bool isFrontEndConstant() const - { - // True if the front-end knows the final constant value. - // This allows front-end constant folding. - return storage == EvqConst && ! specConstant; - } - bool isConstant() const - { - // True if is either kind of constant; specialization or regular. 
- return isFrontEndConstant() || isSpecConstant(); - } - void makeSpecConstant() - { - storage = EvqConst; - specConstant = true; - } - static const char* getLayoutPackingString(TLayoutPacking packing) - { - switch (packing) { - case ElpPacked: return "packed"; - case ElpShared: return "shared"; - case ElpStd140: return "std140"; - case ElpStd430: return "std430"; - default: return "none"; - } - } - static const char* getLayoutMatrixString(TLayoutMatrix m) - { - switch (m) { - case ElmColumnMajor: return "column_major"; - case ElmRowMajor: return "row_major"; - default: return "none"; - } - } - static const char* getLayoutFormatString(TLayoutFormat f) - { - switch (f) { - case ElfRgba32f: return "rgba32f"; - case ElfRgba16f: return "rgba16f"; - case ElfRg32f: return "rg32f"; - case ElfRg16f: return "rg16f"; - case ElfR11fG11fB10f: return "r11f_g11f_b10f"; - case ElfR32f: return "r32f"; - case ElfR16f: return "r16f"; - case ElfRgba16: return "rgba16"; - case ElfRgb10A2: return "rgb10_a2"; - case ElfRgba8: return "rgba8"; - case ElfRg16: return "rg16"; - case ElfRg8: return "rg8"; - case ElfR16: return "r16"; - case ElfR8: return "r8"; - case ElfRgba16Snorm: return "rgba16_snorm"; - case ElfRgba8Snorm: return "rgba8_snorm"; - case ElfRg16Snorm: return "rg16_snorm"; - case ElfRg8Snorm: return "rg8_snorm"; - case ElfR16Snorm: return "r16_snorm"; - case ElfR8Snorm: return "r8_snorm"; - - case ElfRgba32i: return "rgba32i"; - case ElfRgba16i: return "rgba16i"; - case ElfRgba8i: return "rgba8i"; - case ElfRg32i: return "rg32i"; - case ElfRg16i: return "rg16i"; - case ElfRg8i: return "rg8i"; - case ElfR32i: return "r32i"; - case ElfR16i: return "r16i"; - case ElfR8i: return "r8i"; - - case ElfRgba32ui: return "rgba32ui"; - case ElfRgba16ui: return "rgba16ui"; - case ElfRgba8ui: return "rgba8ui"; - case ElfRg32ui: return "rg32ui"; - case ElfRg16ui: return "rg16ui"; - case ElfRgb10a2ui: return "rgb10_a2ui"; - case ElfRg8ui: return "rg8ui"; - case ElfR32ui: return "r32ui"; - 
case ElfR16ui: return "r16ui"; - case ElfR8ui: return "r8ui"; - default: return "none"; - } - } - static const char* getLayoutDepthString(TLayoutDepth d) - { - switch (d) { - case EldAny: return "depth_any"; - case EldGreater: return "depth_greater"; - case EldLess: return "depth_less"; - case EldUnchanged: return "depth_unchanged"; - default: return "none"; - } - } - static const char* getBlendEquationString(TBlendEquationShift e) - { - switch (e) { - case EBlendMultiply: return "blend_support_multiply"; - case EBlendScreen: return "blend_support_screen"; - case EBlendOverlay: return "blend_support_overlay"; - case EBlendDarken: return "blend_support_darken"; - case EBlendLighten: return "blend_support_lighten"; - case EBlendColordodge: return "blend_support_colordodge"; - case EBlendColorburn: return "blend_support_colorburn"; - case EBlendHardlight: return "blend_support_hardlight"; - case EBlendSoftlight: return "blend_support_softlight"; - case EBlendDifference: return "blend_support_difference"; - case EBlendExclusion: return "blend_support_exclusion"; - case EBlendHslHue: return "blend_support_hsl_hue"; - case EBlendHslSaturation: return "blend_support_hsl_saturation"; - case EBlendHslColor: return "blend_support_hsl_color"; - case EBlendHslLuminosity: return "blend_support_hsl_luminosity"; - case EBlendAllEquations: return "blend_support_all_equations"; - default: return "unknown"; - } - } - static const char* getGeometryString(TLayoutGeometry geometry) - { - switch (geometry) { - case ElgPoints: return "points"; - case ElgLines: return "lines"; - case ElgLinesAdjacency: return "lines_adjacency"; - case ElgLineStrip: return "line_strip"; - case ElgTriangles: return "triangles"; - case ElgTrianglesAdjacency: return "triangles_adjacency"; - case ElgTriangleStrip: return "triangle_strip"; - case ElgQuads: return "quads"; - case ElgIsolines: return "isolines"; - default: return "none"; - } - } - static const char* getVertexSpacingString(TVertexSpacing spacing) 
- { - switch (spacing) { - case EvsEqual: return "equal_spacing"; - case EvsFractionalEven: return "fractional_even_spacing"; - case EvsFractionalOdd: return "fractional_odd_spacing"; - default: return "none"; - } - } - static const char* getVertexOrderString(TVertexOrder order) - { - switch (order) { - case EvoCw: return "cw"; - case EvoCcw: return "ccw"; - default: return "none"; - } - } - static int mapGeometryToSize(TLayoutGeometry geometry) - { - switch (geometry) { - case ElgPoints: return 1; - case ElgLines: return 2; - case ElgLinesAdjacency: return 4; - case ElgTriangles: return 3; - case ElgTrianglesAdjacency: return 6; - default: return 0; - } - } -}; - -// Qualifiers that don't need to be keep per object. They have shader scope, not object scope. -// So, they will not be part of TType, TQualifier, etc. -struct TShaderQualifiers { - TLayoutGeometry geometry; // geometry/tessellation shader in/out primitives - bool pixelCenterInteger; // fragment shader - bool originUpperLeft; // fragment shader - int invocations; - int vertices; // both for tessellation "vertices" and geometry "max_vertices" - TVertexSpacing spacing; - TVertexOrder order; - bool pointMode; - int localSize[3]; // compute shader - int localSizeSpecId[3]; // compute shader specialization id for gl_WorkGroupSize - bool earlyFragmentTests; // fragment input - bool postDepthCoverage; // fragment input - TLayoutDepth layoutDepth; - bool blendEquation; // true if any blend equation was specified - int numViews; // multiview extenstions - -#ifdef NV_EXTENSIONS - bool layoutOverrideCoverage; // true if layout override_coverage set -#endif - - void init() - { - geometry = ElgNone; - originUpperLeft = false; - pixelCenterInteger = false; - invocations = TQualifier::layoutNotSet; - vertices = TQualifier::layoutNotSet; - spacing = EvsNone; - order = EvoNone; - pointMode = false; - localSize[0] = 1; - localSize[1] = 1; - localSize[2] = 1; - localSizeSpecId[0] = TQualifier::layoutNotSet; - 
localSizeSpecId[1] = TQualifier::layoutNotSet; - localSizeSpecId[2] = TQualifier::layoutNotSet; - earlyFragmentTests = false; - postDepthCoverage = false; - layoutDepth = EldNone; - blendEquation = false; - numViews = TQualifier::layoutNotSet; -#ifdef NV_EXTENSIONS - layoutOverrideCoverage = false; -#endif - } - - // Merge in characteristics from the 'src' qualifier. They can override when - // set, but never erase when not set. - void merge(const TShaderQualifiers& src) - { - if (src.geometry != ElgNone) - geometry = src.geometry; - if (src.pixelCenterInteger) - pixelCenterInteger = src.pixelCenterInteger; - if (src.originUpperLeft) - originUpperLeft = src.originUpperLeft; - if (src.invocations != TQualifier::layoutNotSet) - invocations = src.invocations; - if (src.vertices != TQualifier::layoutNotSet) - vertices = src.vertices; - if (src.spacing != EvsNone) - spacing = src.spacing; - if (src.order != EvoNone) - order = src.order; - if (src.pointMode) - pointMode = true; - for (int i = 0; i < 3; ++i) { - if (src.localSize[i] > 1) - localSize[i] = src.localSize[i]; - } - for (int i = 0; i < 3; ++i) { - if (src.localSizeSpecId[i] != TQualifier::layoutNotSet) - localSizeSpecId[i] = src.localSizeSpecId[i]; - } - if (src.earlyFragmentTests) - earlyFragmentTests = true; - if (src.postDepthCoverage) - postDepthCoverage = true; - if (src.layoutDepth) - layoutDepth = src.layoutDepth; - if (src.blendEquation) - blendEquation = src.blendEquation; - if (src.numViews != TQualifier::layoutNotSet) - numViews = src.numViews; -#ifdef NV_EXTENSIONS - if (src.layoutOverrideCoverage) - layoutOverrideCoverage = src.layoutOverrideCoverage; -#endif - } -}; - -// -// TPublicType is just temporarily used while parsing and not quite the same -// information kept per node in TType. Due to the bison stack, it can't have -// types that it thinks have non-trivial constructors. It should -// just be used while recognizing the grammar, not anything else. 
-// Once enough is known about the situation, the proper information -// moved into a TType, or the parse context, etc. -// -class TPublicType { -public: - TBasicType basicType; - TSampler sampler; - TQualifier qualifier; - TShaderQualifiers shaderQualifiers; - int vectorSize : 4; - int matrixCols : 4; - int matrixRows : 4; - TArraySizes* arraySizes; - const TType* userDef; - TSourceLoc loc; - - void initType(const TSourceLoc& l) - { - basicType = EbtVoid; - vectorSize = 1; - matrixRows = 0; - matrixCols = 0; - arraySizes = nullptr; - userDef = nullptr; - loc = l; - } - - void initQualifiers(bool global = false) - { - qualifier.clear(); - if (global) - qualifier.storage = EvqGlobal; - } - - void init(const TSourceLoc& l, bool global = false) - { - initType(l); - sampler.clear(); - initQualifiers(global); - shaderQualifiers.init(); - } - - void setVector(int s) - { - matrixRows = 0; - matrixCols = 0; - vectorSize = s; - } - - void setMatrix(int c, int r) - { - matrixRows = r; - matrixCols = c; - vectorSize = 0; - } - - bool isScalar() const - { - return matrixCols == 0 && vectorSize == 1 && arraySizes == nullptr && userDef == nullptr; - } - - // "Image" is a superset of "Subpass" - bool isImage() const { return basicType == EbtSampler && sampler.isImage(); } - bool isSubpass() const { return basicType == EbtSampler && sampler.isSubpass(); } -}; - -// -// Base class for things that have a type. 
-// -class TType { -public: - POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) - - // for "empty" type (no args) or simple scalar/vector/matrix - explicit TType(TBasicType t = EbtVoid, TStorageQualifier q = EvqTemporary, int vs = 1, int mc = 0, int mr = 0, - bool isVector = false) : - basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), vector1(isVector && vs == 1), - arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr) - { - sampler.clear(); - qualifier.clear(); - qualifier.storage = q; - assert(!(isMatrix() && vectorSize != 0)); // prevent vectorSize != 0 on matrices - } - // for explicit precision qualifier - TType(TBasicType t, TStorageQualifier q, TPrecisionQualifier p, int vs = 1, int mc = 0, int mr = 0, - bool isVector = false) : - basicType(t), vectorSize(vs), matrixCols(mc), matrixRows(mr), vector1(isVector && vs == 1), - arraySizes(nullptr), structure(nullptr), fieldName(nullptr), typeName(nullptr) - { - sampler.clear(); - qualifier.clear(); - qualifier.storage = q; - qualifier.precision = p; - assert(p >= EpqNone && p <= EpqHigh); - assert(!(isMatrix() && vectorSize != 0)); // prevent vectorSize != 0 on matrices - } - // for turning a TPublicType into a TType, using a shallow copy - explicit TType(const TPublicType& p) : - basicType(p.basicType), - vectorSize(p.vectorSize), matrixCols(p.matrixCols), matrixRows(p.matrixRows), vector1(false), - arraySizes(p.arraySizes), structure(nullptr), fieldName(nullptr), typeName(nullptr) - { - if (basicType == EbtSampler) - sampler = p.sampler; - else - sampler.clear(); - qualifier = p.qualifier; - if (p.userDef) { - structure = p.userDef->getWritableStruct(); // public type is short-lived; there are no sharing issues - typeName = NewPoolTString(p.userDef->getTypeName().c_str()); - } - } - // for construction of sampler types - TType(const TSampler& sampler, TStorageQualifier q = EvqUniform, TArraySizes* as = nullptr) : - basicType(EbtSampler), vectorSize(1), matrixCols(0), 
matrixRows(0), vector1(false), - arraySizes(as), structure(nullptr), fieldName(nullptr), typeName(nullptr), - sampler(sampler) - { - qualifier.clear(); - qualifier.storage = q; - } - // to efficiently make a dereferenced type - // without ever duplicating the outer structure that will be thrown away - // and using only shallow copy - TType(const TType& type, int derefIndex, bool rowMajor = false) - { - if (type.isArray()) { - shallowCopy(type); - if (type.getArraySizes()->getNumDims() == 1) { - arraySizes = nullptr; - } else { - // want our own copy of the array, so we can edit it - arraySizes = new TArraySizes; - arraySizes->copyDereferenced(*type.arraySizes); - } - } else if (type.basicType == EbtStruct || type.basicType == EbtBlock) { - // do a structure dereference - const TTypeList& memberList = *type.getStruct(); - shallowCopy(*memberList[derefIndex].type); - return; - } else { - // do a vector/matrix dereference - shallowCopy(type); - if (matrixCols > 0) { - // dereference from matrix to vector - if (rowMajor) - vectorSize = matrixCols; - else - vectorSize = matrixRows; - matrixCols = 0; - matrixRows = 0; - if (vectorSize == 1) - vector1 = true; - } else if (isVector()) { - // dereference from vector to scalar - vectorSize = 1; - vector1 = false; - } - } - } - // for making structures, ... 
- TType(TTypeList* userDef, const TString& n) : - basicType(EbtStruct), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), - arraySizes(nullptr), structure(userDef), fieldName(nullptr) - { - sampler.clear(); - qualifier.clear(); - typeName = NewPoolTString(n.c_str()); - } - // For interface blocks - TType(TTypeList* userDef, const TString& n, const TQualifier& q) : - basicType(EbtBlock), vectorSize(1), matrixCols(0), matrixRows(0), vector1(false), - qualifier(q), arraySizes(nullptr), structure(userDef), fieldName(nullptr) - { - sampler.clear(); - typeName = NewPoolTString(n.c_str()); - } - virtual ~TType() {} - - // Not for use across pool pops; it will cause multiple instances of TType to point to the same information. - // This only works if that information (like a structure's list of types) does not change and - // the instances are sharing the same pool. - void shallowCopy(const TType& copyOf) - { - basicType = copyOf.basicType; - sampler = copyOf.sampler; - qualifier = copyOf.qualifier; - vectorSize = copyOf.vectorSize; - matrixCols = copyOf.matrixCols; - matrixRows = copyOf.matrixRows; - vector1 = copyOf.vector1; - arraySizes = copyOf.arraySizes; // copying the pointer only, not the contents - structure = copyOf.structure; - fieldName = copyOf.fieldName; - typeName = copyOf.typeName; - } - - // Make complete copy of the whole type graph rooted at 'copyOf'. 
- void deepCopy(const TType& copyOf) - { - TMap copied; // to enable copying a type graph as a graph, not a tree - deepCopy(copyOf, copied); - } - - // Recursively make temporary - void makeTemporary() - { - getQualifier().makeTemporary(); - - if (isStruct()) - for (unsigned int i = 0; i < structure->size(); ++i) - (*structure)[i].type->makeTemporary(); - } - - TType* clone() const - { - TType *newType = new TType(); - newType->deepCopy(*this); - - return newType; - } - - void makeVector() { vector1 = true; } - - // Merge type from parent, where a parentType is at the beginning of a declaration, - // establishing some characteristics for all subsequent names, while this type - // is on the individual names. - void mergeType(const TPublicType& parentType) - { - // arrayness is currently the only child aspect that has to be preserved - basicType = parentType.basicType; - vectorSize = parentType.vectorSize; - matrixCols = parentType.matrixCols; - matrixRows = parentType.matrixRows; - vector1 = false; // TPublicType is only GLSL which so far has no vec1 - qualifier = parentType.qualifier; - sampler = parentType.sampler; - if (parentType.arraySizes) - newArraySizes(*parentType.arraySizes); - if (parentType.userDef) { - structure = parentType.userDef->getWritableStruct(); - setTypeName(parentType.userDef->getTypeName()); - } - } - - virtual void hideMember() { basicType = EbtVoid; vectorSize = 1; } - virtual bool hiddenMember() const { return basicType == EbtVoid; } - - virtual void setTypeName(const TString& n) { typeName = NewPoolTString(n.c_str()); } - virtual void setFieldName(const TString& n) { fieldName = NewPoolTString(n.c_str()); } - virtual const TString& getTypeName() const - { - assert(typeName); - return *typeName; - } - - virtual const TString& getFieldName() const - { - assert(fieldName); - return *fieldName; - } - - virtual TBasicType getBasicType() const { return basicType; } - virtual const TSampler& getSampler() const { return sampler; } - virtual 
TSampler& getSampler() { return sampler; } - - virtual TQualifier& getQualifier() { return qualifier; } - virtual const TQualifier& getQualifier() const { return qualifier; } - - virtual int getVectorSize() const { return vectorSize; } // returns 1 for either scalar or vector of size 1, valid for both - virtual int getMatrixCols() const { return matrixCols; } - virtual int getMatrixRows() const { return matrixRows; } - virtual int getOuterArraySize() const { return arraySizes->getOuterSize(); } - virtual TIntermTyped* getOuterArrayNode() const { return arraySizes->getOuterNode(); } - virtual int getCumulativeArraySize() const { return arraySizes->getCumulativeSize(); } - virtual bool isArrayOfArrays() const { return arraySizes != nullptr && arraySizes->getNumDims() > 1; } - virtual int getImplicitArraySize() const { return arraySizes->getImplicitSize(); } - virtual const TArraySizes* getArraySizes() const { return arraySizes; } - virtual TArraySizes& getArraySizes() { assert(arraySizes != nullptr); return *arraySizes; } - - virtual bool isScalar() const { return ! isVector() && ! isMatrix() && ! isStruct() && ! isArray(); } - virtual bool isScalarOrVec1() const { return isScalar() || vector1; } - virtual bool isVector() const { return vectorSize > 1 || vector1; } - virtual bool isMatrix() const { return matrixCols ? 
true : false; } - virtual bool isArray() const { return arraySizes != nullptr; } - virtual bool isExplicitlySizedArray() const { return isArray() && getOuterArraySize() != UnsizedArraySize; } - virtual bool isImplicitlySizedArray() const { return isArray() && getOuterArraySize() == UnsizedArraySize && qualifier.storage != EvqBuffer; } - virtual bool isRuntimeSizedArray() const { return isArray() && getOuterArraySize() == UnsizedArraySize && qualifier.storage == EvqBuffer; } - virtual bool isStruct() const { return structure != nullptr; } -#ifdef AMD_EXTENSIONS - virtual bool isFloatingDomain() const { return basicType == EbtFloat || basicType == EbtDouble || basicType == EbtFloat16; } -#else - virtual bool isFloatingDomain() const { return basicType == EbtFloat || basicType == EbtDouble; } -#endif - virtual bool isIntegerDomain() const - { - switch (basicType) { - case EbtInt: - case EbtUint: - case EbtInt64: - case EbtUint64: -#ifdef AMD_EXTENSIONS - case EbtInt16: - case EbtUint16: -#endif - case EbtAtomicUint: - return true; - default: - break; - } - return false; - } - virtual bool isOpaque() const { return basicType == EbtSampler || basicType == EbtAtomicUint; } - virtual bool isBuiltIn() const { return getQualifier().builtIn != EbvNone; } - - // "Image" is a superset of "Subpass" - virtual bool isImage() const { return basicType == EbtSampler && getSampler().isImage(); } - virtual bool isSubpass() const { return basicType == EbtSampler && getSampler().isSubpass(); } - - // return true if this type contains any subtype which satisfies the given predicate. 
- template - bool contains(P predicate) const - { - if (predicate(this)) - return true; - - const auto hasa = [predicate](const TTypeLoc& tl) { return tl.type->contains(predicate); }; - - return structure && std::any_of(structure->begin(), structure->end(), hasa); - } - - // Recursively checks if the type contains the given basic type - virtual bool containsBasicType(TBasicType checkType) const - { - return contains([checkType](const TType* t) { return t->basicType == checkType; } ); - } - - // Recursively check the structure for any arrays, needed for some error checks - virtual bool containsArray() const - { - return contains([](const TType* t) { return t->isArray(); } ); - } - - // Check the structure for any structures, needed for some error checks - virtual bool containsStructure() const - { - return contains([this](const TType* t) { return t != this && t->isStruct(); } ); - } - - // Recursively check the structure for any implicitly-sized arrays, needed for triggering a copyUp(). - virtual bool containsImplicitlySizedArray() const - { - return contains([](const TType* t) { return t->isImplicitlySizedArray(); } ); - } - - virtual bool containsOpaque() const - { - return contains([](const TType* t) { return t->isOpaque(); } ); - } - - // Recursively checks if the type contains a built-in variable - virtual bool containsBuiltIn() const - { - return contains([](const TType* t) { return t->isBuiltIn(); } ); - } - - virtual bool containsNonOpaque() const - { - const auto nonOpaque = [](const TType* t) { - switch (t->basicType) { - case EbtVoid: - case EbtFloat: - case EbtDouble: -#ifdef AMD_EXTENSIONS - case EbtFloat16: -#endif - case EbtInt: - case EbtUint: - case EbtInt64: - case EbtUint64: -#ifdef AMD_EXTENSIONS - case EbtInt16: - case EbtUint16: -#endif - case EbtBool: - return true; - default: - return false; - } - }; - - return contains(nonOpaque); - } - - virtual bool containsSpecializationSize() const - { - return contains([](const TType* t) { return 
t->isArray() && t->arraySizes->isOuterSpecialization(); } ); - } - - // Array editing methods. Array descriptors can be shared across - // type instances. This allows all uses of the same array - // to be updated at once. E.g., all nodes can be explicitly sized - // by tracking and correcting one implicit size. Or, all nodes - // can get the explicit size on a redeclaration that gives size. - // - // N.B.: Don't share with the shared symbol tables (symbols are - // marked as isReadOnly(). Such symbols with arrays that will be - // edited need to copyUp() on first use, so that - // A) the edits don't effect the shared symbol table, and - // B) the edits are shared across all users. - void updateArraySizes(const TType& type) - { - // For when we may already be sharing existing array descriptors, - // keeping the pointers the same, just updating the contents. - assert(arraySizes != nullptr); - assert(type.arraySizes != nullptr); - *arraySizes = *type.arraySizes; - } - void newArraySizes(const TArraySizes& s) - { - // For setting a fresh new set of array sizes, not yet worrying about sharing. - arraySizes = new TArraySizes; - *arraySizes = s; - } - void clearArraySizes() - { - arraySizes = 0; - } - void addArrayOuterSizes(const TArraySizes& s) - { - if (arraySizes == nullptr) - newArraySizes(s); - else - arraySizes->addOuterSizes(s); - } - void changeOuterArraySize(int s) { arraySizes->changeOuterSize(s); } - void setImplicitArraySize(int s) { arraySizes->setImplicitSize(s); } - - // Recursively make the implicit array size the explicit array size, through the type tree. 
- void adoptImplicitArraySizes() - { - if (isImplicitlySizedArray()) - changeOuterArraySize(getImplicitArraySize()); - if (isStruct()) { - for (int i = 0; i < (int)structure->size(); ++i) - (*structure)[i].type->adoptImplicitArraySizes(); - } - } - - const char* getBasicString() const - { - return TType::getBasicString(basicType); - } - - static const char* getBasicString(TBasicType t) - { - switch (t) { - case EbtVoid: return "void"; - case EbtFloat: return "float"; - case EbtDouble: return "double"; -#ifdef AMD_EXTENSIONS - case EbtFloat16: return "float16_t"; -#endif - case EbtInt: return "int"; - case EbtUint: return "uint"; - case EbtInt64: return "int64_t"; - case EbtUint64: return "uint64_t"; -#ifdef AMD_EXTENSIONS - case EbtInt16: return "int16_t"; - case EbtUint16: return "uint16_t"; -#endif - case EbtBool: return "bool"; - case EbtAtomicUint: return "atomic_uint"; - case EbtSampler: return "sampler/image"; - case EbtStruct: return "structure"; - case EbtBlock: return "block"; - default: return "unknown type"; - } - } - - TString getCompleteString() const - { - TString typeString; - - const auto appendStr = [&](const char* s) { typeString.append(s); }; - const auto appendUint = [&](unsigned int u) { typeString.append(std::to_string(u).c_str()); }; - const auto appendInt = [&](int i) { typeString.append(std::to_string(i).c_str()); }; - - if (qualifier.hasLayout()) { - // To reduce noise, skip this if the only layout is an xfb_buffer - // with no triggering xfb_offset. 
- TQualifier noXfbBuffer = qualifier; - noXfbBuffer.layoutXfbBuffer = TQualifier::layoutXfbBufferEnd; - if (noXfbBuffer.hasLayout()) { - appendStr("layout("); - if (qualifier.hasAnyLocation()) { - appendStr(" location="); - appendUint(qualifier.layoutLocation); - if (qualifier.hasComponent()) { - appendStr(" component="); - appendUint(qualifier.layoutComponent); - } - if (qualifier.hasIndex()) { - appendStr(" index="); - appendUint(qualifier.layoutIndex); - } - } - if (qualifier.hasSet()) { - appendStr(" set="); - appendUint(qualifier.layoutSet); - } - if (qualifier.hasBinding()) { - appendStr(" binding="); - appendUint(qualifier.layoutBinding); - } - if (qualifier.hasStream()) { - appendStr(" stream="); - appendUint(qualifier.layoutStream); - } - if (qualifier.hasMatrix()) { - appendStr(" "); - appendStr(TQualifier::getLayoutMatrixString(qualifier.layoutMatrix)); - } - if (qualifier.hasPacking()) { - appendStr(" "); - appendStr(TQualifier::getLayoutPackingString(qualifier.layoutPacking)); - } - if (qualifier.hasOffset()) { - appendStr(" offset="); - appendInt(qualifier.layoutOffset); - } - if (qualifier.hasAlign()) { - appendStr(" align="); - appendInt(qualifier.layoutAlign); - } - if (qualifier.hasFormat()) { - appendStr(" "); - appendStr(TQualifier::getLayoutFormatString(qualifier.layoutFormat)); - } - if (qualifier.hasXfbBuffer() && qualifier.hasXfbOffset()) { - appendStr(" xfb_buffer="); - appendUint(qualifier.layoutXfbBuffer); - } - if (qualifier.hasXfbOffset()) { - appendStr(" xfb_offset="); - appendUint(qualifier.layoutXfbOffset); - } - if (qualifier.hasXfbStride()) { - appendStr(" xfb_stride="); - appendUint(qualifier.layoutXfbStride); - } - if (qualifier.hasAttachment()) { - appendStr(" input_attachment_index="); - appendUint(qualifier.layoutAttachment); - } - if (qualifier.hasSpecConstantId()) { - appendStr(" constant_id="); - appendUint(qualifier.layoutSpecConstantId); - } - if (qualifier.layoutPushConstant) - appendStr(" push_constant"); - -#ifdef 
NV_EXTENSIONS - if (qualifier.layoutPassthrough) - appendStr(" passthrough"); - if (qualifier.layoutViewportRelative) - appendStr(" layoutViewportRelative"); - if (qualifier.layoutSecondaryViewportRelativeOffset != -2048) { - appendStr(" layoutSecondaryViewportRelativeOffset="); - appendInt(qualifier.layoutSecondaryViewportRelativeOffset); - } -#endif - - appendStr(")"); - } - } - - if (qualifier.invariant) - appendStr(" invariant"); - if (qualifier.noContraction) - appendStr(" noContraction"); - if (qualifier.centroid) - appendStr(" centroid"); - if (qualifier.smooth) - appendStr(" smooth"); - if (qualifier.flat) - appendStr(" flat"); - if (qualifier.nopersp) - appendStr(" noperspective"); -#ifdef AMD_EXTENSIONS - if (qualifier.explicitInterp) - appendStr(" __explicitInterpAMD"); -#endif - if (qualifier.patch) - appendStr(" patch"); - if (qualifier.sample) - appendStr(" sample"); - if (qualifier.coherent) - appendStr(" coherent"); - if (qualifier.volatil) - appendStr(" volatile"); - if (qualifier.restrict) - appendStr(" restrict"); - if (qualifier.readonly) - appendStr(" readonly"); - if (qualifier.writeonly) - appendStr(" writeonly"); - if (qualifier.specConstant) - appendStr(" specialization-constant"); - appendStr(" "); - appendStr(getStorageQualifierString()); - if (isArray()) { - for(int i = 0; i < (int)arraySizes->getNumDims(); ++i) { - int size = arraySizes->getDimSize(i); - if (size == 0) - appendStr(" implicitly-sized array of"); - else { - appendStr(" "); - appendInt(arraySizes->getDimSize(i)); - appendStr("-element array of"); - } - } - } - if (qualifier.precision != EpqNone) { - appendStr(" "); - appendStr(getPrecisionQualifierString()); - } - if (isMatrix()) { - appendStr(" "); - appendInt(matrixCols); - appendStr("X"); - appendInt(matrixRows); - appendStr(" matrix of"); - } else if (isVector()) { - appendStr(" "); - appendInt(vectorSize); - appendStr("-component vector of"); - } - - appendStr(" "); - typeString.append(getBasicTypeString()); - - if 
(qualifier.builtIn != EbvNone) { - appendStr(" "); - appendStr(getBuiltInVariableString()); - } - - // Add struct/block members - if (structure) { - appendStr("{"); - for (size_t i = 0; i < structure->size(); ++i) { - if (! (*structure)[i].type->hiddenMember()) { - typeString.append((*structure)[i].type->getCompleteString()); - typeString.append(" "); - typeString.append((*structure)[i].type->getFieldName()); - if (i < structure->size() - 1) - appendStr(", "); - } - } - appendStr("}"); - } - - return typeString; - } - - TString getBasicTypeString() const - { - if (basicType == EbtSampler) - return sampler.getString(); - else - return getBasicString(); - } - - const char* getStorageQualifierString() const { return GetStorageQualifierString(qualifier.storage); } - const char* getBuiltInVariableString() const { return GetBuiltInVariableString(qualifier.builtIn); } - const char* getPrecisionQualifierString() const { return GetPrecisionQualifierString(qualifier.precision); } - const TTypeList* getStruct() const { return structure; } - void setStruct(TTypeList* s) { structure = s; } - TTypeList* getWritableStruct() const { return structure; } // This should only be used when known to not be sharing with other threads - - int computeNumComponents() const - { - int components = 0; - - if (getBasicType() == EbtStruct || getBasicType() == EbtBlock) { - for (TTypeList::const_iterator tl = getStruct()->begin(); tl != getStruct()->end(); tl++) - components += ((*tl).type)->computeNumComponents(); - } else if (matrixCols) - components = matrixCols * matrixRows; - else - components = vectorSize; - - if (arraySizes != nullptr) { - components *= arraySizes->getCumulativeSize(); - } - - return components; - } - - // append this type's mangled name to the passed in 'name' - void appendMangledName(TString& name) const - { - buildMangledName(name); - name += ';' ; - } - - // Do two structure types match? 
They could be declared independently, - // in different places, but still might satisfy the definition of matching. - // From the spec: - // - // "Structures must have the same name, sequence of type names, and - // type definitions, and member names to be considered the same type. - // This rule applies recursively for nested or embedded types." - // - bool sameStructType(const TType& right) const - { - // Most commonly, they are both nullptr, or the same pointer to the same actual structure - if (structure == right.structure) - return true; - - // Both being nullptr was caught above, now they both have to be structures of the same number of elements - if (structure == nullptr || right.structure == nullptr || - structure->size() != right.structure->size()) - return false; - - // Structure names have to match - if (*typeName != *right.typeName) - return false; - - // Compare the names and types of all the members, which have to match - for (unsigned int i = 0; i < structure->size(); ++i) { - if ((*structure)[i].type->getFieldName() != (*right.structure)[i].type->getFieldName()) - return false; - - if (*(*structure)[i].type != *(*right.structure)[i].type) - return false; - } - - return true; - } - - // See if two types match, in all aspects except arrayness - bool sameElementType(const TType& right) const - { - return basicType == right.basicType && sameElementShape(right); - } - - // See if two type's arrayness match - bool sameArrayness(const TType& right) const - { - return ((arraySizes == nullptr && right.arraySizes == nullptr) || - (arraySizes != nullptr && right.arraySizes != nullptr && *arraySizes == *right.arraySizes)); - } - - // See if two type's arrayness match in everything except their outer dimension - bool sameInnerArrayness(const TType& right) const - { - assert(arraySizes != nullptr && right.arraySizes != nullptr); - return arraySizes->sameInnerArrayness(*right.arraySizes); - } - - // See if two type's elements match in all ways except basic type - 
bool sameElementShape(const TType& right) const - { - return sampler == right.sampler && - vectorSize == right.vectorSize && - matrixCols == right.matrixCols && - matrixRows == right.matrixRows && - vector1 == right.vector1 && - sameStructType(right); - } - - // See if two types match in all ways (just the actual type, not qualification) - bool operator==(const TType& right) const - { - return sameElementType(right) && sameArrayness(right); - } - - bool operator!=(const TType& right) const - { - return ! operator==(right); - } - -protected: - // Require consumer to pick between deep copy and shallow copy. - TType(const TType& type); - TType& operator=(const TType& type); - - // Recursively copy a type graph, while preserving the graph-like - // quality. That is, don't make more than one copy of a structure that - // gets reused multiple times in the type graph. - void deepCopy(const TType& copyOf, TMap& copiedMap) - { - shallowCopy(copyOf); - - if (copyOf.arraySizes) { - arraySizes = new TArraySizes; - *arraySizes = *copyOf.arraySizes; - } - - if (copyOf.structure) { - auto prevCopy = copiedMap.find(copyOf.structure); - if (prevCopy != copiedMap.end()) - structure = prevCopy->second; - else { - structure = new TTypeList; - copiedMap[copyOf.structure] = structure; - for (unsigned int i = 0; i < copyOf.structure->size(); ++i) { - TTypeLoc typeLoc; - typeLoc.loc = (*copyOf.structure)[i].loc; - typeLoc.type = new TType(); - typeLoc.type->deepCopy(*(*copyOf.structure)[i].type, copiedMap); - structure->push_back(typeLoc); - } - } - } - - if (copyOf.fieldName) - fieldName = NewPoolTString(copyOf.fieldName->c_str()); - if (copyOf.typeName) - typeName = NewPoolTString(copyOf.typeName->c_str()); - } - - - void buildMangledName(TString&) const; - - TBasicType basicType : 8; - int vectorSize : 4; // 1 means either scalar or 1-component vector; see vector1 to disambiguate. 
- int matrixCols : 4; - int matrixRows : 4; - bool vector1 : 1; // Backward-compatible tracking of a 1-component vector distinguished from a scalar. - // GLSL 4.5 never has a 1-component vector; so this will always be false until such - // functionality is added. - // HLSL does have a 1-component vectors, so this will be true to disambiguate - // from a scalar. - TQualifier qualifier; - - TArraySizes* arraySizes; // nullptr unless an array; can be shared across types - TTypeList* structure; // nullptr unless this is a struct; can be shared across types - TString *fieldName; // for structure field names - TString *typeName; // for structure type name - TSampler sampler; -}; - -} // end namespace glslang - -#endif // _TYPES_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/arrays.h b/third_party/glslang-spirv/Include/arrays.h deleted file mode 100644 index 1660a99f9..000000000 --- a/third_party/glslang-spirv/Include/arrays.h +++ /dev/null @@ -1,329 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// Copyright (C) 2012-2013 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -// -// Implement types for tracking GLSL arrays, arrays of arrays, etc. -// - -#ifndef _ARRAYS_INCLUDED -#define _ARRAYS_INCLUDED - -namespace glslang { - -// This is used to mean there is no size yet (unsized), it is waiting to get a size from somewhere else. -const int UnsizedArraySize = 0; - -class TIntermTyped; -extern bool SameSpecializationConstants(TIntermTyped*, TIntermTyped*); - -// Specialization constants need both a nominal size and a node that defines -// the specialization constant being used. Array types are the same when their -// size and specialization constant nodes are the same. -struct TArraySize { - unsigned int size; - TIntermTyped* node; // nullptr means no specialization constant node - bool operator==(const TArraySize& rhs) const - { - if (size != rhs.size) - return false; - if (node == nullptr || rhs.node == nullptr) - return node == rhs.node; - - return SameSpecializationConstants(node, rhs.node); - } -}; - -// -// TSmallArrayVector is used as the container for the set of sizes in TArraySizes. -// It has generic-container semantics, while TArraySizes has array-of-array semantics. 
-// That is, TSmallArrayVector should be more focused on mechanism and TArraySizes on policy. -// -struct TSmallArrayVector { - // - // TODO: memory: TSmallArrayVector is intended to be smaller. - // Almost all arrays could be handled by two sizes each fitting - // in 16 bits, needing a real vector only in the cases where there - // are more than 3 sizes or a size needing more than 16 bits. - // - POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) - - TSmallArrayVector() : sizes(nullptr) { } - virtual ~TSmallArrayVector() { dealloc(); } - - // For breaking into two non-shared copies, independently modifiable. - TSmallArrayVector& operator=(const TSmallArrayVector& from) - { - if (from.sizes == nullptr) - sizes = nullptr; - else { - alloc(); - *sizes = *from.sizes; - } - - return *this; - } - - int size() const - { - if (sizes == nullptr) - return 0; - return (int)sizes->size(); - } - - unsigned int frontSize() const - { - assert(sizes != nullptr && sizes->size() > 0); - return sizes->front().size; - } - - TIntermTyped* frontNode() const - { - assert(sizes != nullptr && sizes->size() > 0); - return sizes->front().node; - } - - void changeFront(unsigned int s) - { - assert(sizes != nullptr); - // this should only happen for implicitly sized arrays, not specialization constants - assert(sizes->front().node == nullptr); - sizes->front().size = s; - } - - void push_back(unsigned int e, TIntermTyped* n) - { - alloc(); - TArraySize pair = { e, n }; - sizes->push_back(pair); - } - - void push_front(const TSmallArrayVector& newDims) - { - alloc(); - sizes->insert(sizes->begin(), newDims.sizes->begin(), newDims.sizes->end()); - } - - void pop_front() - { - assert(sizes != nullptr && sizes->size() > 0); - if (sizes->size() == 1) - dealloc(); - else - sizes->erase(sizes->begin()); - } - - // 'this' should currently not be holding anything, and copyNonFront - // will make it hold a copy of all but the first element of rhs. 
- // (This would be useful for making a type that is dereferenced by - // one dimension.) - void copyNonFront(const TSmallArrayVector& rhs) - { - assert(sizes == nullptr); - if (rhs.size() > 1) { - alloc(); - sizes->insert(sizes->begin(), rhs.sizes->begin() + 1, rhs.sizes->end()); - } - } - - unsigned int getDimSize(int i) const - { - assert(sizes != nullptr && (int)sizes->size() > i); - return (*sizes)[i].size; - } - - void setDimSize(int i, unsigned int size) const - { - assert(sizes != nullptr && (int)sizes->size() > i); - assert((*sizes)[i].node == nullptr); - (*sizes)[i].size = size; - } - - TIntermTyped* getDimNode(int i) const - { - assert(sizes != nullptr && (int)sizes->size() > i); - return (*sizes)[i].node; - } - - bool operator==(const TSmallArrayVector& rhs) const - { - if (sizes == nullptr && rhs.sizes == nullptr) - return true; - if (sizes == nullptr || rhs.sizes == nullptr) - return false; - return *sizes == *rhs.sizes; - } - bool operator!=(const TSmallArrayVector& rhs) const { return ! operator==(rhs); } - -protected: - TSmallArrayVector(const TSmallArrayVector&); - - void alloc() - { - if (sizes == nullptr) - sizes = new TVector; - } - void dealloc() - { - delete sizes; - sizes = nullptr; - } - - TVector* sizes; // will either hold such a pointer, or in the future, hold the two array sizes -}; - -// -// Represent an array, or array of arrays, to arbitrary depth. This is not -// done through a hierarchy of types in a type tree, rather all contiguous arrayness -// in the type hierarchy is localized into this single cumulative object. -// -// The arrayness in TTtype is a pointer, so that it can be non-allocated and zero -// for the vast majority of types that are non-array types. -// -// Order Policy: these are all identical: -// - left to right order within a contiguous set of ...[..][..][..]... in the source language -// - index order 0, 1, 2, ... 
within the 'sizes' member below -// - outer-most to inner-most -// -struct TArraySizes { - POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator()) - - TArraySizes() : implicitArraySize(1) { } - - // For breaking into two non-shared copies, independently modifiable. - TArraySizes& operator=(const TArraySizes& from) - { - implicitArraySize = from.implicitArraySize; - sizes = from.sizes; - - return *this; - } - - // translate from array-of-array semantics to container semantics - int getNumDims() const { return sizes.size(); } - int getDimSize(int dim) const { return sizes.getDimSize(dim); } - TIntermTyped* getDimNode(int dim) const { return sizes.getDimNode(dim); } - void setDimSize(int dim, int size) { sizes.setDimSize(dim, size); } - int getOuterSize() const { return sizes.frontSize(); } - TIntermTyped* getOuterNode() const { return sizes.frontNode(); } - int getCumulativeSize() const - { - int size = 1; - for (int d = 0; d < sizes.size(); ++d) { - // this only makes sense in paths that have a known array size - assert(sizes.getDimSize(d) != UnsizedArraySize); - size *= sizes.getDimSize(d); - } - return size; - } - void addInnerSize() { addInnerSize((unsigned)UnsizedArraySize); } - void addInnerSize(int s) { addInnerSize((unsigned)s, nullptr); } - void addInnerSize(int s, TIntermTyped* n) { sizes.push_back((unsigned)s, n); } - void addInnerSize(TArraySize pair) { sizes.push_back(pair.size, pair.node); } - void changeOuterSize(int s) { sizes.changeFront((unsigned)s); } - int getImplicitSize() const { return (int)implicitArraySize; } - void setImplicitSize(int s) { implicitArraySize = s; } - bool isInnerImplicit() const - { - for (int d = 1; d < sizes.size(); ++d) { - if (sizes.getDimSize(d) == (unsigned)UnsizedArraySize) - return true; - } - - return false; - } - bool clearInnerImplicit() - { - for (int d = 1; d < sizes.size(); ++d) { - if (sizes.getDimSize(d) == (unsigned)UnsizedArraySize) - setDimSize(d, 1); - } - - return false; - } - bool isInnerSpecialization() 
const - { - for (int d = 1; d < sizes.size(); ++d) { - if (sizes.getDimNode(d) != nullptr) - return true; - } - - return false; - } - bool isOuterSpecialization() - { - return sizes.getDimNode(0) != nullptr; - } - - bool isImplicit() const { return getOuterSize() == UnsizedArraySize || isInnerImplicit(); } - void addOuterSizes(const TArraySizes& s) { sizes.push_front(s.sizes); } - void dereference() { sizes.pop_front(); } - void copyDereferenced(const TArraySizes& rhs) - { - assert(sizes.size() == 0); - if (rhs.sizes.size() > 1) - sizes.copyNonFront(rhs.sizes); - } - - bool sameInnerArrayness(const TArraySizes& rhs) const - { - if (sizes.size() != rhs.sizes.size()) - return false; - - for (int d = 1; d < sizes.size(); ++d) { - if (sizes.getDimSize(d) != rhs.sizes.getDimSize(d) || - sizes.getDimNode(d) != rhs.sizes.getDimNode(d)) - return false; - } - - return true; - } - - bool operator==(const TArraySizes& rhs) { return sizes == rhs.sizes; } - bool operator!=(const TArraySizes& rhs) { return sizes != rhs.sizes; } - -protected: - TSmallArrayVector sizes; - - TArraySizes(const TArraySizes&); - - // for tracking maximum referenced index, before an explicit size is given - // applies only to the outer-most dimension - int implicitArraySize; -}; - -} // end namespace glslang - -#endif // _ARRAYS_INCLUDED_ diff --git a/third_party/glslang-spirv/Include/intermediate.h b/third_party/glslang-spirv/Include/intermediate.h deleted file mode 100644 index 51ac45c37..000000000 --- a/third_party/glslang-spirv/Include/intermediate.h +++ /dev/null @@ -1,1486 +0,0 @@ -// -// Copyright (C) 2002-2005 3Dlabs Inc. Ltd. -// Copyright (C) 2012-2016 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -// -// Definition of the in-memory high-level intermediate representation -// of shaders. This is a tree that parser creates. -// -// Nodes in the tree are defined as a hierarchy of classes derived from -// TIntermNode. Each is a node in a tree. There is no preset branching factor; -// each node can have it's own type of list of children. -// - -#ifndef __INTERMEDIATE_H -#define __INTERMEDIATE_H - -#if _MSC_VER >= 1900 - #pragma warning(disable : 4464) // relative include path contains '..' 
- #pragma warning(disable : 5026) // 'glslang::TIntermUnary': move constructor was implicitly defined as deleted -#endif - -#include "../Include/Common.h" -#include "../Include/Types.h" -#include "../Include/ConstantUnion.h" - -namespace glslang { - -class TIntermediate; - -// -// Operators used by the high-level (parse tree) representation. -// -enum TOperator { - EOpNull, // if in a node, should only mean a node is still being built - EOpSequence, // denotes a list of statements, or parameters, etc. - EOpLinkerObjects, // for aggregate node of objects the linker may need, if not reference by the rest of the AST - EOpFunctionCall, - EOpFunction, // For function definition - EOpParameters, // an aggregate listing the parameters to a function - - // - // Unary operators - // - - EOpNegative, - EOpLogicalNot, - EOpVectorLogicalNot, - EOpBitwiseNot, - - EOpPostIncrement, - EOpPostDecrement, - EOpPreIncrement, - EOpPreDecrement, - - EOpConvIntToBool, - EOpConvUintToBool, - EOpConvFloatToBool, - EOpConvDoubleToBool, - EOpConvInt64ToBool, - EOpConvUint64ToBool, - EOpConvBoolToFloat, - EOpConvIntToFloat, - EOpConvUintToFloat, - EOpConvDoubleToFloat, - EOpConvInt64ToFloat, - EOpConvUint64ToFloat, - EOpConvUintToInt, - EOpConvFloatToInt, - EOpConvBoolToInt, - EOpConvDoubleToInt, - EOpConvInt64ToInt, - EOpConvUint64ToInt, - EOpConvIntToUint, - EOpConvFloatToUint, - EOpConvBoolToUint, - EOpConvDoubleToUint, - EOpConvInt64ToUint, - EOpConvUint64ToUint, - EOpConvIntToDouble, - EOpConvUintToDouble, - EOpConvFloatToDouble, - EOpConvBoolToDouble, - EOpConvInt64ToDouble, - EOpConvUint64ToDouble, - EOpConvBoolToInt64, - EOpConvIntToInt64, - EOpConvUintToInt64, - EOpConvFloatToInt64, - EOpConvDoubleToInt64, - EOpConvUint64ToInt64, - EOpConvBoolToUint64, - EOpConvIntToUint64, - EOpConvUintToUint64, - EOpConvFloatToUint64, - EOpConvDoubleToUint64, - EOpConvInt64ToUint64, -#ifdef AMD_EXTENSIONS - EOpConvBoolToFloat16, - EOpConvIntToFloat16, - EOpConvUintToFloat16, - 
EOpConvFloatToFloat16, - EOpConvDoubleToFloat16, - EOpConvInt64ToFloat16, - EOpConvUint64ToFloat16, - EOpConvFloat16ToBool, - EOpConvFloat16ToInt, - EOpConvFloat16ToUint, - EOpConvFloat16ToFloat, - EOpConvFloat16ToDouble, - EOpConvFloat16ToInt64, - EOpConvFloat16ToUint64, - - EOpConvBoolToInt16, - EOpConvIntToInt16, - EOpConvUintToInt16, - EOpConvFloatToInt16, - EOpConvDoubleToInt16, - EOpConvFloat16ToInt16, - EOpConvInt64ToInt16, - EOpConvUint64ToInt16, - EOpConvUint16ToInt16, - EOpConvInt16ToBool, - EOpConvInt16ToInt, - EOpConvInt16ToUint, - EOpConvInt16ToFloat, - EOpConvInt16ToDouble, - EOpConvInt16ToFloat16, - EOpConvInt16ToInt64, - EOpConvInt16ToUint64, - - EOpConvBoolToUint16, - EOpConvIntToUint16, - EOpConvUintToUint16, - EOpConvFloatToUint16, - EOpConvDoubleToUint16, - EOpConvFloat16ToUint16, - EOpConvInt64ToUint16, - EOpConvUint64ToUint16, - EOpConvInt16ToUint16, - EOpConvUint16ToBool, - EOpConvUint16ToInt, - EOpConvUint16ToUint, - EOpConvUint16ToFloat, - EOpConvUint16ToDouble, - EOpConvUint16ToFloat16, - EOpConvUint16ToInt64, - EOpConvUint16ToUint64, -#endif - - // - // binary operations - // - - EOpAdd, - EOpSub, - EOpMul, - EOpDiv, - EOpMod, - EOpRightShift, - EOpLeftShift, - EOpAnd, - EOpInclusiveOr, - EOpExclusiveOr, - EOpEqual, - EOpNotEqual, - EOpVectorEqual, - EOpVectorNotEqual, - EOpLessThan, - EOpGreaterThan, - EOpLessThanEqual, - EOpGreaterThanEqual, - EOpComma, - - EOpVectorTimesScalar, - EOpVectorTimesMatrix, - EOpMatrixTimesVector, - EOpMatrixTimesScalar, - - EOpLogicalOr, - EOpLogicalXor, - EOpLogicalAnd, - - EOpIndexDirect, - EOpIndexIndirect, - EOpIndexDirectStruct, - - EOpVectorSwizzle, - - EOpMethod, - EOpScoping, - - // - // Built-in functions mapped to operators - // - - EOpRadians, - EOpDegrees, - EOpSin, - EOpCos, - EOpTan, - EOpAsin, - EOpAcos, - EOpAtan, - EOpSinh, - EOpCosh, - EOpTanh, - EOpAsinh, - EOpAcosh, - EOpAtanh, - - EOpPow, - EOpExp, - EOpLog, - EOpExp2, - EOpLog2, - EOpSqrt, - EOpInverseSqrt, - - EOpAbs, - EOpSign, - 
EOpFloor, - EOpTrunc, - EOpRound, - EOpRoundEven, - EOpCeil, - EOpFract, - EOpModf, - EOpMin, - EOpMax, - EOpClamp, - EOpMix, - EOpStep, - EOpSmoothStep, - - EOpIsNan, - EOpIsInf, - - EOpFma, - - EOpFrexp, - EOpLdexp, - - EOpFloatBitsToInt, - EOpFloatBitsToUint, - EOpIntBitsToFloat, - EOpUintBitsToFloat, - EOpDoubleBitsToInt64, - EOpDoubleBitsToUint64, - EOpInt64BitsToDouble, - EOpUint64BitsToDouble, -#ifdef AMD_EXTENSIONS - EOpFloat16BitsToInt16, - EOpFloat16BitsToUint16, - EOpInt16BitsToFloat16, - EOpUint16BitsToFloat16, -#endif - EOpPackSnorm2x16, - EOpUnpackSnorm2x16, - EOpPackUnorm2x16, - EOpUnpackUnorm2x16, - EOpPackSnorm4x8, - EOpUnpackSnorm4x8, - EOpPackUnorm4x8, - EOpUnpackUnorm4x8, - EOpPackHalf2x16, - EOpUnpackHalf2x16, - EOpPackDouble2x32, - EOpUnpackDouble2x32, - EOpPackInt2x32, - EOpUnpackInt2x32, - EOpPackUint2x32, - EOpUnpackUint2x32, -#ifdef AMD_EXTENSIONS - EOpPackFloat2x16, - EOpUnpackFloat2x16, - EOpPackInt2x16, - EOpUnpackInt2x16, - EOpPackUint2x16, - EOpUnpackUint2x16, - EOpPackInt4x16, - EOpUnpackInt4x16, - EOpPackUint4x16, - EOpUnpackUint4x16, -#endif - - EOpLength, - EOpDistance, - EOpDot, - EOpCross, - EOpNormalize, - EOpFaceForward, - EOpReflect, - EOpRefract, - -#ifdef AMD_EXTENSIONS - EOpMin3, - EOpMax3, - EOpMid3, -#endif - - EOpDPdx, // Fragment only - EOpDPdy, // Fragment only - EOpFwidth, // Fragment only - EOpDPdxFine, // Fragment only - EOpDPdyFine, // Fragment only - EOpFwidthFine, // Fragment only - EOpDPdxCoarse, // Fragment only - EOpDPdyCoarse, // Fragment only - EOpFwidthCoarse, // Fragment only - - EOpInterpolateAtCentroid, // Fragment only - EOpInterpolateAtSample, // Fragment only - EOpInterpolateAtOffset, // Fragment only - -#ifdef AMD_EXTENSIONS - EOpInterpolateAtVertex, -#endif - - EOpMatrixTimesMatrix, - EOpOuterProduct, - EOpDeterminant, - EOpMatrixInverse, - EOpTranspose, - - EOpFtransform, - - EOpNoise, - - EOpEmitVertex, // geometry only - EOpEndPrimitive, // geometry only - EOpEmitStreamVertex, // geometry only - 
EOpEndStreamPrimitive, // geometry only - - EOpBarrier, - EOpMemoryBarrier, - EOpMemoryBarrierAtomicCounter, - EOpMemoryBarrierBuffer, - EOpMemoryBarrierImage, - EOpMemoryBarrierShared, // compute only - EOpGroupMemoryBarrier, // compute only - - EOpBallot, - EOpReadInvocation, - EOpReadFirstInvocation, - - EOpAnyInvocation, - EOpAllInvocations, - EOpAllInvocationsEqual, - -#ifdef AMD_EXTENSIONS - EOpMinInvocations, - EOpMaxInvocations, - EOpAddInvocations, - EOpMinInvocationsNonUniform, - EOpMaxInvocationsNonUniform, - EOpAddInvocationsNonUniform, - EOpMinInvocationsInclusiveScan, - EOpMaxInvocationsInclusiveScan, - EOpAddInvocationsInclusiveScan, - EOpMinInvocationsInclusiveScanNonUniform, - EOpMaxInvocationsInclusiveScanNonUniform, - EOpAddInvocationsInclusiveScanNonUniform, - EOpMinInvocationsExclusiveScan, - EOpMaxInvocationsExclusiveScan, - EOpAddInvocationsExclusiveScan, - EOpMinInvocationsExclusiveScanNonUniform, - EOpMaxInvocationsExclusiveScanNonUniform, - EOpAddInvocationsExclusiveScanNonUniform, - EOpSwizzleInvocations, - EOpSwizzleInvocationsMasked, - EOpWriteInvocation, - EOpMbcnt, - - EOpCubeFaceIndex, - EOpCubeFaceCoord, - EOpTime, -#endif - - EOpAtomicAdd, - EOpAtomicMin, - EOpAtomicMax, - EOpAtomicAnd, - EOpAtomicOr, - EOpAtomicXor, - EOpAtomicExchange, - EOpAtomicCompSwap, - - EOpAtomicCounterIncrement, // results in pre-increment value - EOpAtomicCounterDecrement, // results in post-decrement value - EOpAtomicCounter, - EOpAtomicCounterAdd, - EOpAtomicCounterSubtract, - EOpAtomicCounterMin, - EOpAtomicCounterMax, - EOpAtomicCounterAnd, - EOpAtomicCounterOr, - EOpAtomicCounterXor, - EOpAtomicCounterExchange, - EOpAtomicCounterCompSwap, - - EOpAny, - EOpAll, - - // - // Branch - // - - EOpKill, // Fragment only - EOpReturn, - EOpBreak, - EOpContinue, - EOpCase, - EOpDefault, - - // - // Constructors - // - - EOpConstructGuardStart, - EOpConstructInt, // these first scalar forms also identify what implicit conversion is needed - EOpConstructUint, - 
EOpConstructInt64, - EOpConstructUint64, -#ifdef AMD_EXTENSIONS - EOpConstructInt16, - EOpConstructUint16, -#endif - EOpConstructBool, - EOpConstructFloat, - EOpConstructDouble, -#ifdef AMD_EXTENSIONS - EOpConstructFloat16, -#endif - EOpConstructVec2, - EOpConstructVec3, - EOpConstructVec4, - EOpConstructDVec2, - EOpConstructDVec3, - EOpConstructDVec4, -#ifdef AMD_EXTENSIONS - EOpConstructF16Vec2, - EOpConstructF16Vec3, - EOpConstructF16Vec4, -#endif - EOpConstructBVec2, - EOpConstructBVec3, - EOpConstructBVec4, - EOpConstructIVec2, - EOpConstructIVec3, - EOpConstructIVec4, - EOpConstructUVec2, - EOpConstructUVec3, - EOpConstructUVec4, - EOpConstructI64Vec2, - EOpConstructI64Vec3, - EOpConstructI64Vec4, - EOpConstructU64Vec2, - EOpConstructU64Vec3, - EOpConstructU64Vec4, -#ifdef AMD_EXTENSIONS - EOpConstructI16Vec2, - EOpConstructI16Vec3, - EOpConstructI16Vec4, - EOpConstructU16Vec2, - EOpConstructU16Vec3, - EOpConstructU16Vec4, -#endif - EOpConstructMat2x2, - EOpConstructMat2x3, - EOpConstructMat2x4, - EOpConstructMat3x2, - EOpConstructMat3x3, - EOpConstructMat3x4, - EOpConstructMat4x2, - EOpConstructMat4x3, - EOpConstructMat4x4, - EOpConstructDMat2x2, - EOpConstructDMat2x3, - EOpConstructDMat2x4, - EOpConstructDMat3x2, - EOpConstructDMat3x3, - EOpConstructDMat3x4, - EOpConstructDMat4x2, - EOpConstructDMat4x3, - EOpConstructDMat4x4, - EOpConstructIMat2x2, - EOpConstructIMat2x3, - EOpConstructIMat2x4, - EOpConstructIMat3x2, - EOpConstructIMat3x3, - EOpConstructIMat3x4, - EOpConstructIMat4x2, - EOpConstructIMat4x3, - EOpConstructIMat4x4, - EOpConstructUMat2x2, - EOpConstructUMat2x3, - EOpConstructUMat2x4, - EOpConstructUMat3x2, - EOpConstructUMat3x3, - EOpConstructUMat3x4, - EOpConstructUMat4x2, - EOpConstructUMat4x3, - EOpConstructUMat4x4, - EOpConstructBMat2x2, - EOpConstructBMat2x3, - EOpConstructBMat2x4, - EOpConstructBMat3x2, - EOpConstructBMat3x3, - EOpConstructBMat3x4, - EOpConstructBMat4x2, - EOpConstructBMat4x3, - EOpConstructBMat4x4, -#ifdef AMD_EXTENSIONS 
- EOpConstructF16Mat2x2, - EOpConstructF16Mat2x3, - EOpConstructF16Mat2x4, - EOpConstructF16Mat3x2, - EOpConstructF16Mat3x3, - EOpConstructF16Mat3x4, - EOpConstructF16Mat4x2, - EOpConstructF16Mat4x3, - EOpConstructF16Mat4x4, -#endif - EOpConstructStruct, - EOpConstructTextureSampler, - EOpConstructGuardEnd, - - // - // moves - // - - EOpAssign, - EOpAddAssign, - EOpSubAssign, - EOpMulAssign, - EOpVectorTimesMatrixAssign, - EOpVectorTimesScalarAssign, - EOpMatrixTimesScalarAssign, - EOpMatrixTimesMatrixAssign, - EOpDivAssign, - EOpModAssign, - EOpAndAssign, - EOpInclusiveOrAssign, - EOpExclusiveOrAssign, - EOpLeftShiftAssign, - EOpRightShiftAssign, - - // - // Array operators - // - - EOpArrayLength, // "Array" distinguishes from length(v) built-in function, but it applies to vectors and matrices as well. - - // - // Image operations - // - - EOpImageGuardBegin, - - EOpImageQuerySize, - EOpImageQuerySamples, - EOpImageLoad, - EOpImageStore, -#ifdef AMD_EXTENSIONS - EOpImageLoadLod, - EOpImageStoreLod, -#endif - EOpImageAtomicAdd, - EOpImageAtomicMin, - EOpImageAtomicMax, - EOpImageAtomicAnd, - EOpImageAtomicOr, - EOpImageAtomicXor, - EOpImageAtomicExchange, - EOpImageAtomicCompSwap, - - EOpSubpassLoad, - EOpSubpassLoadMS, - EOpSparseImageLoad, -#ifdef AMD_EXTENSIONS - EOpSparseImageLoadLod, -#endif - - EOpImageGuardEnd, - - // - // Texture operations - // - - EOpTextureGuardBegin, - - EOpTextureQuerySize, - EOpTextureQueryLod, - EOpTextureQueryLevels, - EOpTextureQuerySamples, - - EOpSamplingGuardBegin, - - EOpTexture, - EOpTextureProj, - EOpTextureLod, - EOpTextureOffset, - EOpTextureFetch, - EOpTextureFetchOffset, - EOpTextureProjOffset, - EOpTextureLodOffset, - EOpTextureProjLod, - EOpTextureProjLodOffset, - EOpTextureGrad, - EOpTextureGradOffset, - EOpTextureProjGrad, - EOpTextureProjGradOffset, - EOpTextureGather, - EOpTextureGatherOffset, - EOpTextureGatherOffsets, - EOpTextureClamp, - EOpTextureOffsetClamp, - EOpTextureGradClamp, - EOpTextureGradOffsetClamp, 
-#ifdef AMD_EXTENSIONS - EOpTextureGatherLod, - EOpTextureGatherLodOffset, - EOpTextureGatherLodOffsets, - EOpFragmentMaskFetch, - EOpFragmentFetch, -#endif - - EOpSparseTextureGuardBegin, - - EOpSparseTexture, - EOpSparseTextureLod, - EOpSparseTextureOffset, - EOpSparseTextureFetch, - EOpSparseTextureFetchOffset, - EOpSparseTextureLodOffset, - EOpSparseTextureGrad, - EOpSparseTextureGradOffset, - EOpSparseTextureGather, - EOpSparseTextureGatherOffset, - EOpSparseTextureGatherOffsets, - EOpSparseTexelsResident, - EOpSparseTextureClamp, - EOpSparseTextureOffsetClamp, - EOpSparseTextureGradClamp, - EOpSparseTextureGradOffsetClamp, -#ifdef AMD_EXTENSIONS - EOpSparseTextureGatherLod, - EOpSparseTextureGatherLodOffset, - EOpSparseTextureGatherLodOffsets, -#endif - - EOpSparseTextureGuardEnd, - EOpSamplingGuardEnd, - EOpTextureGuardEnd, - - // - // Integer operations - // - - EOpAddCarry, - EOpSubBorrow, - EOpUMulExtended, - EOpIMulExtended, - EOpBitfieldExtract, - EOpBitfieldInsert, - EOpBitFieldReverse, - EOpBitCount, - EOpFindLSB, - EOpFindMSB, - - // - // HLSL operations - // - - EOpClip, // discard if input value < 0 - EOpIsFinite, - EOpLog10, // base 10 log - EOpRcp, // 1/x - EOpSaturate, // clamp from 0 to 1 - EOpSinCos, // sin and cos in out parameters - EOpGenMul, // mul(x,y) on any of mat/vec/scalars - EOpDst, // x = 1, y=src0.y * src1.y, z=src0.z, w=src1.w - EOpInterlockedAdd, // atomic ops, but uses [optional] out arg instead of return - EOpInterlockedAnd, // ... - EOpInterlockedCompareExchange, // ... - EOpInterlockedCompareStore, // ... - EOpInterlockedExchange, // ... - EOpInterlockedMax, // ... - EOpInterlockedMin, // ... - EOpInterlockedOr, // ... - EOpInterlockedXor, // ... - EOpAllMemoryBarrierWithGroupSync, // memory barriers without non-hlsl AST equivalents - EOpDeviceMemoryBarrier, // ... - EOpDeviceMemoryBarrierWithGroupSync, // ... - EOpWorkgroupMemoryBarrier, // ... - EOpWorkgroupMemoryBarrierWithGroupSync, // ... 
- EOpEvaluateAttributeSnapped, // InterpolateAtOffset with int position on 16x16 grid - EOpF32tof16, // HLSL conversion: half of a PackHalf2x16 - EOpF16tof32, // HLSL conversion: half of an UnpackHalf2x16 - EOpLit, // HLSL lighting coefficient vector - EOpTextureBias, // HLSL texture bias: will be lowered to EOpTexture - EOpAsDouble, // slightly different from EOpUint64BitsToDouble - EOpD3DCOLORtoUBYTE4, // convert and swizzle 4-component color to UBYTE4 range - - EOpMethodSample, // Texture object methods. These are translated to existing - EOpMethodSampleBias, // AST methods, and exist to represent HLSL semantics until that - EOpMethodSampleCmp, // translation is performed. See HlslParseContext::decomposeSampleMethods(). - EOpMethodSampleCmpLevelZero, // ... - EOpMethodSampleGrad, // ... - EOpMethodSampleLevel, // ... - EOpMethodLoad, // ... - EOpMethodGetDimensions, // ... - EOpMethodGetSamplePosition, // ... - EOpMethodGather, // ... - EOpMethodCalculateLevelOfDetail, // ... - EOpMethodCalculateLevelOfDetailUnclamped, // ... - - // Load already defined above for textures - EOpMethodLoad2, // Structure buffer object methods. These are translated to existing - EOpMethodLoad3, // AST methods, and exist to represent HLSL semantics until that - EOpMethodLoad4, // translation is performed. See HlslParseContext::decomposeSampleMethods(). - EOpMethodStore, // ... - EOpMethodStore2, // ... - EOpMethodStore3, // ... - EOpMethodStore4, // ... - EOpMethodIncrementCounter, // ... - EOpMethodDecrementCounter, // ... - // EOpMethodAppend is defined for geo shaders below - EOpMethodConsume, - - // SM5 texture methods - EOpMethodGatherRed, // These are covered under the above EOpMethodSample comment about - EOpMethodGatherGreen, // translation to existing AST opcodes. They exist temporarily - EOpMethodGatherBlue, // because HLSL arguments are slightly different. - EOpMethodGatherAlpha, // ... - EOpMethodGatherCmp, // ... - EOpMethodGatherCmpRed, // ... 
- EOpMethodGatherCmpGreen, // ... - EOpMethodGatherCmpBlue, // ... - EOpMethodGatherCmpAlpha, // ... - - // geometry methods - EOpMethodAppend, // Geometry shader methods - EOpMethodRestartStrip, // ... - - // matrix - EOpMatrixSwizzle, // select multiple matrix components (non-column) -}; - -class TIntermTraverser; -class TIntermOperator; -class TIntermAggregate; -class TIntermUnary; -class TIntermBinary; -class TIntermConstantUnion; -class TIntermSelection; -class TIntermSwitch; -class TIntermBranch; -class TIntermTyped; -class TIntermMethod; -class TIntermSymbol; -class TIntermLoop; - -} // end namespace glslang - -// -// Base class for the tree nodes -// -// (Put outside the glslang namespace, as it's used as part of the external interface.) -// -class TIntermNode { -public: - POOL_ALLOCATOR_NEW_DELETE(glslang::GetThreadPoolAllocator()) - - TIntermNode() { loc.init(); } - virtual const glslang::TSourceLoc& getLoc() const { return loc; } - virtual void setLoc(const glslang::TSourceLoc& l) { loc = l; } - virtual void traverse(glslang::TIntermTraverser*) = 0; - virtual glslang::TIntermTyped* getAsTyped() { return 0; } - virtual glslang::TIntermOperator* getAsOperator() { return 0; } - virtual glslang::TIntermConstantUnion* getAsConstantUnion() { return 0; } - virtual glslang::TIntermAggregate* getAsAggregate() { return 0; } - virtual glslang::TIntermUnary* getAsUnaryNode() { return 0; } - virtual glslang::TIntermBinary* getAsBinaryNode() { return 0; } - virtual glslang::TIntermSelection* getAsSelectionNode() { return 0; } - virtual glslang::TIntermSwitch* getAsSwitchNode() { return 0; } - virtual glslang::TIntermMethod* getAsMethodNode() { return 0; } - virtual glslang::TIntermSymbol* getAsSymbolNode() { return 0; } - virtual glslang::TIntermBranch* getAsBranchNode() { return 0; } - virtual glslang::TIntermLoop* getAsLoopNode() { return 0; } - - virtual const glslang::TIntermTyped* getAsTyped() const { return 0; } - virtual const glslang::TIntermOperator* 
getAsOperator() const { return 0; } - virtual const glslang::TIntermConstantUnion* getAsConstantUnion() const { return 0; } - virtual const glslang::TIntermAggregate* getAsAggregate() const { return 0; } - virtual const glslang::TIntermUnary* getAsUnaryNode() const { return 0; } - virtual const glslang::TIntermBinary* getAsBinaryNode() const { return 0; } - virtual const glslang::TIntermSelection* getAsSelectionNode() const { return 0; } - virtual const glslang::TIntermSwitch* getAsSwitchNode() const { return 0; } - virtual const glslang::TIntermMethod* getAsMethodNode() const { return 0; } - virtual const glslang::TIntermSymbol* getAsSymbolNode() const { return 0; } - virtual const glslang::TIntermBranch* getAsBranchNode() const { return 0; } - virtual const glslang::TIntermLoop* getAsLoopNode() const { return 0; } - virtual ~TIntermNode() { } - -protected: - TIntermNode(const TIntermNode&); - TIntermNode& operator=(const TIntermNode&); - glslang::TSourceLoc loc; -}; - -namespace glslang { - -// -// This is just to help yacc. -// -struct TIntermNodePair { - TIntermNode* node1; - TIntermNode* node2; -}; - -// -// Intermediate class for nodes that have a type. 
-// -class TIntermTyped : public TIntermNode { -public: - TIntermTyped(const TType& t) { type.shallowCopy(t); } - TIntermTyped(TBasicType basicType) { TType bt(basicType); type.shallowCopy(bt); } - virtual TIntermTyped* getAsTyped() { return this; } - virtual const TIntermTyped* getAsTyped() const { return this; } - virtual void setType(const TType& t) { type.shallowCopy(t); } - virtual const TType& getType() const { return type; } - virtual TType& getWritableType() { return type; } - - virtual TBasicType getBasicType() const { return type.getBasicType(); } - virtual TQualifier& getQualifier() { return type.getQualifier(); } - virtual const TQualifier& getQualifier() const { return type.getQualifier(); } - virtual void propagatePrecision(TPrecisionQualifier); - virtual int getVectorSize() const { return type.getVectorSize(); } - virtual int getMatrixCols() const { return type.getMatrixCols(); } - virtual int getMatrixRows() const { return type.getMatrixRows(); } - virtual bool isMatrix() const { return type.isMatrix(); } - virtual bool isArray() const { return type.isArray(); } - virtual bool isVector() const { return type.isVector(); } - virtual bool isScalar() const { return type.isScalar(); } - virtual bool isStruct() const { return type.isStruct(); } - virtual bool isFloatingDomain() const { return type.isFloatingDomain(); } - virtual bool isIntegerDomain() const { return type.isIntegerDomain(); } - TString getCompleteString() const { return type.getCompleteString(); } - -protected: - TIntermTyped& operator=(const TIntermTyped&); - TType type; -}; - -// -// Handle for, do-while, and while loops. 
-// -class TIntermLoop : public TIntermNode { -public: - TIntermLoop(TIntermNode* aBody, TIntermTyped* aTest, TIntermTyped* aTerminal, bool testFirst) : - body(aBody), - test(aTest), - terminal(aTerminal), - first(testFirst), - unroll(false), - dontUnroll(false), - dependency(0) - { } - - virtual TIntermLoop* getAsLoopNode() { return this; } - virtual const TIntermLoop* getAsLoopNode() const { return this; } - virtual void traverse(TIntermTraverser*); - TIntermNode* getBody() const { return body; } - TIntermTyped* getTest() const { return test; } - TIntermTyped* getTerminal() const { return terminal; } - bool testFirst() const { return first; } - - void setUnroll() { unroll = true; } - void setDontUnroll() { dontUnroll = true; } - bool getUnroll() const { return unroll; } - bool getDontUnroll() const { return dontUnroll; } - - static const unsigned int dependencyInfinite = 0xFFFFFFFF; - void setLoopDependency(int d) { dependency = d; } - int getLoopDependency() const { return dependency; } - -protected: - TIntermNode* body; // code to loop over - TIntermTyped* test; // exit condition associated with loop, could be 0 for 'for' loops - TIntermTyped* terminal; // exists for for-loops - bool first; // true for while and for, not for do-while - bool unroll; // true if unroll requested - bool dontUnroll; // true if request to not unroll - unsigned int dependency; // loop dependency hint; 0 means not set or unknown -}; - -// -// Handle case, break, continue, return, and kill. 
-// -class TIntermBranch : public TIntermNode { -public: - TIntermBranch(TOperator op, TIntermTyped* e) : - flowOp(op), - expression(e) { } - virtual TIntermBranch* getAsBranchNode() { return this; } - virtual const TIntermBranch* getAsBranchNode() const { return this; } - virtual void traverse(TIntermTraverser*); - TOperator getFlowOp() const { return flowOp; } - TIntermTyped* getExpression() const { return expression; } -protected: - TOperator flowOp; - TIntermTyped* expression; -}; - -// -// Represent method names before seeing their calling signature -// or resolving them to operations. Just an expression as the base object -// and a textural name. -// -class TIntermMethod : public TIntermTyped { -public: - TIntermMethod(TIntermTyped* o, const TType& t, const TString& m) : TIntermTyped(t), object(o), method(m) { } - virtual TIntermMethod* getAsMethodNode() { return this; } - virtual const TIntermMethod* getAsMethodNode() const { return this; } - virtual const TString& getMethodName() const { return method; } - virtual TIntermTyped* getObject() const { return object; } - virtual void traverse(TIntermTraverser*); -protected: - TIntermTyped* object; - TString method; -}; - -// -// Nodes that correspond to symbols or constants in the source code. -// -class TIntermSymbol : public TIntermTyped { -public: - // if symbol is initialized as symbol(sym), the memory comes from the pool allocator of sym. 
If sym comes from - // per process threadPoolAllocator, then it causes increased memory usage per compile - // it is essential to use "symbol = sym" to assign to symbol - TIntermSymbol(int i, const TString& n, const TType& t) - : TIntermTyped(t), id(i), -#ifdef ENABLE_HLSL - flattenSubset(-1), -#endif - constSubtree(nullptr) - { name = n; } - virtual int getId() const { return id; } - virtual const TString& getName() const { return name; } - virtual void traverse(TIntermTraverser*); - virtual TIntermSymbol* getAsSymbolNode() { return this; } - virtual const TIntermSymbol* getAsSymbolNode() const { return this; } - void setConstArray(const TConstUnionArray& c) { constArray = c; } - const TConstUnionArray& getConstArray() const { return constArray; } - void setConstSubtree(TIntermTyped* subtree) { constSubtree = subtree; } - TIntermTyped* getConstSubtree() const { return constSubtree; } -#ifdef ENABLE_HLSL - void setFlattenSubset(int subset) { flattenSubset = subset; } - int getFlattenSubset() const { return flattenSubset; } // -1 means full object -#endif - - // This is meant for cases where a node has already been constructed, and - // later on, it becomes necessary to switch to a different symbol. 
- virtual void switchId(int newId) { id = newId; } - -protected: - int id; // the unique id of the symbol this node represents -#ifdef ENABLE_HLSL - int flattenSubset; // how deeply the flattened object rooted at id has been dereferenced -#endif - TString name; // the name of the symbol this node represents - TConstUnionArray constArray; // if the symbol is a front-end compile-time constant, this is its value - TIntermTyped* constSubtree; -}; - -class TIntermConstantUnion : public TIntermTyped { -public: - TIntermConstantUnion(const TConstUnionArray& ua, const TType& t) : TIntermTyped(t), constArray(ua), literal(false) { } - const TConstUnionArray& getConstArray() const { return constArray; } - virtual TIntermConstantUnion* getAsConstantUnion() { return this; } - virtual const TIntermConstantUnion* getAsConstantUnion() const { return this; } - virtual void traverse(TIntermTraverser*); - virtual TIntermTyped* fold(TOperator, const TIntermTyped*) const; - virtual TIntermTyped* fold(TOperator, const TType&) const; - void setLiteral() { literal = true; } - void setExpression() { literal = false; } - bool isLiteral() const { return literal; } - -protected: - TIntermConstantUnion& operator=(const TIntermConstantUnion&); - - const TConstUnionArray constArray; - bool literal; // true if node represents a literal in the source code -}; - -// Represent the independent aspects of a texturing TOperator -struct TCrackedTextureOp { - bool query; - bool proj; - bool lod; - bool fetch; - bool offset; - bool offsets; - bool gather; - bool grad; - bool subpass; - bool lodClamp; -#ifdef AMD_EXTENSIONS - bool fragMask; -#endif -}; - -// -// Intermediate class for node types that hold operators. 
-// -class TIntermOperator : public TIntermTyped { -public: - virtual TIntermOperator* getAsOperator() { return this; } - virtual const TIntermOperator* getAsOperator() const { return this; } - TOperator getOp() const { return op; } - void setOp(TOperator newOp) { op = newOp; } - bool modifiesState() const; - bool isConstructor() const; - bool isTexture() const { return op > EOpTextureGuardBegin && op < EOpTextureGuardEnd; } - bool isSampling() const { return op > EOpSamplingGuardBegin && op < EOpSamplingGuardEnd; } - bool isImage() const { return op > EOpImageGuardBegin && op < EOpImageGuardEnd; } - bool isSparseTexture() const { return op > EOpSparseTextureGuardBegin && op < EOpSparseTextureGuardEnd; } - bool isSparseImage() const { return op == EOpSparseImageLoad; } - - void setOperationPrecision(TPrecisionQualifier p) { operationPrecision = p; } - TPrecisionQualifier getOperationPrecision() const { return operationPrecision != EpqNone ? - operationPrecision : - type.getQualifier().precision; } - TString getCompleteString() const - { - TString cs = type.getCompleteString(); - if (getOperationPrecision() != type.getQualifier().precision) { - cs += ", operation at "; - cs += GetPrecisionQualifierString(getOperationPrecision()); - } - - return cs; - } - - // Crack the op into the individual dimensions of texturing operation. 
- void crackTexture(TSampler sampler, TCrackedTextureOp& cracked) const - { - cracked.query = false; - cracked.proj = false; - cracked.lod = false; - cracked.fetch = false; - cracked.offset = false; - cracked.offsets = false; - cracked.gather = false; - cracked.grad = false; - cracked.subpass = false; - cracked.lodClamp = false; -#ifdef AMD_EXTENSIONS - cracked.fragMask = false; -#endif - - switch (op) { - case EOpImageQuerySize: - case EOpImageQuerySamples: - case EOpTextureQuerySize: - case EOpTextureQueryLod: - case EOpTextureQueryLevels: - case EOpTextureQuerySamples: - case EOpSparseTexelsResident: - cracked.query = true; - break; - case EOpTexture: - case EOpSparseTexture: - break; - case EOpTextureClamp: - case EOpSparseTextureClamp: - cracked.lodClamp = true; - break; - case EOpTextureProj: - cracked.proj = true; - break; - case EOpTextureLod: - case EOpSparseTextureLod: - cracked.lod = true; - break; - case EOpTextureOffset: - case EOpSparseTextureOffset: - cracked.offset = true; - break; - case EOpTextureOffsetClamp: - case EOpSparseTextureOffsetClamp: - cracked.offset = true; - cracked.lodClamp = true; - break; - case EOpTextureFetch: - case EOpSparseTextureFetch: - cracked.fetch = true; - if (sampler.dim == Esd1D || (sampler.dim == Esd2D && ! sampler.ms) || sampler.dim == Esd3D) - cracked.lod = true; - break; - case EOpTextureFetchOffset: - case EOpSparseTextureFetchOffset: - cracked.fetch = true; - cracked.offset = true; - if (sampler.dim == Esd1D || (sampler.dim == Esd2D && ! 
sampler.ms) || sampler.dim == Esd3D) - cracked.lod = true; - break; - case EOpTextureProjOffset: - cracked.offset = true; - cracked.proj = true; - break; - case EOpTextureLodOffset: - case EOpSparseTextureLodOffset: - cracked.offset = true; - cracked.lod = true; - break; - case EOpTextureProjLod: - cracked.lod = true; - cracked.proj = true; - break; - case EOpTextureProjLodOffset: - cracked.offset = true; - cracked.lod = true; - cracked.proj = true; - break; - case EOpTextureGrad: - case EOpSparseTextureGrad: - cracked.grad = true; - break; - case EOpTextureGradClamp: - case EOpSparseTextureGradClamp: - cracked.grad = true; - cracked.lodClamp = true; - break; - case EOpTextureGradOffset: - case EOpSparseTextureGradOffset: - cracked.grad = true; - cracked.offset = true; - break; - case EOpTextureProjGrad: - cracked.grad = true; - cracked.proj = true; - break; - case EOpTextureProjGradOffset: - cracked.grad = true; - cracked.offset = true; - cracked.proj = true; - break; - case EOpTextureGradOffsetClamp: - case EOpSparseTextureGradOffsetClamp: - cracked.grad = true; - cracked.offset = true; - cracked.lodClamp = true; - break; - case EOpTextureGather: - case EOpSparseTextureGather: - cracked.gather = true; - break; - case EOpTextureGatherOffset: - case EOpSparseTextureGatherOffset: - cracked.gather = true; - cracked.offset = true; - break; - case EOpTextureGatherOffsets: - case EOpSparseTextureGatherOffsets: - cracked.gather = true; - cracked.offsets = true; - break; -#ifdef AMD_EXTENSIONS - case EOpTextureGatherLod: - case EOpSparseTextureGatherLod: - cracked.gather = true; - cracked.lod = true; - break; - case EOpTextureGatherLodOffset: - case EOpSparseTextureGatherLodOffset: - cracked.gather = true; - cracked.offset = true; - cracked.lod = true; - break; - case EOpTextureGatherLodOffsets: - case EOpSparseTextureGatherLodOffsets: - cracked.gather = true; - cracked.offsets = true; - cracked.lod = true; - break; - case EOpImageLoadLod: - case EOpImageStoreLod: - case 
EOpSparseImageLoadLod: - cracked.lod = true; - break; - case EOpFragmentMaskFetch: - cracked.subpass = sampler.dim == EsdSubpass; - cracked.fragMask = true; - break; - case EOpFragmentFetch: - cracked.subpass = sampler.dim == EsdSubpass; - cracked.fragMask = true; - break; -#endif - case EOpSubpassLoad: - case EOpSubpassLoadMS: - cracked.subpass = true; - break; - default: - break; - } - } - -protected: - TIntermOperator(TOperator o) : TIntermTyped(EbtFloat), op(o), operationPrecision(EpqNone) {} - TIntermOperator(TOperator o, TType& t) : TIntermTyped(t), op(o), operationPrecision(EpqNone) {} - TOperator op; - // The result precision is in the inherited TType, and is usually meant to be both - // the operation precision and the result precision. However, some more complex things, - // like built-in function calls, distinguish between the two, in which case non-EqpNone - // 'operationPrecision' overrides the result precision as far as operation precision - // is concerned. - TPrecisionQualifier operationPrecision; -}; - -// -// Nodes for all the basic binary math operators. -// -class TIntermBinary : public TIntermOperator { -public: - TIntermBinary(TOperator o) : TIntermOperator(o) {} - virtual void traverse(TIntermTraverser*); - virtual void setLeft(TIntermTyped* n) { left = n; } - virtual void setRight(TIntermTyped* n) { right = n; } - virtual TIntermTyped* getLeft() const { return left; } - virtual TIntermTyped* getRight() const { return right; } - virtual TIntermBinary* getAsBinaryNode() { return this; } - virtual const TIntermBinary* getAsBinaryNode() const { return this; } - virtual void updatePrecision(); -protected: - TIntermTyped* left; - TIntermTyped* right; -}; - -// -// Nodes for unary math operators. 
-// -class TIntermUnary : public TIntermOperator { -public: - TIntermUnary(TOperator o, TType& t) : TIntermOperator(o, t), operand(0) {} - TIntermUnary(TOperator o) : TIntermOperator(o), operand(0) {} - virtual void traverse(TIntermTraverser*); - virtual void setOperand(TIntermTyped* o) { operand = o; } - virtual TIntermTyped* getOperand() { return operand; } - virtual const TIntermTyped* getOperand() const { return operand; } - virtual TIntermUnary* getAsUnaryNode() { return this; } - virtual const TIntermUnary* getAsUnaryNode() const { return this; } - virtual void updatePrecision(); -protected: - TIntermTyped* operand; -}; - -typedef TVector TIntermSequence; -typedef TVector TQualifierList; -// -// Nodes that operate on an arbitrary sized set of children. -// -class TIntermAggregate : public TIntermOperator { -public: - TIntermAggregate() : TIntermOperator(EOpNull), userDefined(false), pragmaTable(nullptr) { } - TIntermAggregate(TOperator o) : TIntermOperator(o), pragmaTable(nullptr) { } - ~TIntermAggregate() { delete pragmaTable; } - virtual TIntermAggregate* getAsAggregate() { return this; } - virtual const TIntermAggregate* getAsAggregate() const { return this; } - virtual void setOperator(TOperator o) { op = o; } - virtual TIntermSequence& getSequence() { return sequence; } - virtual const TIntermSequence& getSequence() const { return sequence; } - virtual void setName(const TString& n) { name = n; } - virtual const TString& getName() const { return name; } - virtual void traverse(TIntermTraverser*); - virtual void setUserDefined() { userDefined = true; } - virtual bool isUserDefined() { return userDefined; } - virtual TQualifierList& getQualifierList() { return qualifier; } - virtual const TQualifierList& getQualifierList() const { return qualifier; } - void setOptimize(bool o) { optimize = o; } - void setDebug(bool d) { debug = d; } - bool getOptimize() const { return optimize; } - bool getDebug() const { return debug; } - void setPragmaTable(const 
TPragmaTable& pTable); - const TPragmaTable& getPragmaTable() const { return *pragmaTable; } -protected: - TIntermAggregate(const TIntermAggregate&); // disallow copy constructor - TIntermAggregate& operator=(const TIntermAggregate&); // disallow assignment operator - TIntermSequence sequence; - TQualifierList qualifier; - TString name; - bool userDefined; // used for user defined function names - bool optimize; - bool debug; - TPragmaTable* pragmaTable; -}; - -// -// For if tests. -// -class TIntermSelection : public TIntermTyped { -public: - TIntermSelection(TIntermTyped* cond, TIntermNode* trueB, TIntermNode* falseB) : - TIntermTyped(EbtVoid), condition(cond), trueBlock(trueB), falseBlock(falseB), - flatten(false), dontFlatten(false) {} - TIntermSelection(TIntermTyped* cond, TIntermNode* trueB, TIntermNode* falseB, const TType& type) : - TIntermTyped(type), condition(cond), trueBlock(trueB), falseBlock(falseB), - flatten(false), dontFlatten(false) {} - virtual void traverse(TIntermTraverser*); - virtual TIntermTyped* getCondition() const { return condition; } - virtual TIntermNode* getTrueBlock() const { return trueBlock; } - virtual TIntermNode* getFalseBlock() const { return falseBlock; } - virtual TIntermSelection* getAsSelectionNode() { return this; } - virtual const TIntermSelection* getAsSelectionNode() const { return this; } - - void setFlatten() { flatten = true; } - void setDontFlatten() { dontFlatten = true; } - bool getFlatten() const { return flatten; } - bool getDontFlatten() const { return dontFlatten; } - -protected: - TIntermTyped* condition; - TIntermNode* trueBlock; - TIntermNode* falseBlock; - bool flatten; // true if flatten requested - bool dontFlatten; // true if requested to not flatten -}; - -// -// For switch statements. Designed use is that a switch will have sequence of nodes -// that are either case/default nodes or a *single* node that represents all the code -// in between (if any) consecutive case/defaults. 
So, a traversal need only deal with -// 0 or 1 nodes per case/default statement. -// -class TIntermSwitch : public TIntermNode { -public: - TIntermSwitch(TIntermTyped* cond, TIntermAggregate* b) : condition(cond), body(b), - flatten(false), dontFlatten(false) {} - virtual void traverse(TIntermTraverser*); - virtual TIntermNode* getCondition() const { return condition; } - virtual TIntermAggregate* getBody() const { return body; } - virtual TIntermSwitch* getAsSwitchNode() { return this; } - virtual const TIntermSwitch* getAsSwitchNode() const { return this; } - - void setFlatten() { flatten = true; } - void setDontFlatten() { dontFlatten = true; } - bool getFlatten() const { return flatten; } - bool getDontFlatten() const { return dontFlatten; } - -protected: - TIntermTyped* condition; - TIntermAggregate* body; - bool flatten; // true if flatten requested - bool dontFlatten; // true if requested to not flatten -}; - -enum TVisit -{ - EvPreVisit, - EvInVisit, - EvPostVisit -}; - -// -// For traversing the tree. User should derive from this, -// put their traversal specific data in it, and then pass -// it to a Traverse method. -// -// When using this, just fill in the methods for nodes you want visited. -// Return false from a pre-visit to skip visiting that node's subtree. -// -// Explicitly set postVisit to true if you want post visiting, otherwise, -// filled in methods will only be called at pre-visit time (before processing -// the subtree). Similarly for inVisit for in-order visiting of nodes with -// multiple children. -// -// If you only want post-visits, explicitly turn off preVisit (and inVisit) -// and turn on postVisit. -// -// In general, for the visit*() methods, return true from interior nodes -// to have the traversal continue on to children. -// -// If you process children yourself, or don't want them processed, return false. 
-// -class TIntermTraverser { -public: - POOL_ALLOCATOR_NEW_DELETE(glslang::GetThreadPoolAllocator()) - TIntermTraverser(bool preVisit = true, bool inVisit = false, bool postVisit = false, bool rightToLeft = false) : - preVisit(preVisit), - inVisit(inVisit), - postVisit(postVisit), - rightToLeft(rightToLeft), - depth(0), - maxDepth(0) { } - virtual ~TIntermTraverser() { } - - virtual void visitSymbol(TIntermSymbol*) { } - virtual void visitConstantUnion(TIntermConstantUnion*) { } - virtual bool visitBinary(TVisit, TIntermBinary*) { return true; } - virtual bool visitUnary(TVisit, TIntermUnary*) { return true; } - virtual bool visitSelection(TVisit, TIntermSelection*) { return true; } - virtual bool visitAggregate(TVisit, TIntermAggregate*) { return true; } - virtual bool visitLoop(TVisit, TIntermLoop*) { return true; } - virtual bool visitBranch(TVisit, TIntermBranch*) { return true; } - virtual bool visitSwitch(TVisit, TIntermSwitch*) { return true; } - - int getMaxDepth() const { return maxDepth; } - - void incrementDepth(TIntermNode *current) - { - depth++; - maxDepth = (std::max)(maxDepth, depth); - path.push_back(current); - } - - void decrementDepth() - { - depth--; - path.pop_back(); - } - - TIntermNode *getParentNode() - { - return path.size() == 0 ? NULL : path.back(); - } - - const bool preVisit; - const bool inVisit; - const bool postVisit; - const bool rightToLeft; - -protected: - TIntermTraverser& operator=(TIntermTraverser&); - - int depth; - int maxDepth; - - // All the nodes from root to the current node's parent during traversing. 
- TVector path; -}; - -// KHR_vulkan_glsl says "Two arrays sized with specialization constants are the same type only if -// sized with the same symbol, involving no operations" -inline bool SameSpecializationConstants(TIntermTyped* node1, TIntermTyped* node2) -{ - return node1->getAsSymbolNode() && node2->getAsSymbolNode() && - node1->getAsSymbolNode()->getId() == node2->getAsSymbolNode()->getId(); -} - -} // end namespace glslang - -#endif // __INTERMEDIATE_H diff --git a/third_party/glslang-spirv/Include/revision.h b/third_party/glslang-spirv/Include/revision.h deleted file mode 100644 index 218f8b67f..000000000 --- a/third_party/glslang-spirv/Include/revision.h +++ /dev/null @@ -1,6 +0,0 @@ -// This header is generated by the make-revision script. -// For the version, it uses the latest git tag followed by the number of commits. -// For the date, it uses the current date (when then script is run). - -#define GLSLANG_REVISION "Overload400-PrecQual.2000" -#define GLSLANG_DATE "12-Apr-2017" diff --git a/third_party/glslang-spirv/Include/revision.template b/third_party/glslang-spirv/Include/revision.template deleted file mode 100644 index 4a16beeb0..000000000 --- a/third_party/glslang-spirv/Include/revision.template +++ /dev/null @@ -1,13 +0,0 @@ -// The file revision.h should be updated to the latest version, somehow, on -// check-in, if glslang has changed. -// -// revision.template is the source for revision.h when using SubWCRev as the -// method of updating revision.h. You don't have to do it this way, the -// requirement is only that revision.h gets updated. -// -// revision.h is under source control so that not all consumers of glslang -// source have to figure out how to create revision.h just to get a build -// going. However, if it is not updated, it can be a version behind. 
- -#define GLSLANG_REVISION "$WCREV$" -#define GLSLANG_DATE "$WCDATE$" diff --git a/third_party/glslang-spirv/Logger.cpp b/third_party/glslang-spirv/Logger.cpp deleted file mode 100644 index 48bd4e3ad..000000000 --- a/third_party/glslang-spirv/Logger.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// -// Copyright (C) 2016 Google, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
- -#include "Logger.h" - -#include -#include -#include - -namespace spv { - -void SpvBuildLogger::tbdFunctionality(const std::string& f) -{ - if (std::find(std::begin(tbdFeatures), std::end(tbdFeatures), f) == std::end(tbdFeatures)) - tbdFeatures.push_back(f); -} - -void SpvBuildLogger::missingFunctionality(const std::string& f) -{ - if (std::find(std::begin(missingFeatures), std::end(missingFeatures), f) == std::end(missingFeatures)) - missingFeatures.push_back(f); -} - -std::string SpvBuildLogger::getAllMessages() const { - std::ostringstream messages; - for (auto it = tbdFeatures.cbegin(); it != tbdFeatures.cend(); ++it) - messages << "TBD functionality: " << *it << "\n"; - for (auto it = missingFeatures.cbegin(); it != missingFeatures.cend(); ++it) - messages << "Missing functionality: " << *it << "\n"; - for (auto it = warnings.cbegin(); it != warnings.cend(); ++it) - messages << "warning: " << *it << "\n"; - for (auto it = errors.cbegin(); it != errors.cend(); ++it) - messages << "error: " << *it << "\n"; - return messages.str(); -} - -} // end spv namespace diff --git a/third_party/glslang-spirv/Logger.h b/third_party/glslang-spirv/Logger.h deleted file mode 100644 index 2e4ddaf51..000000000 --- a/third_party/glslang-spirv/Logger.h +++ /dev/null @@ -1,74 +0,0 @@ -// -// Copyright (C) 2016 Google, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of Google Inc. 
nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -#ifndef GLSLANG_SPIRV_LOGGER_H -#define GLSLANG_SPIRV_LOGGER_H - -#include -#include - -namespace spv { - -// A class for holding all SPIR-V build status messages, including -// missing/TBD functionalities, warnings, and errors. -class SpvBuildLogger { -public: - SpvBuildLogger() {} - - // Registers a TBD functionality. - void tbdFunctionality(const std::string& f); - // Registers a missing functionality. - void missingFunctionality(const std::string& f); - - // Logs a warning. - void warning(const std::string& w) { warnings.push_back(w); } - // Logs an error. - void error(const std::string& e) { errors.push_back(e); } - - // Returns all messages accumulated in the order of: - // TBD functionalities, missing functionalities, warnings, errors. 
- std::string getAllMessages() const; - -private: - SpvBuildLogger(const SpvBuildLogger&); - - std::vector tbdFeatures; - std::vector missingFeatures; - std::vector warnings; - std::vector errors; -}; - -} // end spv namespace - -#endif // GLSLANG_SPIRV_LOGGER_H diff --git a/third_party/glslang-spirv/SPVRemapper.cpp b/third_party/glslang-spirv/SPVRemapper.cpp deleted file mode 100644 index f14d85729..000000000 --- a/third_party/glslang-spirv/SPVRemapper.cpp +++ /dev/null @@ -1,1479 +0,0 @@ -// -// Copyright (C) 2015 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#include "SPVRemapper.h" -#include "doc.h" - -#if !defined (use_cpp11) -// ... not supported before C++11 -#else // defined (use_cpp11) - -#include -#include -#include "Include/Common.h" - -namespace spv { - - // By default, just abort on error. Can be overridden via RegisterErrorHandler - spirvbin_t::errorfn_t spirvbin_t::errorHandler = [](const std::string&) { exit(5); }; - // By default, eat log messages. Can be overridden via RegisterLogHandler - spirvbin_t::logfn_t spirvbin_t::logHandler = [](const std::string&) { }; - - // This can be overridden to provide other message behavior if needed - void spirvbin_t::msg(int minVerbosity, int indent, const std::string& txt) const - { - if (verbose >= minVerbosity) - logHandler(std::string(indent, ' ') + txt); - } - - // hash opcode, with special handling for OpExtInst - std::uint32_t spirvbin_t::asOpCodeHash(unsigned word) - { - const spv::Op opCode = asOpCode(word); - - std::uint32_t offset = 0; - - switch (opCode) { - case spv::OpExtInst: - offset += asId(word + 4); break; - default: - break; - } - - return opCode * 19 + offset; // 19 = small prime - } - - spirvbin_t::range_t spirvbin_t::literalRange(spv::Op opCode) const - { - static const int maxCount = 1<<30; - - switch (opCode) { - case spv::OpTypeFloat: // fall through... 
- case spv::OpTypePointer: return range_t(2, 3); - case spv::OpTypeInt: return range_t(2, 4); - // TODO: case spv::OpTypeImage: - // TODO: case spv::OpTypeSampledImage: - case spv::OpTypeSampler: return range_t(3, 8); - case spv::OpTypeVector: // fall through - case spv::OpTypeMatrix: // ... - case spv::OpTypePipe: return range_t(3, 4); - case spv::OpConstant: return range_t(3, maxCount); - default: return range_t(0, 0); - } - } - - spirvbin_t::range_t spirvbin_t::typeRange(spv::Op opCode) const - { - static const int maxCount = 1<<30; - - if (isConstOp(opCode)) - return range_t(1, 2); - - switch (opCode) { - case spv::OpTypeVector: // fall through - case spv::OpTypeMatrix: // ... - case spv::OpTypeSampler: // ... - case spv::OpTypeArray: // ... - case spv::OpTypeRuntimeArray: // ... - case spv::OpTypePipe: return range_t(2, 3); - case spv::OpTypeStruct: // fall through - case spv::OpTypeFunction: return range_t(2, maxCount); - case spv::OpTypePointer: return range_t(3, 4); - default: return range_t(0, 0); - } - } - - spirvbin_t::range_t spirvbin_t::constRange(spv::Op opCode) const - { - static const int maxCount = 1<<30; - - switch (opCode) { - case spv::OpTypeArray: // fall through... - case spv::OpTypeRuntimeArray: return range_t(3, 4); - case spv::OpConstantComposite: return range_t(3, maxCount); - default: return range_t(0, 0); - } - } - - // Return the size of a type in 32-bit words. This currently only - // handles ints and floats, and is only invoked by queries which must be - // integer types. If ever needed, it can be generalized. - unsigned spirvbin_t::typeSizeInWords(spv::Id id) const - { - const unsigned typeStart = idPos(id); - const spv::Op opCode = asOpCode(typeStart); - - if (errorLatch) - return 0; - - switch (opCode) { - case spv::OpTypeInt: // fall through... - case spv::OpTypeFloat: return (spv[typeStart+2]+31)/32; - default: - return 0; - } - } - - // Looks up the type of a given const or variable ID, and - // returns its size in 32-bit words. 
- unsigned spirvbin_t::idTypeSizeInWords(spv::Id id) const - { - const auto tid_it = idTypeSizeMap.find(id); - if (tid_it == idTypeSizeMap.end()) { - error("type size for ID not found"); - return 0; - } - - return tid_it->second; - } - - // Is this an opcode we should remove when using --strip? - bool spirvbin_t::isStripOp(spv::Op opCode) const - { - switch (opCode) { - case spv::OpSource: - case spv::OpSourceExtension: - case spv::OpName: - case spv::OpMemberName: - case spv::OpLine: return true; - default: return false; - } - } - - // Return true if this opcode is flow control - bool spirvbin_t::isFlowCtrl(spv::Op opCode) const - { - switch (opCode) { - case spv::OpBranchConditional: - case spv::OpBranch: - case spv::OpSwitch: - case spv::OpLoopMerge: - case spv::OpSelectionMerge: - case spv::OpLabel: - case spv::OpFunction: - case spv::OpFunctionEnd: return true; - default: return false; - } - } - - // Return true if this opcode defines a type - bool spirvbin_t::isTypeOp(spv::Op opCode) const - { - switch (opCode) { - case spv::OpTypeVoid: - case spv::OpTypeBool: - case spv::OpTypeInt: - case spv::OpTypeFloat: - case spv::OpTypeVector: - case spv::OpTypeMatrix: - case spv::OpTypeImage: - case spv::OpTypeSampler: - case spv::OpTypeArray: - case spv::OpTypeRuntimeArray: - case spv::OpTypeStruct: - case spv::OpTypeOpaque: - case spv::OpTypePointer: - case spv::OpTypeFunction: - case spv::OpTypeEvent: - case spv::OpTypeDeviceEvent: - case spv::OpTypeReserveId: - case spv::OpTypeQueue: - case spv::OpTypeSampledImage: - case spv::OpTypePipe: return true; - default: return false; - } - } - - // Return true if this opcode defines a constant - bool spirvbin_t::isConstOp(spv::Op opCode) const - { - switch (opCode) { - case spv::OpConstantNull: - case spv::OpConstantSampler: - error("unimplemented constant type"); - return true; - - case spv::OpConstantTrue: - case spv::OpConstantFalse: - case spv::OpConstantComposite: - case spv::OpConstant: - return true; - - default: - 
return false; - } - } - - const auto inst_fn_nop = [](spv::Op, unsigned) { return false; }; - const auto op_fn_nop = [](spv::Id&) { }; - - // g++ doesn't like these defined in the class proper in an anonymous namespace. - // Dunno why. Also MSVC doesn't like the constexpr keyword. Also dunno why. - // Defining them externally seems to please both compilers, so, here they are. - const spv::Id spirvbin_t::unmapped = spv::Id(-10000); - const spv::Id spirvbin_t::unused = spv::Id(-10001); - const int spirvbin_t::header_size = 5; - - spv::Id spirvbin_t::nextUnusedId(spv::Id id) - { - while (isNewIdMapped(id)) // search for an unused ID - ++id; - - return id; - } - - spv::Id spirvbin_t::localId(spv::Id id, spv::Id newId) - { - assert(id != spv::NoResult && newId != spv::NoResult); - - if (id > bound()) { - error(std::string("ID out of range: ") + std::to_string(id)); - return spirvbin_t::unused; - } - - if (id >= idMapL.size()) - idMapL.resize(id+1, unused); - - if (newId != unmapped && newId != unused) { - if (isOldIdUnused(id)) { - error(std::string("ID unused in module: ") + std::to_string(id)); - return spirvbin_t::unused; - } - - if (!isOldIdUnmapped(id)) { - error(std::string("ID already mapped: ") + std::to_string(id) + " -> " - + std::to_string(localId(id))); - - return spirvbin_t::unused; - } - - if (isNewIdMapped(newId)) { - error(std::string("ID already used in module: ") + std::to_string(newId)); - return spirvbin_t::unused; - } - - msg(4, 4, std::string("map: ") + std::to_string(id) + " -> " + std::to_string(newId)); - setMapped(newId); - largestNewId = std::max(largestNewId, newId); - } - - return idMapL[id] = newId; - } - - // Parse a literal string from the SPIR binary and return it as an std::string - // Due to C++11 RValue references, this doesn't copy the result string. 
- std::string spirvbin_t::literalString(unsigned word) const - { - std::string literal; - - literal.reserve(16); - - const char* bytes = reinterpret_cast(spv.data() + word); - - while (bytes && *bytes) - literal += *bytes++; - - return literal; - } - - void spirvbin_t::applyMap() - { - msg(3, 2, std::string("Applying map: ")); - - // Map local IDs through the ID map - process(inst_fn_nop, // ignore instructions - [this](spv::Id& id) { - id = localId(id); - - if (errorLatch) - return; - - assert(id != unused && id != unmapped); - } - ); - } - - // Find free IDs for anything we haven't mapped - void spirvbin_t::mapRemainder() - { - msg(3, 2, std::string("Remapping remainder: ")); - - spv::Id unusedId = 1; // can't use 0: that's NoResult - spirword_t maxBound = 0; - - for (spv::Id id = 0; id < idMapL.size(); ++id) { - if (isOldIdUnused(id)) - continue; - - // Find a new mapping for any used but unmapped IDs - if (isOldIdUnmapped(id)) { - localId(id, unusedId = nextUnusedId(unusedId)); - if (errorLatch) - return; - } - - if (isOldIdUnmapped(id)) { - error(std::string("old ID not mapped: ") + std::to_string(id)); - return; - } - - // Track max bound - maxBound = std::max(maxBound, localId(id) + 1); - - if (errorLatch) - return; - } - - bound(maxBound); // reset header ID bound to as big as it now needs to be - } - - // Mark debug instructions for stripping - void spirvbin_t::stripDebug() - { - // Strip instructions in the stripOp set: debug info. 
- process( - [&](spv::Op opCode, unsigned start) { - // remember opcodes we want to strip later - if (isStripOp(opCode)) - stripInst(start); - return true; - }, - op_fn_nop); - } - - // Mark instructions that refer to now-removed IDs for stripping - void spirvbin_t::stripDeadRefs() - { - process( - [&](spv::Op opCode, unsigned start) { - // strip opcodes pointing to removed data - switch (opCode) { - case spv::OpName: - case spv::OpMemberName: - case spv::OpDecorate: - case spv::OpMemberDecorate: - if (idPosR.find(asId(start+1)) == idPosR.end()) - stripInst(start); - break; - default: - break; // leave it alone - } - - return true; - }, - op_fn_nop); - - strip(); - } - - // Update local maps of ID, type, etc positions - void spirvbin_t::buildLocalMaps() - { - msg(2, 2, std::string("build local maps: ")); - - mapped.clear(); - idMapL.clear(); -// preserve nameMap, so we don't clear that. - fnPos.clear(); - fnCalls.clear(); - typeConstPos.clear(); - idPosR.clear(); - entryPoint = spv::NoResult; - largestNewId = 0; - - idMapL.resize(bound(), unused); - - int fnStart = 0; - spv::Id fnRes = spv::NoResult; - - // build local Id and name maps - process( - [&](spv::Op opCode, unsigned start) { - unsigned word = start+1; - spv::Id typeId = spv::NoResult; - - if (spv::InstructionDesc[opCode].hasType()) - typeId = asId(word++); - - // If there's a result ID, remember the size of its type - if (spv::InstructionDesc[opCode].hasResult()) { - const spv::Id resultId = asId(word++); - idPosR[resultId] = start; - - if (typeId != spv::NoResult) { - const unsigned idTypeSize = typeSizeInWords(typeId); - - if (errorLatch) - return false; - - if (idTypeSize != 0) - idTypeSizeMap[resultId] = idTypeSize; - } - } - - if (opCode == spv::Op::OpName) { - const spv::Id target = asId(start+1); - const std::string name = literalString(start+2); - nameMap[name] = target; - - } else if (opCode == spv::Op::OpFunctionCall) { - ++fnCalls[asId(start + 3)]; - } else if (opCode == spv::Op::OpEntryPoint) 
{ - entryPoint = asId(start + 2); - } else if (opCode == spv::Op::OpFunction) { - if (fnStart != 0) { - error("nested function found"); - return false; - } - - fnStart = start; - fnRes = asId(start + 2); - } else if (opCode == spv::Op::OpFunctionEnd) { - assert(fnRes != spv::NoResult); - if (fnStart == 0) { - error("function end without function start"); - return false; - } - - fnPos[fnRes] = range_t(fnStart, start + asWordCount(start)); - fnStart = 0; - } else if (isConstOp(opCode)) { - if (errorLatch) - return false; - - assert(asId(start + 2) != spv::NoResult); - typeConstPos.insert(start); - } else if (isTypeOp(opCode)) { - assert(asId(start + 1) != spv::NoResult); - typeConstPos.insert(start); - } - - return false; - }, - - [this](spv::Id& id) { localId(id, unmapped); } - ); - } - - // Validate the SPIR header - void spirvbin_t::validate() const - { - msg(2, 2, std::string("validating: ")); - - if (spv.size() < header_size) { - error("file too short: "); - return; - } - - if (magic() != spv::MagicNumber) { - error("bad magic number"); - return; - } - - // field 1 = version - // field 2 = generator magic - // field 3 = result bound - - if (schemaNum() != 0) { - error("bad schema, must be 0"); - return; - } - } - - int spirvbin_t::processInstruction(unsigned word, instfn_t instFn, idfn_t idFn) - { - const auto instructionStart = word; - const unsigned wordCount = asWordCount(instructionStart); - const int nextInst = word++ + wordCount; - spv::Op opCode = asOpCode(instructionStart); - - if (nextInst > int(spv.size())) { - error("spir instruction terminated too early"); - return -1; - } - - // Base for computing number of operands; will be updated as more is learned - unsigned numOperands = wordCount - 1; - - if (instFn(opCode, instructionStart)) - return nextInst; - - // Read type and result ID from instruction desc table - if (spv::InstructionDesc[opCode].hasType()) { - idFn(asId(word++)); - --numOperands; - } - - if (spv::InstructionDesc[opCode].hasResult()) { 
- idFn(asId(word++)); - --numOperands; - } - - // Extended instructions: currently, assume everything is an ID. - // TODO: add whatever data we need for exceptions to that - if (opCode == spv::OpExtInst) { - word += 2; // instruction set, and instruction from set - numOperands -= 2; - - for (unsigned op=0; op < numOperands; ++op) - idFn(asId(word++)); // ID - - return nextInst; - } - - // Circular buffer so we can look back at previous unmapped values during the mapping pass. - static const unsigned idBufferSize = 4; - spv::Id idBuffer[idBufferSize]; - unsigned idBufferPos = 0; - - // Store IDs from instruction in our map - for (int op = 0; numOperands > 0; ++op, --numOperands) { - // SpecConstantOp is special: it includes the operands of another opcode which is - // given as a literal in the 3rd word. We will switch over to pretending that the - // opcode being processed is the literal opcode value of the SpecConstantOp. See the - // SPIRV spec for details. This way we will handle IDs and literals as appropriate for - // the embedded op. - if (opCode == spv::OpSpecConstantOp) { - if (op == 0) { - opCode = asOpCode(word++); // this is the opcode embedded in the SpecConstantOp. - --numOperands; - } - } - - switch (spv::InstructionDesc[opCode].operands.getClass(op)) { - case spv::OperandId: - case spv::OperandScope: - case spv::OperandMemorySemantics: - idBuffer[idBufferPos] = asId(word); - idBufferPos = (idBufferPos + 1) % idBufferSize; - idFn(asId(word++)); - break; - - case spv::OperandVariableIds: - for (unsigned i = 0; i < numOperands; ++i) - idFn(asId(word++)); - return nextInst; - - case spv::OperandVariableLiterals: - // for clarity - // if (opCode == spv::OpDecorate && asDecoration(word - 1) == spv::DecorationBuiltIn) { - // ++word; - // --numOperands; - // } - // word += numOperands; - return nextInst; - - case spv::OperandVariableLiteralId: { - if (opCode == OpSwitch) { - // word-2 is the position of the selector ID. OpSwitch Literals match its type. 
- // In case the IDs are currently being remapped, we get the word[-2] ID from - // the circular idBuffer. - const unsigned literalSizePos = (idBufferPos+idBufferSize-2) % idBufferSize; - const unsigned literalSize = idTypeSizeInWords(idBuffer[literalSizePos]); - const unsigned numLiteralIdPairs = (nextInst-word) / (1+literalSize); - - if (errorLatch) - return -1; - - for (unsigned arg=0; arg instPos; - instPos.reserve(unsigned(spv.size()) / 16); // initial estimate; can grow if needed. - - // Build local table of instruction start positions - process( - [&](spv::Op, unsigned start) { instPos.push_back(start); return true; }, - op_fn_nop); - - if (errorLatch) - return; - - // Window size for context-sensitive canonicalization values - // Empirical best size from a single data set. TODO: Would be a good tunable. - // We essentially perform a little convolution around each instruction, - // to capture the flavor of nearby code, to hopefully match to similar - // code in other modules. - static const unsigned windowSize = 2; - - for (unsigned entry = 0; entry < unsigned(instPos.size()); ++entry) { - const unsigned start = instPos[entry]; - const spv::Op opCode = asOpCode(start); - - if (opCode == spv::OpFunction) - fnId = asId(start + 2); - - if (opCode == spv::OpFunctionEnd) - fnId = spv::NoResult; - - if (fnId != spv::NoResult) { // if inside a function - if (spv::InstructionDesc[opCode].hasResult()) { - const unsigned word = start + (spv::InstructionDesc[opCode].hasType() ? 
2 : 1); - const spv::Id resId = asId(word); - std::uint32_t hashval = fnId * 17; // small prime - - for (unsigned i = entry-1; i >= entry-windowSize; --i) { - if (asOpCode(instPos[i]) == spv::OpFunction) - break; - hashval = hashval * 30103 + asOpCodeHash(instPos[i]); // 30103 = semiarbitrary prime - } - - for (unsigned i = entry; i <= entry + windowSize; ++i) { - if (asOpCode(instPos[i]) == spv::OpFunctionEnd) - break; - hashval = hashval * 30103 + asOpCodeHash(instPos[i]); // 30103 = semiarbitrary prime - } - - if (isOldIdUnmapped(resId)) { - localId(resId, nextUnusedId(hashval % softTypeIdLimit + firstMappedID)); - if (errorLatch) - return; - } - - } - } - } - - spv::Op thisOpCode(spv::OpNop); - std::unordered_map opCounter; - int idCounter(0); - fnId = spv::NoResult; - - process( - [&](spv::Op opCode, unsigned start) { - switch (opCode) { - case spv::OpFunction: - // Reset counters at each function - idCounter = 0; - opCounter.clear(); - fnId = asId(start + 2); - break; - - case spv::OpImageSampleImplicitLod: - case spv::OpImageSampleExplicitLod: - case spv::OpImageSampleDrefImplicitLod: - case spv::OpImageSampleDrefExplicitLod: - case spv::OpImageSampleProjImplicitLod: - case spv::OpImageSampleProjExplicitLod: - case spv::OpImageSampleProjDrefImplicitLod: - case spv::OpImageSampleProjDrefExplicitLod: - case spv::OpDot: - case spv::OpCompositeExtract: - case spv::OpCompositeInsert: - case spv::OpVectorShuffle: - case spv::OpLabel: - case spv::OpVariable: - - case spv::OpAccessChain: - case spv::OpLoad: - case spv::OpStore: - case spv::OpCompositeConstruct: - case spv::OpFunctionCall: - ++opCounter[opCode]; - idCounter = 0; - thisOpCode = opCode; - break; - default: - thisOpCode = spv::OpNop; - } - - return false; - }, - - [&](spv::Id& id) { - if (thisOpCode != spv::OpNop) { - ++idCounter; - const std::uint32_t hashval = opCounter[thisOpCode] * thisOpCode * 50047 + idCounter + fnId * 117; - - if (isOldIdUnmapped(id)) - localId(id, nextUnusedId(hashval % 
softTypeIdLimit + firstMappedID)); - } - }); - } - - // EXPERIMENTAL: forward IO and uniform load/stores into operands - // This produces invalid Schema-0 SPIRV - void spirvbin_t::forwardLoadStores() - { - idset_t fnLocalVars; // set of function local vars - idmap_t idMap; // Map of load result IDs to what they load - - // EXPERIMENTAL: Forward input and access chain loads into consumptions - process( - [&](spv::Op opCode, unsigned start) { - // Add inputs and uniforms to the map - if ((opCode == spv::OpVariable && asWordCount(start) == 4) && - (spv[start+3] == spv::StorageClassUniform || - spv[start+3] == spv::StorageClassUniformConstant || - spv[start+3] == spv::StorageClassInput)) - fnLocalVars.insert(asId(start+2)); - - if (opCode == spv::OpAccessChain && fnLocalVars.count(asId(start+3)) > 0) - fnLocalVars.insert(asId(start+2)); - - if (opCode == spv::OpLoad && fnLocalVars.count(asId(start+3)) > 0) { - idMap[asId(start+2)] = asId(start+3); - stripInst(start); - } - - return false; - }, - - [&](spv::Id& id) { if (idMap.find(id) != idMap.end()) id = idMap[id]; } - ); - - if (errorLatch) - return; - - // EXPERIMENTAL: Implicit output stores - fnLocalVars.clear(); - idMap.clear(); - - process( - [&](spv::Op opCode, unsigned start) { - // Add inputs and uniforms to the map - if ((opCode == spv::OpVariable && asWordCount(start) == 4) && - (spv[start+3] == spv::StorageClassOutput)) - fnLocalVars.insert(asId(start+2)); - - if (opCode == spv::OpStore && fnLocalVars.count(asId(start+1)) > 0) { - idMap[asId(start+2)] = asId(start+1); - stripInst(start); - } - - return false; - }, - op_fn_nop); - - if (errorLatch) - return; - - process( - inst_fn_nop, - [&](spv::Id& id) { if (idMap.find(id) != idMap.end()) id = idMap[id]; } - ); - - if (errorLatch) - return; - - strip(); // strip out data we decided to eliminate - } - - // optimize loads and stores - void spirvbin_t::optLoadStore() - { - idset_t fnLocalVars; // candidates for removal (only locals) - idmap_t idMap; // Map 
of load result IDs to what they load - blockmap_t blockMap; // Map of IDs to blocks they first appear in - int blockNum = 0; // block count, to avoid crossing flow control - - // Find all the function local pointers stored at most once, and not via access chains - process( - [&](spv::Op opCode, unsigned start) { - const int wordCount = asWordCount(start); - - // Count blocks, so we can avoid crossing flow control - if (isFlowCtrl(opCode)) - ++blockNum; - - // Add local variables to the map - if ((opCode == spv::OpVariable && spv[start+3] == spv::StorageClassFunction && asWordCount(start) == 4)) { - fnLocalVars.insert(asId(start+2)); - return true; - } - - // Ignore process vars referenced via access chain - if ((opCode == spv::OpAccessChain || opCode == spv::OpInBoundsAccessChain) && fnLocalVars.count(asId(start+3)) > 0) { - fnLocalVars.erase(asId(start+3)); - idMap.erase(asId(start+3)); - return true; - } - - if (opCode == spv::OpLoad && fnLocalVars.count(asId(start+3)) > 0) { - const spv::Id varId = asId(start+3); - - // Avoid loads before stores - if (idMap.find(varId) == idMap.end()) { - fnLocalVars.erase(varId); - idMap.erase(varId); - } - - // don't do for volatile references - if (wordCount > 4 && (spv[start+4] & spv::MemoryAccessVolatileMask)) { - fnLocalVars.erase(varId); - idMap.erase(varId); - } - - // Handle flow control - if (blockMap.find(varId) == blockMap.end()) { - blockMap[varId] = blockNum; // track block we found it in. 
- } else if (blockMap[varId] != blockNum) { - fnLocalVars.erase(varId); // Ignore if crosses flow control - idMap.erase(varId); - } - - return true; - } - - if (opCode == spv::OpStore && fnLocalVars.count(asId(start+1)) > 0) { - const spv::Id varId = asId(start+1); - - if (idMap.find(varId) == idMap.end()) { - idMap[varId] = asId(start+2); - } else { - // Remove if it has more than one store to the same pointer - fnLocalVars.erase(varId); - idMap.erase(varId); - } - - // don't do for volatile references - if (wordCount > 3 && (spv[start+3] & spv::MemoryAccessVolatileMask)) { - fnLocalVars.erase(asId(start+3)); - idMap.erase(asId(start+3)); - } - - // Handle flow control - if (blockMap.find(varId) == blockMap.end()) { - blockMap[varId] = blockNum; // track block we found it in. - } else if (blockMap[varId] != blockNum) { - fnLocalVars.erase(varId); // Ignore if crosses flow control - idMap.erase(varId); - } - - return true; - } - - return false; - }, - - // If local var id used anywhere else, don't eliminate - [&](spv::Id& id) { - if (fnLocalVars.count(id) > 0) { - fnLocalVars.erase(id); - idMap.erase(id); - } - } - ); - - if (errorLatch) - return; - - process( - [&](spv::Op opCode, unsigned start) { - if (opCode == spv::OpLoad && fnLocalVars.count(asId(start+3)) > 0) - idMap[asId(start+2)] = idMap[asId(start+3)]; - return false; - }, - op_fn_nop); - - if (errorLatch) - return; - - // Chase replacements to their origins, in case there is a chain such as: - // 2 = store 1 - // 3 = load 2 - // 4 = store 3 - // 5 = load 4 - // We want to replace uses of 5 with 1. 
- for (const auto& idPair : idMap) { - spv::Id id = idPair.first; - while (idMap.find(id) != idMap.end()) // Chase to end of chain - id = idMap[id]; - - idMap[idPair.first] = id; // replace with final result - } - - // Remove the load/store/variables for the ones we've discovered - process( - [&](spv::Op opCode, unsigned start) { - if ((opCode == spv::OpLoad && fnLocalVars.count(asId(start+3)) > 0) || - (opCode == spv::OpStore && fnLocalVars.count(asId(start+1)) > 0) || - (opCode == spv::OpVariable && fnLocalVars.count(asId(start+2)) > 0)) { - - stripInst(start); - return true; - } - - return false; - }, - - [&](spv::Id& id) { - if (idMap.find(id) != idMap.end()) id = idMap[id]; - } - ); - - if (errorLatch) - return; - - strip(); // strip out data we decided to eliminate - } - - // remove bodies of uncalled functions - void spirvbin_t::dceFuncs() - { - msg(3, 2, std::string("Removing Dead Functions: ")); - - // TODO: There are more efficient ways to do this. - bool changed = true; - - while (changed) { - changed = false; - - for (auto fn = fnPos.begin(); fn != fnPos.end(); ) { - if (fn->first == entryPoint) { // don't DCE away the entry point! 
- ++fn; - continue; - } - - const auto call_it = fnCalls.find(fn->first); - - if (call_it == fnCalls.end() || call_it->second == 0) { - changed = true; - stripRange.push_back(fn->second); - - // decrease counts of called functions - process( - [&](spv::Op opCode, unsigned start) { - if (opCode == spv::Op::OpFunctionCall) { - const auto call_it = fnCalls.find(asId(start + 3)); - if (call_it != fnCalls.end()) { - if (--call_it->second <= 0) - fnCalls.erase(call_it); - } - } - - return true; - }, - op_fn_nop, - fn->second.first, - fn->second.second); - - if (errorLatch) - return; - - fn = fnPos.erase(fn); - } else ++fn; - } - } - } - - // remove unused function variables + decorations - void spirvbin_t::dceVars() - { - msg(3, 2, std::string("DCE Vars: ")); - - std::unordered_map varUseCount; - - // Count function variable use - process( - [&](spv::Op opCode, unsigned start) { - if (opCode == spv::OpVariable) { - ++varUseCount[asId(start+2)]; - return true; - } else if (opCode == spv::OpEntryPoint) { - const int wordCount = asWordCount(start); - for (int i = 4; i < wordCount; i++) { - ++varUseCount[asId(start+i)]; - } - return true; - } else - return false; - }, - - [&](spv::Id& id) { if (varUseCount[id]) ++varUseCount[id]; } - ); - - if (errorLatch) - return; - - // Remove single-use function variables + associated decorations and names - process( - [&](spv::Op opCode, unsigned start) { - spv::Id id = spv::NoResult; - if (opCode == spv::OpVariable) - id = asId(start+2); - if (opCode == spv::OpDecorate || opCode == spv::OpName) - id = asId(start+1); - - if (id != spv::NoResult && varUseCount[id] == 1) - stripInst(start); - - return true; - }, - op_fn_nop); - } - - // remove unused types - void spirvbin_t::dceTypes() - { - std::vector isType(bound(), false); - - // for speed, make O(1) way to get to type query (map is log(n)) - for (const auto typeStart : typeConstPos) - isType[asTypeConstId(typeStart)] = true; - - std::unordered_map typeUseCount; - - // This is not the 
most efficient algorithm, but this is an offline tool, and - // it's easy to write this way. Can be improved opportunistically if needed. - bool changed = true; - while (changed) { - changed = false; - strip(); - typeUseCount.clear(); - - // Count total type usage - process(inst_fn_nop, - [&](spv::Id& id) { if (isType[id]) ++typeUseCount[id]; } - ); - - if (errorLatch) - return; - - // Remove single reference types - for (const auto typeStart : typeConstPos) { - const spv::Id typeId = asTypeConstId(typeStart); - if (typeUseCount[typeId] == 1) { - changed = true; - --typeUseCount[typeId]; - stripInst(typeStart); - } - } - - if (errorLatch) - return; - } - } - -#ifdef NOTDEF - bool spirvbin_t::matchType(const spirvbin_t::globaltypes_t& globalTypes, spv::Id lt, spv::Id gt) const - { - // Find the local type id "lt" and global type id "gt" - const auto lt_it = typeConstPosR.find(lt); - if (lt_it == typeConstPosR.end()) - return false; - - const auto typeStart = lt_it->second; - - // Search for entry in global table - const auto gtype = globalTypes.find(gt); - if (gtype == globalTypes.end()) - return false; - - const auto& gdata = gtype->second; - - // local wordcount and opcode - const int wordCount = asWordCount(typeStart); - const spv::Op opCode = asOpCode(typeStart); - - // no type match if opcodes don't match, or operand count doesn't match - if (opCode != opOpCode(gdata[0]) || wordCount != opWordCount(gdata[0])) - return false; - - const unsigned numOperands = wordCount - 2; // all types have a result - - const auto cmpIdRange = [&](range_t range) { - for (int x=range.first; xsecond; - } - - // Hash types to canonical values. This can return ID collisions (it's a bit - // inevitable): it's up to the caller to handle that gracefully. 
- std::uint32_t spirvbin_t::hashType(unsigned typeStart) const - { - const unsigned wordCount = asWordCount(typeStart); - const spv::Op opCode = asOpCode(typeStart); - - switch (opCode) { - case spv::OpTypeVoid: return 0; - case spv::OpTypeBool: return 1; - case spv::OpTypeInt: return 3 + (spv[typeStart+3]); - case spv::OpTypeFloat: return 5; - case spv::OpTypeVector: - return 6 + hashType(idPos(spv[typeStart+2])) * (spv[typeStart+3] - 1); - case spv::OpTypeMatrix: - return 30 + hashType(idPos(spv[typeStart+2])) * (spv[typeStart+3] - 1); - case spv::OpTypeImage: - return 120 + hashType(idPos(spv[typeStart+2])) + - spv[typeStart+3] + // dimensionality - spv[typeStart+4] * 8 * 16 + // depth - spv[typeStart+5] * 4 * 16 + // arrayed - spv[typeStart+6] * 2 * 16 + // multisampled - spv[typeStart+7] * 1 * 16; // format - case spv::OpTypeSampler: - return 500; - case spv::OpTypeSampledImage: - return 502; - case spv::OpTypeArray: - return 501 + hashType(idPos(spv[typeStart+2])) * spv[typeStart+3]; - case spv::OpTypeRuntimeArray: - return 5000 + hashType(idPos(spv[typeStart+2])); - case spv::OpTypeStruct: - { - std::uint32_t hash = 10000; - for (unsigned w=2; w < wordCount; ++w) - hash += w * hashType(idPos(spv[typeStart+w])); - return hash; - } - - case spv::OpTypeOpaque: return 6000 + spv[typeStart+2]; - case spv::OpTypePointer: return 100000 + hashType(idPos(spv[typeStart+3])); - case spv::OpTypeFunction: - { - std::uint32_t hash = 200000; - for (unsigned w=2; w < wordCount; ++w) - hash += w * hashType(idPos(spv[typeStart+w])); - return hash; - } - - case spv::OpTypeEvent: return 300000; - case spv::OpTypeDeviceEvent: return 300001; - case spv::OpTypeReserveId: return 300002; - case spv::OpTypeQueue: return 300003; - case spv::OpTypePipe: return 300004; - - case spv::OpConstantNull: return 300005; - case spv::OpConstantSampler: return 300006; - - case spv::OpConstantTrue: return 300007; - case spv::OpConstantFalse: return 300008; - case spv::OpConstantComposite: - { - 
std::uint32_t hash = 300011 + hashType(idPos(spv[typeStart+1])); - for (unsigned w=3; w < wordCount; ++w) - hash += w * hashType(idPos(spv[typeStart+w])); - return hash; - } - case spv::OpConstant: - { - std::uint32_t hash = 400011 + hashType(idPos(spv[typeStart+1])); - for (unsigned w=3; w < wordCount; ++w) - hash += w * spv[typeStart+w]; - return hash; - } - - default: - error("unknown type opcode"); - return 0; - } - } - - void spirvbin_t::mapTypeConst() - { - globaltypes_t globalTypeMap; - - msg(3, 2, std::string("Remapping Consts & Types: ")); - - static const std::uint32_t softTypeIdLimit = 3011; // small prime. TODO: get from options - static const std::uint32_t firstMappedID = 8; // offset into ID space - - for (auto& typeStart : typeConstPos) { - const spv::Id resId = asTypeConstId(typeStart); - const std::uint32_t hashval = hashType(typeStart); - - if (errorLatch) - return; - - if (isOldIdUnmapped(resId)) { - localId(resId, nextUnusedId(hashval % softTypeIdLimit + firstMappedID)); - if (errorLatch) - return; - } - } - } - - // Strip a single binary by removing ranges given in stripRange - void spirvbin_t::strip() - { - if (stripRange.empty()) // nothing to do - return; - - // Sort strip ranges in order of traversal - std::sort(stripRange.begin(), stripRange.end()); - - // Allocate a new binary big enough to hold old binary - // We'll step this iterator through the strip ranges as we go through the binary - auto strip_it = stripRange.begin(); - - int strippedPos = 0; - for (unsigned word = 0; word < unsigned(spv.size()); ++word) { - while (strip_it != stripRange.end() && word >= strip_it->second) - ++strip_it; - - if (strip_it == stripRange.end() || word < strip_it->first || word >= strip_it->second) - spv[strippedPos++] = spv[word]; - } - - spv.resize(strippedPos); - stripRange.clear(); - - buildLocalMaps(); - } - - // Strip a single binary by removing ranges given in stripRange - void spirvbin_t::remap(std::uint32_t opts) - { - options = opts; - - // Set 
up opcode tables from SpvDoc - spv::Parameterize(); - - validate(); // validate header - buildLocalMaps(); // build ID maps - - msg(3, 4, std::string("ID bound: ") + std::to_string(bound())); - - if (options & STRIP) stripDebug(); - if (errorLatch) return; - - strip(); // strip out data we decided to eliminate - if (errorLatch) return; - - if (options & OPT_LOADSTORE) optLoadStore(); - if (errorLatch) return; - - if (options & OPT_FWD_LS) forwardLoadStores(); - if (errorLatch) return; - - if (options & DCE_FUNCS) dceFuncs(); - if (errorLatch) return; - - if (options & DCE_VARS) dceVars(); - if (errorLatch) return; - - if (options & DCE_TYPES) dceTypes(); - if (errorLatch) return; - - strip(); // strip out data we decided to eliminate - if (errorLatch) return; - - stripDeadRefs(); // remove references to things we DCEed - if (errorLatch) return; - - // after the last strip, we must clean any debug info referring to now-deleted data - - if (options & MAP_TYPES) mapTypeConst(); - if (errorLatch) return; - - if (options & MAP_NAMES) mapNames(); - if (errorLatch) return; - - if (options & MAP_FUNCS) mapFnBodies(); - if (errorLatch) return; - - if (options & MAP_ALL) { - mapRemainder(); // map any unmapped IDs - if (errorLatch) return; - - applyMap(); // Now remap each shader to the new IDs we've come up with - if (errorLatch) return; - } - } - - // remap from a memory image - void spirvbin_t::remap(std::vector& in_spv, std::uint32_t opts) - { - spv.swap(in_spv); - remap(opts); - spv.swap(in_spv); - } - -} // namespace SPV - -#endif // defined (use_cpp11) - diff --git a/third_party/glslang-spirv/SPVRemapper.h b/third_party/glslang-spirv/SPVRemapper.h deleted file mode 100644 index 97e3f31fa..000000000 --- a/third_party/glslang-spirv/SPVRemapper.h +++ /dev/null @@ -1,304 +0,0 @@ -// -// Copyright (C) 2015 LunarG, Inc. -// -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// - -#ifndef SPIRVREMAPPER_H -#define SPIRVREMAPPER_H - -#include -#include -#include -#include - -namespace spv { - -// MSVC defines __cplusplus as an older value, even when it supports almost all of 11. -// We handle that here by making our own symbol. 
-#if __cplusplus >= 201103L || _MSC_VER >= 1700 -# define use_cpp11 1 -#endif - -class spirvbin_base_t -{ -public: - enum Options { - NONE = 0, - STRIP = (1<<0), - MAP_TYPES = (1<<1), - MAP_NAMES = (1<<2), - MAP_FUNCS = (1<<3), - DCE_FUNCS = (1<<4), - DCE_VARS = (1<<5), - DCE_TYPES = (1<<6), - OPT_LOADSTORE = (1<<7), - OPT_FWD_LS = (1<<8), // EXPERIMENTAL: PRODUCES INVALID SCHEMA-0 SPIRV - MAP_ALL = (MAP_TYPES | MAP_NAMES | MAP_FUNCS), - DCE_ALL = (DCE_FUNCS | DCE_VARS | DCE_TYPES), - OPT_ALL = (OPT_LOADSTORE), - - ALL_BUT_STRIP = (MAP_ALL | DCE_ALL | OPT_ALL), - DO_EVERYTHING = (STRIP | ALL_BUT_STRIP) - }; -}; - -} // namespace SPV - -#if !defined (use_cpp11) -#include -#include - -namespace spv { -class spirvbin_t : public spirvbin_base_t -{ -public: - spirvbin_t(int /*verbose = 0*/) { } - - void remap(std::vector& /*spv*/, unsigned int /*opts = 0*/) - { - printf("Tool not compiled for C++11, which is required for SPIR-V remapping.\n"); - exit(5); - } -}; - -} // namespace SPV - -#else // defined (use_cpp11) - -#include -#include -#include -#include -#include -#include -#include - -#include "spirv.hpp" -#include "spvIR.h" - -namespace spv { - -// class to hold SPIR-V binary data for remapping, DCE, and debug stripping -class spirvbin_t : public spirvbin_base_t -{ -public: - spirvbin_t(int verbose = 0) : entryPoint(spv::NoResult), largestNewId(0), verbose(verbose), errorLatch(false) - { } - - virtual ~spirvbin_t() { } - - // remap on an existing binary in memory - void remap(std::vector& spv, std::uint32_t opts = DO_EVERYTHING); - - // Type for error/log handler functions - typedef std::function errorfn_t; - typedef std::function logfn_t; - - // Register error/log handling functions (can be lambda fn / functor / etc) - static void registerErrorHandler(errorfn_t handler) { errorHandler = handler; } - static void registerLogHandler(logfn_t handler) { logHandler = handler; } - -protected: - // This can be overridden to provide other message behavior if needed - 
virtual void msg(int minVerbosity, int indent, const std::string& txt) const; - -private: - // Local to global, or global to local ID map - typedef std::unordered_map idmap_t; - typedef std::unordered_set idset_t; - typedef std::unordered_map blockmap_t; - - void remap(std::uint32_t opts = DO_EVERYTHING); - - // Map of names to IDs - typedef std::unordered_map namemap_t; - - typedef std::uint32_t spirword_t; - - typedef std::pair range_t; - typedef std::function idfn_t; - typedef std::function instfn_t; - - // Special Values for ID map: - static const spv::Id unmapped; // unchanged from default value - static const spv::Id unused; // unused ID - static const int header_size; // SPIR header = 5 words - - class id_iterator_t; - - // For mapping type entries between different shaders - typedef std::vector typeentry_t; - typedef std::map globaltypes_t; - - // A set that preserves position order, and a reverse map - typedef std::set posmap_t; - typedef std::unordered_map posmap_rev_t; - - // Maps and ID to the size of its base type, if known. 
- typedef std::unordered_map typesize_map_t; - - // handle error - void error(const std::string& txt) const { errorLatch = true; errorHandler(txt); } - - bool isConstOp(spv::Op opCode) const; - bool isTypeOp(spv::Op opCode) const; - bool isStripOp(spv::Op opCode) const; - bool isFlowCtrl(spv::Op opCode) const; - range_t literalRange(spv::Op opCode) const; - range_t typeRange(spv::Op opCode) const; - range_t constRange(spv::Op opCode) const; - unsigned typeSizeInWords(spv::Id id) const; - unsigned idTypeSizeInWords(spv::Id id) const; - - spv::Id& asId(unsigned word) { return spv[word]; } - const spv::Id& asId(unsigned word) const { return spv[word]; } - spv::Op asOpCode(unsigned word) const { return opOpCode(spv[word]); } - std::uint32_t asOpCodeHash(unsigned word); - spv::Decoration asDecoration(unsigned word) const { return spv::Decoration(spv[word]); } - unsigned asWordCount(unsigned word) const { return opWordCount(spv[word]); } - spv::Id asTypeConstId(unsigned word) const { return asId(word + (isTypeOp(asOpCode(word)) ? 1 : 2)); } - unsigned idPos(spv::Id id) const; - - static unsigned opWordCount(spirword_t data) { return data >> spv::WordCountShift; } - static spv::Op opOpCode(spirword_t data) { return spv::Op(data & spv::OpCodeMask); } - - // Header access & set methods - spirword_t magic() const { return spv[0]; } // return magic number - spirword_t bound() const { return spv[3]; } // return Id bound from header - spirword_t bound(spirword_t b) { return spv[3] = b; }; - spirword_t genmagic() const { return spv[2]; } // generator magic - spirword_t genmagic(spirword_t m) { return spv[2] = m; } - spirword_t schemaNum() const { return spv[4]; } // schema number from header - - // Mapping fns: get - spv::Id localId(spv::Id id) const { return idMapL[id]; } - - // Mapping fns: set - inline spv::Id localId(spv::Id id, spv::Id newId); - void countIds(spv::Id id); - - // Return next unused new local ID. 
- // NOTE: boost::dynamic_bitset would be more efficient due to find_next(), - // which std::vector doens't have. - inline spv::Id nextUnusedId(spv::Id id); - - void buildLocalMaps(); - std::string literalString(unsigned word) const; // Return literal as a std::string - int literalStringWords(const std::string& str) const { return (int(str.size())+4)/4; } - - bool isNewIdMapped(spv::Id newId) const { return isMapped(newId); } - bool isOldIdUnmapped(spv::Id oldId) const { return localId(oldId) == unmapped; } - bool isOldIdUnused(spv::Id oldId) const { return localId(oldId) == unused; } - bool isOldIdMapped(spv::Id oldId) const { return !isOldIdUnused(oldId) && !isOldIdUnmapped(oldId); } - bool isFunction(spv::Id oldId) const { return fnPos.find(oldId) != fnPos.end(); } - - // bool matchType(const globaltypes_t& globalTypes, spv::Id lt, spv::Id gt) const; - // spv::Id findType(const globaltypes_t& globalTypes, spv::Id lt) const; - std::uint32_t hashType(unsigned typeStart) const; - - spirvbin_t& process(instfn_t, idfn_t, unsigned begin = 0, unsigned end = 0); - int processInstruction(unsigned word, instfn_t, idfn_t); - - void validate() const; - void mapTypeConst(); - void mapFnBodies(); - void optLoadStore(); - void dceFuncs(); - void dceVars(); - void dceTypes(); - void mapNames(); - void foldIds(); // fold IDs to smallest space - void forwardLoadStores(); // load store forwarding (EXPERIMENTAL) - void offsetIds(); // create relative offset IDs - - void applyMap(); // remap per local name map - void mapRemainder(); // map any IDs we haven't touched yet - void stripDebug(); // strip all debug info - void stripDeadRefs(); // strips debug info for now-dead references after DCE - void strip(); // remove debug symbols - - std::vector spv; // SPIR words - - namemap_t nameMap; // ID names from OpName - - // Since we want to also do binary ops, we can't use std::vector. we could use - // boost::dynamic_bitset, but we're trying to avoid a boost dependency. 
- typedef std::uint64_t bits_t; - std::vector mapped; // which new IDs have been mapped - static const int mBits = sizeof(bits_t) * 4; - - bool isMapped(spv::Id id) const { return id < maxMappedId() && ((mapped[id/mBits] & (1LL<<(id%mBits))) != 0); } - void setMapped(spv::Id id) { resizeMapped(id); mapped[id/mBits] |= (1LL<<(id%mBits)); } - void resizeMapped(spv::Id id) { if (id >= maxMappedId()) mapped.resize(id/mBits+1, 0); } - size_t maxMappedId() const { return mapped.size() * mBits; } - - // Add a strip range for a given instruction starting at 'start' - // Note: avoiding brace initializers to please older versions os MSVC. - void stripInst(unsigned start) { stripRange.push_back(range_t(start, start + asWordCount(start))); } - - // Function start and end. use unordered_map because we'll have - // many fewer functions than IDs. - std::unordered_map fnPos; - - // Which functions are called, anywhere in the module, with a call count - std::unordered_map fnCalls; - - posmap_t typeConstPos; // word positions that define types & consts (ordered) - posmap_rev_t idPosR; // reverse map from IDs to positions - typesize_map_t idTypeSizeMap; // maps each ID to its type size, if known. - - std::vector idMapL; // ID {M}ap from {L}ocal to {G}lobal IDs - - spv::Id entryPoint; // module entry point - spv::Id largestNewId; // biggest new ID we have mapped anything to - - // Sections of the binary to strip, given as [begin,end) - std::vector stripRange; - - // processing options: - std::uint32_t options; - int verbose; // verbosity level - - // Error latch: this is set if the error handler is ever executed. It would be better to - // use a try/catch block and throw, but that's not desired for certain environments, so - // this is the alternative. 
- mutable bool errorLatch; - - static errorfn_t errorHandler; - static logfn_t logHandler; -}; - -} // namespace SPV - -#endif // defined (use_cpp11) -#endif // SPIRVREMAPPER_H diff --git a/third_party/glslang-spirv/SpvBuilder.cpp b/third_party/glslang-spirv/SpvBuilder.cpp deleted file mode 100644 index 0afcc6433..000000000 --- a/third_party/glslang-spirv/SpvBuilder.cpp +++ /dev/null @@ -1,2676 +0,0 @@ -// -// Copyright (C) 2014-2015 LunarG, Inc. -// Copyright (C) 2015-2016 Google, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -// -// Helper for making SPIR-V IR. Generally, this is documented in the header -// SpvBuilder.h. -// - -#include -#include - -#include -#include - -#include "SpvBuilder.h" - -#ifdef AMD_EXTENSIONS - #include "hex_float.h" -#endif - -#ifndef _WIN32 - #include -#endif - -namespace spv { - -Builder::Builder(unsigned int spvVersion, unsigned int magicNumber, SpvBuildLogger* buildLogger) : - spvVersion(spvVersion), - source(SourceLanguageUnknown), - sourceVersion(0), - sourceFileStringId(NoResult), - currentLine(0), - emitOpLines(false), - addressModel(AddressingModelLogical), - memoryModel(MemoryModelGLSL450), - builderNumber(magicNumber), - buildPoint(0), - uniqueId(0), - entryPointFunction(0), - generatingOpCodeForSpecConst(false), - logger(buildLogger) -{ - clearAccessChain(); -} - -Builder::~Builder() -{ -} - -Id Builder::import(const char* name) -{ - Instruction* import = new Instruction(getUniqueId(), NoType, OpExtInstImport); - import->addStringOperand(name); - - imports.push_back(std::unique_ptr(import)); - return import->getResultId(); -} - -// Emit an OpLine if we've been asked to emit OpLines and the line number -// has changed since the last time, and is a valid line number. 
-void Builder::setLine(int lineNum) -{ - if (lineNum != 0 && lineNum != currentLine) { - currentLine = lineNum; - if (emitOpLines) - addLine(sourceFileStringId, currentLine, 0); - } -} - -void Builder::addLine(Id fileName, int lineNum, int column) -{ - Instruction* line = new Instruction(OpLine); - line->addIdOperand(fileName); - line->addImmediateOperand(lineNum); - line->addImmediateOperand(column); - buildPoint->addInstruction(std::unique_ptr(line)); -} - -// For creating new groupedTypes (will return old type if the requested one was already made). -Id Builder::makeVoidType() -{ - Instruction* type; - if (groupedTypes[OpTypeVoid].size() == 0) { - type = new Instruction(getUniqueId(), NoType, OpTypeVoid); - groupedTypes[OpTypeVoid].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - } else - type = groupedTypes[OpTypeVoid].back(); - - return type->getResultId(); -} - -Id Builder::makeBoolType() -{ - Instruction* type; - if (groupedTypes[OpTypeBool].size() == 0) { - type = new Instruction(getUniqueId(), NoType, OpTypeBool); - groupedTypes[OpTypeBool].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - } else - type = groupedTypes[OpTypeBool].back(); - - return type->getResultId(); -} - -Id Builder::makeSamplerType() -{ - Instruction* type; - if (groupedTypes[OpTypeSampler].size() == 0) { - type = new Instruction(getUniqueId(), NoType, OpTypeSampler); - groupedTypes[OpTypeSampler].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - } else - type = groupedTypes[OpTypeSampler].back(); - - return type->getResultId(); -} - -Id Builder::makePointer(StorageClass storageClass, Id pointee) -{ - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypePointer].size(); ++t) { - type = groupedTypes[OpTypePointer][t]; - if (type->getImmediateOperand(0) == (unsigned)storageClass && - 
type->getIdOperand(1) == pointee) - return type->getResultId(); - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypePointer); - type->addImmediateOperand(storageClass); - type->addIdOperand(pointee); - groupedTypes[OpTypePointer].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - return type->getResultId(); -} - -Id Builder::makeIntegerType(int width, bool hasSign) -{ - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypeInt].size(); ++t) { - type = groupedTypes[OpTypeInt][t]; - if (type->getImmediateOperand(0) == (unsigned)width && - type->getImmediateOperand(1) == (hasSign ? 1u : 0u)) - return type->getResultId(); - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypeInt); - type->addImmediateOperand(width); - type->addImmediateOperand(hasSign ? 1 : 0); - groupedTypes[OpTypeInt].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - // deal with capabilities - switch (width) { - case 16: - addCapability(CapabilityInt16); - break; - case 64: - addCapability(CapabilityInt64); - break; - default: - break; - } - - return type->getResultId(); -} - -Id Builder::makeFloatType(int width) -{ - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypeFloat].size(); ++t) { - type = groupedTypes[OpTypeFloat][t]; - if (type->getImmediateOperand(0) == (unsigned)width) - return type->getResultId(); - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypeFloat); - type->addImmediateOperand(width); - groupedTypes[OpTypeFloat].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - // deal with capabilities - switch (width) { - case 16: - addCapability(CapabilityFloat16); - break; - case 64: - addCapability(CapabilityFloat64); - break; - default: - break; - } - - 
return type->getResultId(); -} - -// Make a struct without checking for duplication. -// See makeStructResultType() for non-decorated structs -// needed as the result of some instructions, which does -// check for duplicates. -Id Builder::makeStructType(const std::vector& members, const char* name) -{ - // Don't look for previous one, because in the general case, - // structs can be duplicated except for decorations. - - // not found, make it - Instruction* type = new Instruction(getUniqueId(), NoType, OpTypeStruct); - for (int op = 0; op < (int)members.size(); ++op) - type->addIdOperand(members[op]); - groupedTypes[OpTypeStruct].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - addName(type->getResultId(), name); - - return type->getResultId(); -} - -// Make a struct for the simple results of several instructions, -// checking for duplication. -Id Builder::makeStructResultType(Id type0, Id type1) -{ - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypeStruct].size(); ++t) { - type = groupedTypes[OpTypeStruct][t]; - if (type->getNumOperands() != 2) - continue; - if (type->getIdOperand(0) != type0 || - type->getIdOperand(1) != type1) - continue; - return type->getResultId(); - } - - // not found, make it - std::vector members; - members.push_back(type0); - members.push_back(type1); - - return makeStructType(members, "ResType"); -} - -Id Builder::makeVectorType(Id component, int size) -{ - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypeVector].size(); ++t) { - type = groupedTypes[OpTypeVector][t]; - if (type->getIdOperand(0) == component && - type->getImmediateOperand(1) == (unsigned)size) - return type->getResultId(); - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypeVector); - type->addIdOperand(component); - type->addImmediateOperand(size); - groupedTypes[OpTypeVector].push_back(type); - 
constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - return type->getResultId(); -} - -Id Builder::makeMatrixType(Id component, int cols, int rows) -{ - assert(cols <= maxMatrixSize && rows <= maxMatrixSize); - - Id column = makeVectorType(component, rows); - - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypeMatrix].size(); ++t) { - type = groupedTypes[OpTypeMatrix][t]; - if (type->getIdOperand(0) == column && - type->getImmediateOperand(1) == (unsigned)cols) - return type->getResultId(); - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypeMatrix); - type->addIdOperand(column); - type->addImmediateOperand(cols); - groupedTypes[OpTypeMatrix].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - return type->getResultId(); -} - -// TODO: performance: track arrays per stride -// If a stride is supplied (non-zero) make an array. -// If no stride (0), reuse previous array types. 
-// 'size' is an Id of a constant or specialization constant of the array size -Id Builder::makeArrayType(Id element, Id sizeId, int stride) -{ - Instruction* type; - if (stride == 0) { - // try to find existing type - for (int t = 0; t < (int)groupedTypes[OpTypeArray].size(); ++t) { - type = groupedTypes[OpTypeArray][t]; - if (type->getIdOperand(0) == element && - type->getIdOperand(1) == sizeId) - return type->getResultId(); - } - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypeArray); - type->addIdOperand(element); - type->addIdOperand(sizeId); - groupedTypes[OpTypeArray].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - return type->getResultId(); -} - -Id Builder::makeRuntimeArray(Id element) -{ - Instruction* type = new Instruction(getUniqueId(), NoType, OpTypeRuntimeArray); - type->addIdOperand(element); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - return type->getResultId(); -} - -Id Builder::makeFunctionType(Id returnType, const std::vector& paramTypes) -{ - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypeFunction].size(); ++t) { - type = groupedTypes[OpTypeFunction][t]; - if (type->getIdOperand(0) != returnType || (int)paramTypes.size() != type->getNumOperands() - 1) - continue; - bool mismatch = false; - for (int p = 0; p < (int)paramTypes.size(); ++p) { - if (paramTypes[p] != type->getIdOperand(p + 1)) { - mismatch = true; - break; - } - } - if (! 
mismatch) - return type->getResultId(); - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypeFunction); - type->addIdOperand(returnType); - for (int p = 0; p < (int)paramTypes.size(); ++p) - type->addIdOperand(paramTypes[p]); - groupedTypes[OpTypeFunction].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - return type->getResultId(); -} - -Id Builder::makeImageType(Id sampledType, Dim dim, bool depth, bool arrayed, bool ms, unsigned sampled, ImageFormat format) -{ - assert(sampled == 1 || sampled == 2); - - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypeImage].size(); ++t) { - type = groupedTypes[OpTypeImage][t]; - if (type->getIdOperand(0) == sampledType && - type->getImmediateOperand(1) == (unsigned int)dim && - type->getImmediateOperand(2) == ( depth ? 1u : 0u) && - type->getImmediateOperand(3) == (arrayed ? 1u : 0u) && - type->getImmediateOperand(4) == ( ms ? 1u : 0u) && - type->getImmediateOperand(5) == sampled && - type->getImmediateOperand(6) == (unsigned int)format) - return type->getResultId(); - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypeImage); - type->addIdOperand(sampledType); - type->addImmediateOperand( dim); - type->addImmediateOperand( depth ? 1 : 0); - type->addImmediateOperand(arrayed ? 1 : 0); - type->addImmediateOperand( ms ? 
1 : 0); - type->addImmediateOperand(sampled); - type->addImmediateOperand((unsigned int)format); - - groupedTypes[OpTypeImage].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - // deal with capabilities - switch (dim) { - case DimBuffer: - if (sampled == 1) - addCapability(CapabilitySampledBuffer); - else - addCapability(CapabilityImageBuffer); - break; - case Dim1D: - if (sampled == 1) - addCapability(CapabilitySampled1D); - else - addCapability(CapabilityImage1D); - break; - case DimCube: - if (arrayed) { - if (sampled == 1) - addCapability(CapabilitySampledCubeArray); - else - addCapability(CapabilityImageCubeArray); - } - break; - case DimRect: - if (sampled == 1) - addCapability(CapabilitySampledRect); - else - addCapability(CapabilityImageRect); - break; - case DimSubpassData: - addCapability(CapabilityInputAttachment); - break; - default: - break; - } - - if (ms) { - if (sampled == 2) { - // Images used with subpass data are not storage - // images, so don't require the capability for them. 
- if (dim != Dim::DimSubpassData) - addCapability(CapabilityStorageImageMultisample); - if (arrayed) - addCapability(CapabilityImageMSArray); - } - } - - return type->getResultId(); -} - -Id Builder::makeSampledImageType(Id imageType) -{ - // try to find it - Instruction* type; - for (int t = 0; t < (int)groupedTypes[OpTypeSampledImage].size(); ++t) { - type = groupedTypes[OpTypeSampledImage][t]; - if (type->getIdOperand(0) == imageType) - return type->getResultId(); - } - - // not found, make it - type = new Instruction(getUniqueId(), NoType, OpTypeSampledImage); - type->addIdOperand(imageType); - - groupedTypes[OpTypeSampledImage].push_back(type); - constantsTypesGlobals.push_back(std::unique_ptr(type)); - module.mapInstruction(type); - - return type->getResultId(); -} - -Id Builder::getDerefTypeId(Id resultId) const -{ - Id typeId = getTypeId(resultId); - assert(isPointerType(typeId)); - - return module.getInstruction(typeId)->getImmediateOperand(1); -} - -Op Builder::getMostBasicTypeClass(Id typeId) const -{ - Instruction* instr = module.getInstruction(typeId); - - Op typeClass = instr->getOpCode(); - switch (typeClass) - { - case OpTypeVoid: - case OpTypeBool: - case OpTypeInt: - case OpTypeFloat: - case OpTypeStruct: - return typeClass; - case OpTypeVector: - case OpTypeMatrix: - case OpTypeArray: - case OpTypeRuntimeArray: - return getMostBasicTypeClass(instr->getIdOperand(0)); - case OpTypePointer: - return getMostBasicTypeClass(instr->getIdOperand(1)); - default: - assert(0); - return OpTypeFloat; - } -} - -int Builder::getNumTypeConstituents(Id typeId) const -{ - Instruction* instr = module.getInstruction(typeId); - - switch (instr->getOpCode()) - { - case OpTypeBool: - case OpTypeInt: - case OpTypeFloat: - return 1; - case OpTypeVector: - case OpTypeMatrix: - return instr->getImmediateOperand(1); - case OpTypeArray: - { - Id lengthId = instr->getImmediateOperand(1); - return module.getInstruction(lengthId)->getImmediateOperand(0); - } - case 
OpTypeStruct: - return instr->getNumOperands(); - default: - assert(0); - return 1; - } -} - -// Return the lowest-level type of scalar that an homogeneous composite is made out of. -// Typically, this is just to find out if something is made out of ints or floats. -// However, it includes returning a structure, if say, it is an array of structure. -Id Builder::getScalarTypeId(Id typeId) const -{ - Instruction* instr = module.getInstruction(typeId); - - Op typeClass = instr->getOpCode(); - switch (typeClass) - { - case OpTypeVoid: - case OpTypeBool: - case OpTypeInt: - case OpTypeFloat: - case OpTypeStruct: - return instr->getResultId(); - case OpTypeVector: - case OpTypeMatrix: - case OpTypeArray: - case OpTypeRuntimeArray: - case OpTypePointer: - return getScalarTypeId(getContainedTypeId(typeId)); - default: - assert(0); - return NoResult; - } -} - -// Return the type of 'member' of a composite. -Id Builder::getContainedTypeId(Id typeId, int member) const -{ - Instruction* instr = module.getInstruction(typeId); - - Op typeClass = instr->getOpCode(); - switch (typeClass) - { - case OpTypeVector: - case OpTypeMatrix: - case OpTypeArray: - case OpTypeRuntimeArray: - return instr->getIdOperand(0); - case OpTypePointer: - return instr->getIdOperand(1); - case OpTypeStruct: - return instr->getIdOperand(member); - default: - assert(0); - return NoResult; - } -} - -// Return the immediately contained type of a given composite type. -Id Builder::getContainedTypeId(Id typeId) const -{ - return getContainedTypeId(typeId, 0); -} - -// See if a scalar constant of this type has already been created, so it -// can be reused rather than duplicated. (Required by the specification). 
-Id Builder::findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned value) const -{ - Instruction* constant; - for (int i = 0; i < (int)groupedConstants[typeClass].size(); ++i) { - constant = groupedConstants[typeClass][i]; - if (constant->getOpCode() == opcode && - constant->getTypeId() == typeId && - constant->getImmediateOperand(0) == value) - return constant->getResultId(); - } - - return 0; -} - -// Version of findScalarConstant (see above) for scalars that take two operands (e.g. a 'double' or 'int64'). -Id Builder::findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned v1, unsigned v2) const -{ - Instruction* constant; - for (int i = 0; i < (int)groupedConstants[typeClass].size(); ++i) { - constant = groupedConstants[typeClass][i]; - if (constant->getOpCode() == opcode && - constant->getTypeId() == typeId && - constant->getImmediateOperand(0) == v1 && - constant->getImmediateOperand(1) == v2) - return constant->getResultId(); - } - - return 0; -} - -// Return true if consuming 'opcode' means consuming a constant. -// "constant" here means after final transform to executable code, -// the value consumed will be a constant, so includes specialization. -bool Builder::isConstantOpCode(Op opcode) const -{ - switch (opcode) { - case OpUndef: - case OpConstantTrue: - case OpConstantFalse: - case OpConstant: - case OpConstantComposite: - case OpConstantSampler: - case OpConstantNull: - case OpSpecConstantTrue: - case OpSpecConstantFalse: - case OpSpecConstant: - case OpSpecConstantComposite: - case OpSpecConstantOp: - return true; - default: - return false; - } -} - -// Return true if consuming 'opcode' means consuming a specialization constant. 
-bool Builder::isSpecConstantOpCode(Op opcode) const -{ - switch (opcode) { - case OpSpecConstantTrue: - case OpSpecConstantFalse: - case OpSpecConstant: - case OpSpecConstantComposite: - case OpSpecConstantOp: - return true; - default: - return false; - } -} - -Id Builder::makeBoolConstant(bool b, bool specConstant) -{ - Id typeId = makeBoolType(); - Instruction* constant; - Op opcode = specConstant ? (b ? OpSpecConstantTrue : OpSpecConstantFalse) : (b ? OpConstantTrue : OpConstantFalse); - - // See if we already made it. Applies only to regular constants, because specialization constants - // must remain distinct for the purpose of applying a SpecId decoration. - if (! specConstant) { - Id existing = 0; - for (int i = 0; i < (int)groupedConstants[OpTypeBool].size(); ++i) { - constant = groupedConstants[OpTypeBool][i]; - if (constant->getTypeId() == typeId && constant->getOpCode() == opcode) - existing = constant->getResultId(); - } - - if (existing) - return existing; - } - - // Make it - Instruction* c = new Instruction(getUniqueId(), typeId, opcode); - constantsTypesGlobals.push_back(std::unique_ptr(c)); - groupedConstants[OpTypeBool].push_back(c); - module.mapInstruction(c); - - return c->getResultId(); -} - -Id Builder::makeIntConstant(Id typeId, unsigned value, bool specConstant) -{ - Op opcode = specConstant ? OpSpecConstant : OpConstant; - - // See if we already made it. Applies only to regular constants, because specialization constants - // must remain distinct for the purpose of applying a SpecId decoration. - if (! 
specConstant) { - Id existing = findScalarConstant(OpTypeInt, opcode, typeId, value); - if (existing) - return existing; - } - - Instruction* c = new Instruction(getUniqueId(), typeId, opcode); - c->addImmediateOperand(value); - constantsTypesGlobals.push_back(std::unique_ptr(c)); - groupedConstants[OpTypeInt].push_back(c); - module.mapInstruction(c); - - return c->getResultId(); -} - -Id Builder::makeInt64Constant(Id typeId, unsigned long long value, bool specConstant) -{ - Op opcode = specConstant ? OpSpecConstant : OpConstant; - - unsigned op1 = value & 0xFFFFFFFF; - unsigned op2 = value >> 32; - - // See if we already made it. Applies only to regular constants, because specialization constants - // must remain distinct for the purpose of applying a SpecId decoration. - if (! specConstant) { - Id existing = findScalarConstant(OpTypeInt, opcode, typeId, op1, op2); - if (existing) - return existing; - } - - Instruction* c = new Instruction(getUniqueId(), typeId, opcode); - c->addImmediateOperand(op1); - c->addImmediateOperand(op2); - constantsTypesGlobals.push_back(std::unique_ptr(c)); - groupedConstants[OpTypeInt].push_back(c); - module.mapInstruction(c); - - return c->getResultId(); -} - -Id Builder::makeFloatConstant(float f, bool specConstant) -{ - Op opcode = specConstant ? OpSpecConstant : OpConstant; - Id typeId = makeFloatType(32); - union { float fl; unsigned int ui; } u; - u.fl = f; - unsigned value = u.ui; - - // See if we already made it. Applies only to regular constants, because specialization constants - // must remain distinct for the purpose of applying a SpecId decoration. - if (! 
specConstant) { - Id existing = findScalarConstant(OpTypeFloat, opcode, typeId, value); - if (existing) - return existing; - } - - Instruction* c = new Instruction(getUniqueId(), typeId, opcode); - c->addImmediateOperand(value); - constantsTypesGlobals.push_back(std::unique_ptr(c)); - groupedConstants[OpTypeFloat].push_back(c); - module.mapInstruction(c); - - return c->getResultId(); -} - -Id Builder::makeDoubleConstant(double d, bool specConstant) -{ - Op opcode = specConstant ? OpSpecConstant : OpConstant; - Id typeId = makeFloatType(64); - union { double db; unsigned long long ull; } u; - u.db = d; - unsigned long long value = u.ull; - unsigned op1 = value & 0xFFFFFFFF; - unsigned op2 = value >> 32; - - // See if we already made it. Applies only to regular constants, because specialization constants - // must remain distinct for the purpose of applying a SpecId decoration. - if (! specConstant) { - Id existing = findScalarConstant(OpTypeFloat, opcode, typeId, op1, op2); - if (existing) - return existing; - } - - Instruction* c = new Instruction(getUniqueId(), typeId, opcode); - c->addImmediateOperand(op1); - c->addImmediateOperand(op2); - constantsTypesGlobals.push_back(std::unique_ptr(c)); - groupedConstants[OpTypeFloat].push_back(c); - module.mapInstruction(c); - - return c->getResultId(); -} - -#ifdef AMD_EXTENSIONS -Id Builder::makeFloat16Constant(float f16, bool specConstant) -{ - Op opcode = specConstant ? OpSpecConstant : OpConstant; - Id typeId = makeFloatType(16); - - spvutils::HexFloat> fVal(f16); - spvutils::HexFloat> f16Val(0); - fVal.castTo(f16Val, spvutils::kRoundToZero); - - unsigned value = f16Val.value().getAsFloat().get_value(); - - // See if we already made it. Applies only to regular constants, because specialization constants - // must remain distinct for the purpose of applying a SpecId decoration. 
- if (!specConstant) { - Id existing = findScalarConstant(OpTypeFloat, opcode, typeId, value); - if (existing) - return existing; - } - - Instruction* c = new Instruction(getUniqueId(), typeId, opcode); - c->addImmediateOperand(value); - constantsTypesGlobals.push_back(std::unique_ptr(c)); - groupedConstants[OpTypeFloat].push_back(c); - module.mapInstruction(c); - - return c->getResultId(); -} -#endif - -Id Builder::findCompositeConstant(Op typeClass, const std::vector& comps) const -{ - Instruction* constant = 0; - bool found = false; - for (int i = 0; i < (int)groupedConstants[typeClass].size(); ++i) { - constant = groupedConstants[typeClass][i]; - - // same shape? - if (constant->getNumOperands() != (int)comps.size()) - continue; - - // same contents? - bool mismatch = false; - for (int op = 0; op < constant->getNumOperands(); ++op) { - if (constant->getIdOperand(op) != comps[op]) { - mismatch = true; - break; - } - } - if (! mismatch) { - found = true; - break; - } - } - - return found ? constant->getResultId() : NoResult; -} - -// Comments in header -Id Builder::makeCompositeConstant(Id typeId, const std::vector& members, bool specConstant) -{ - Op opcode = specConstant ? OpSpecConstantComposite : OpConstantComposite; - assert(typeId); - Op typeClass = getTypeClass(typeId); - - switch (typeClass) { - case OpTypeVector: - case OpTypeArray: - case OpTypeStruct: - case OpTypeMatrix: - break; - default: - assert(0); - return makeFloatConstant(0.0); - } - - if (! 
specConstant) { - Id existing = findCompositeConstant(typeClass, members); - if (existing) - return existing; - } - - Instruction* c = new Instruction(getUniqueId(), typeId, opcode); - for (int op = 0; op < (int)members.size(); ++op) - c->addIdOperand(members[op]); - constantsTypesGlobals.push_back(std::unique_ptr(c)); - groupedConstants[typeClass].push_back(c); - module.mapInstruction(c); - - return c->getResultId(); -} - -Instruction* Builder::addEntryPoint(ExecutionModel model, Function* function, const char* name) -{ - Instruction* entryPoint = new Instruction(OpEntryPoint); - entryPoint->addImmediateOperand(model); - entryPoint->addIdOperand(function->getId()); - entryPoint->addStringOperand(name); - - entryPoints.push_back(std::unique_ptr(entryPoint)); - - return entryPoint; -} - -// Currently relying on the fact that all 'value' of interest are small non-negative values. -void Builder::addExecutionMode(Function* entryPoint, ExecutionMode mode, int value1, int value2, int value3) -{ - Instruction* instr = new Instruction(OpExecutionMode); - instr->addIdOperand(entryPoint->getId()); - instr->addImmediateOperand(mode); - if (value1 >= 0) - instr->addImmediateOperand(value1); - if (value2 >= 0) - instr->addImmediateOperand(value2); - if (value3 >= 0) - instr->addImmediateOperand(value3); - - executionModes.push_back(std::unique_ptr(instr)); -} - -void Builder::addName(Id id, const char* string) -{ - Instruction* name = new Instruction(OpName); - name->addIdOperand(id); - name->addStringOperand(string); - - names.push_back(std::unique_ptr(name)); -} - -void Builder::addMemberName(Id id, int memberNumber, const char* string) -{ - Instruction* name = new Instruction(OpMemberName); - name->addIdOperand(id); - name->addImmediateOperand(memberNumber); - name->addStringOperand(string); - - names.push_back(std::unique_ptr(name)); -} - -void Builder::addDecoration(Id id, Decoration decoration, int num) -{ - if (decoration == spv::DecorationMax) - return; - Instruction* 
dec = new Instruction(OpDecorate); - dec->addIdOperand(id); - dec->addImmediateOperand(decoration); - if (num >= 0) - dec->addImmediateOperand(num); - - decorations.push_back(std::unique_ptr(dec)); -} - -void Builder::addMemberDecoration(Id id, unsigned int member, Decoration decoration, int num) -{ - Instruction* dec = new Instruction(OpMemberDecorate); - dec->addIdOperand(id); - dec->addImmediateOperand(member); - dec->addImmediateOperand(decoration); - if (num >= 0) - dec->addImmediateOperand(num); - - decorations.push_back(std::unique_ptr(dec)); -} - -// Comments in header -Function* Builder::makeEntryPoint(const char* entryPoint) -{ - assert(! entryPointFunction); - - Block* entry; - std::vector params; - std::vector> decorations; - - entryPointFunction = makeFunctionEntry(NoPrecision, makeVoidType(), entryPoint, params, decorations, &entry); - - return entryPointFunction; -} - -// Comments in header -Function* Builder::makeFunctionEntry(Decoration precision, Id returnType, const char* name, - const std::vector& paramTypes, const std::vector>& decorations, Block **entry) -{ - // Make the function and initial instructions in it - Id typeId = makeFunctionType(returnType, paramTypes); - Id firstParamId = paramTypes.size() == 0 ? 
0 : getUniqueIds((int)paramTypes.size()); - Function* function = new Function(getUniqueId(), returnType, typeId, firstParamId, module); - - // Set up the precisions - setPrecision(function->getId(), precision); - for (unsigned p = 0; p < (unsigned)decorations.size(); ++p) { - for (int d = 0; d < (int)decorations[p].size(); ++d) - addDecoration(firstParamId + p, decorations[p][d]); - } - - // CFG - if (entry) { - *entry = new Block(getUniqueId(), *function); - function->addBlock(*entry); - setBuildPoint(*entry); - } - - if (name) - addName(function->getId(), name); - - functions.push_back(std::unique_ptr(function)); - - return function; -} - -// Comments in header -void Builder::makeReturn(bool implicit, Id retVal) -{ - if (retVal) { - Instruction* inst = new Instruction(NoResult, NoType, OpReturnValue); - inst->addIdOperand(retVal); - buildPoint->addInstruction(std::unique_ptr(inst)); - } else - buildPoint->addInstruction(std::unique_ptr(new Instruction(NoResult, NoType, OpReturn))); - - if (! implicit) - createAndSetNoPredecessorBlock("post-return"); -} - -// Comments in header -void Builder::leaveFunction() -{ - Block* block = buildPoint; - Function& function = buildPoint->getParent(); - assert(block); - - // If our function did not contain a return, add a return void now. - if (! 
block->isTerminated()) { - if (function.getReturnType() == makeVoidType()) - makeReturn(true); - else { - makeReturn(true, createUndefined(function.getReturnType())); - } - } -} - -// Comments in header -void Builder::makeDiscard() -{ - buildPoint->addInstruction(std::unique_ptr(new Instruction(OpKill))); - createAndSetNoPredecessorBlock("post-discard"); -} - -// Comments in header -Id Builder::createVariable(StorageClass storageClass, Id type, const char* name) -{ - Id pointerType = makePointer(storageClass, type); - Instruction* inst = new Instruction(getUniqueId(), pointerType, OpVariable); - inst->addImmediateOperand(storageClass); - - switch (storageClass) { - case StorageClassFunction: - // Validation rules require the declaration in the entry block - buildPoint->getParent().addLocalVariable(std::unique_ptr(inst)); - break; - - default: - constantsTypesGlobals.push_back(std::unique_ptr(inst)); - module.mapInstruction(inst); - break; - } - - if (name) - addName(inst->getResultId(), name); - - return inst->getResultId(); -} - -// Comments in header -Id Builder::createUndefined(Id type) -{ - Instruction* inst = new Instruction(getUniqueId(), type, OpUndef); - buildPoint->addInstruction(std::unique_ptr(inst)); - return inst->getResultId(); -} - -// Comments in header -void Builder::createStore(Id rValue, Id lValue) -{ - Instruction* store = new Instruction(OpStore); - store->addIdOperand(lValue); - store->addIdOperand(rValue); - buildPoint->addInstruction(std::unique_ptr(store)); -} - -// Comments in header -Id Builder::createLoad(Id lValue) -{ - Instruction* load = new Instruction(getUniqueId(), getDerefTypeId(lValue), OpLoad); - load->addIdOperand(lValue); - buildPoint->addInstruction(std::unique_ptr(load)); - - return load->getResultId(); -} - -// Comments in header -Id Builder::createAccessChain(StorageClass storageClass, Id base, const std::vector& offsets) -{ - // Figure out the final resulting type. 
- spv::Id typeId = getTypeId(base); - assert(isPointerType(typeId) && offsets.size() > 0); - typeId = getContainedTypeId(typeId); - for (int i = 0; i < (int)offsets.size(); ++i) { - if (isStructType(typeId)) { - assert(isConstantScalar(offsets[i])); - typeId = getContainedTypeId(typeId, getConstantScalar(offsets[i])); - } else - typeId = getContainedTypeId(typeId, offsets[i]); - } - typeId = makePointer(storageClass, typeId); - - // Make the instruction - Instruction* chain = new Instruction(getUniqueId(), typeId, OpAccessChain); - chain->addIdOperand(base); - for (int i = 0; i < (int)offsets.size(); ++i) - chain->addIdOperand(offsets[i]); - buildPoint->addInstruction(std::unique_ptr(chain)); - - return chain->getResultId(); -} - -Id Builder::createArrayLength(Id base, unsigned int member) -{ - spv::Id intType = makeIntType(32); - Instruction* length = new Instruction(getUniqueId(), intType, OpArrayLength); - length->addIdOperand(base); - length->addImmediateOperand(member); - buildPoint->addInstruction(std::unique_ptr(length)); - - return length->getResultId(); -} - -Id Builder::createCompositeExtract(Id composite, Id typeId, unsigned index) -{ - // Generate code for spec constants if in spec constant operation - // generation mode. - if (generatingOpCodeForSpecConst) { - return createSpecConstantOp(OpCompositeExtract, typeId, std::vector(1, composite), std::vector(1, index)); - } - Instruction* extract = new Instruction(getUniqueId(), typeId, OpCompositeExtract); - extract->addIdOperand(composite); - extract->addImmediateOperand(index); - buildPoint->addInstruction(std::unique_ptr(extract)); - - return extract->getResultId(); -} - -Id Builder::createCompositeExtract(Id composite, Id typeId, const std::vector& indexes) -{ - // Generate code for spec constants if in spec constant operation - // generation mode. 
- if (generatingOpCodeForSpecConst) { - return createSpecConstantOp(OpCompositeExtract, typeId, std::vector(1, composite), indexes); - } - Instruction* extract = new Instruction(getUniqueId(), typeId, OpCompositeExtract); - extract->addIdOperand(composite); - for (int i = 0; i < (int)indexes.size(); ++i) - extract->addImmediateOperand(indexes[i]); - buildPoint->addInstruction(std::unique_ptr(extract)); - - return extract->getResultId(); -} - -Id Builder::createCompositeInsert(Id object, Id composite, Id typeId, unsigned index) -{ - Instruction* insert = new Instruction(getUniqueId(), typeId, OpCompositeInsert); - insert->addIdOperand(object); - insert->addIdOperand(composite); - insert->addImmediateOperand(index); - buildPoint->addInstruction(std::unique_ptr(insert)); - - return insert->getResultId(); -} - -Id Builder::createCompositeInsert(Id object, Id composite, Id typeId, const std::vector& indexes) -{ - Instruction* insert = new Instruction(getUniqueId(), typeId, OpCompositeInsert); - insert->addIdOperand(object); - insert->addIdOperand(composite); - for (int i = 0; i < (int)indexes.size(); ++i) - insert->addImmediateOperand(indexes[i]); - buildPoint->addInstruction(std::unique_ptr(insert)); - - return insert->getResultId(); -} - -Id Builder::createVectorExtractDynamic(Id vector, Id typeId, Id componentIndex) -{ - Instruction* extract = new Instruction(getUniqueId(), typeId, OpVectorExtractDynamic); - extract->addIdOperand(vector); - extract->addIdOperand(componentIndex); - buildPoint->addInstruction(std::unique_ptr(extract)); - - return extract->getResultId(); -} - -Id Builder::createVectorInsertDynamic(Id vector, Id typeId, Id component, Id componentIndex) -{ - Instruction* insert = new Instruction(getUniqueId(), typeId, OpVectorInsertDynamic); - insert->addIdOperand(vector); - insert->addIdOperand(component); - insert->addIdOperand(componentIndex); - buildPoint->addInstruction(std::unique_ptr(insert)); - - return insert->getResultId(); -} - -// An opcode 
that has no operands, no result id, and no type -void Builder::createNoResultOp(Op opCode) -{ - Instruction* op = new Instruction(opCode); - buildPoint->addInstruction(std::unique_ptr(op)); -} - -// An opcode that has one operand, no result id, and no type -void Builder::createNoResultOp(Op opCode, Id operand) -{ - Instruction* op = new Instruction(opCode); - op->addIdOperand(operand); - buildPoint->addInstruction(std::unique_ptr(op)); -} - -// An opcode that has one operand, no result id, and no type -void Builder::createNoResultOp(Op opCode, const std::vector& operands) -{ - Instruction* op = new Instruction(opCode); - for (auto it = operands.cbegin(); it != operands.cend(); ++it) - op->addIdOperand(*it); - buildPoint->addInstruction(std::unique_ptr(op)); -} - -void Builder::createControlBarrier(Scope execution, Scope memory, MemorySemanticsMask semantics) -{ - Instruction* op = new Instruction(OpControlBarrier); - op->addImmediateOperand(makeUintConstant(execution)); - op->addImmediateOperand(makeUintConstant(memory)); - op->addImmediateOperand(makeUintConstant(semantics)); - buildPoint->addInstruction(std::unique_ptr(op)); -} - -void Builder::createMemoryBarrier(unsigned executionScope, unsigned memorySemantics) -{ - Instruction* op = new Instruction(OpMemoryBarrier); - op->addImmediateOperand(makeUintConstant(executionScope)); - op->addImmediateOperand(makeUintConstant(memorySemantics)); - buildPoint->addInstruction(std::unique_ptr(op)); -} - -// An opcode that has one operands, a result id, and a type -Id Builder::createUnaryOp(Op opCode, Id typeId, Id operand) -{ - // Generate code for spec constants if in spec constant operation - // generation mode. 
- if (generatingOpCodeForSpecConst) { - return createSpecConstantOp(opCode, typeId, std::vector(1, operand), std::vector()); - } - Instruction* op = new Instruction(getUniqueId(), typeId, opCode); - op->addIdOperand(operand); - buildPoint->addInstruction(std::unique_ptr(op)); - - return op->getResultId(); -} - -Id Builder::createBinOp(Op opCode, Id typeId, Id left, Id right) -{ - // Generate code for spec constants if in spec constant operation - // generation mode. - if (generatingOpCodeForSpecConst) { - std::vector operands(2); - operands[0] = left; operands[1] = right; - return createSpecConstantOp(opCode, typeId, operands, std::vector()); - } - Instruction* op = new Instruction(getUniqueId(), typeId, opCode); - op->addIdOperand(left); - op->addIdOperand(right); - buildPoint->addInstruction(std::unique_ptr(op)); - - return op->getResultId(); -} - -Id Builder::createTriOp(Op opCode, Id typeId, Id op1, Id op2, Id op3) -{ - // Generate code for spec constants if in spec constant operation - // generation mode. 
- if (generatingOpCodeForSpecConst) { - std::vector operands(3); - operands[0] = op1; - operands[1] = op2; - operands[2] = op3; - return createSpecConstantOp( - opCode, typeId, operands, std::vector()); - } - Instruction* op = new Instruction(getUniqueId(), typeId, opCode); - op->addIdOperand(op1); - op->addIdOperand(op2); - op->addIdOperand(op3); - buildPoint->addInstruction(std::unique_ptr(op)); - - return op->getResultId(); -} - -Id Builder::createOp(Op opCode, Id typeId, const std::vector& operands) -{ - Instruction* op = new Instruction(getUniqueId(), typeId, opCode); - for (auto it = operands.cbegin(); it != operands.cend(); ++it) - op->addIdOperand(*it); - buildPoint->addInstruction(std::unique_ptr(op)); - - return op->getResultId(); -} - -Id Builder::createSpecConstantOp(Op opCode, Id typeId, const std::vector& operands, const std::vector& literals) -{ - Instruction* op = new Instruction(getUniqueId(), typeId, OpSpecConstantOp); - op->addImmediateOperand((unsigned) opCode); - for (auto it = operands.cbegin(); it != operands.cend(); ++it) - op->addIdOperand(*it); - for (auto it = literals.cbegin(); it != literals.cend(); ++it) - op->addImmediateOperand(*it); - module.mapInstruction(op); - constantsTypesGlobals.push_back(std::unique_ptr(op)); - - return op->getResultId(); -} - -Id Builder::createFunctionCall(spv::Function* function, const std::vector& args) -{ - Instruction* op = new Instruction(getUniqueId(), function->getReturnType(), OpFunctionCall); - op->addIdOperand(function->getId()); - for (int a = 0; a < (int)args.size(); ++a) - op->addIdOperand(args[a]); - buildPoint->addInstruction(std::unique_ptr(op)); - - return op->getResultId(); -} - -// Comments in header -Id Builder::createRvalueSwizzle(Decoration precision, Id typeId, Id source, const std::vector& channels) -{ - if (channels.size() == 1) - return setPrecision(createCompositeExtract(source, typeId, channels.front()), precision); - - if (generatingOpCodeForSpecConst) { - std::vector 
operands(2); - operands[0] = operands[1] = source; - return setPrecision(createSpecConstantOp(OpVectorShuffle, typeId, operands, channels), precision); - } - Instruction* swizzle = new Instruction(getUniqueId(), typeId, OpVectorShuffle); - assert(isVector(source)); - swizzle->addIdOperand(source); - swizzle->addIdOperand(source); - for (int i = 0; i < (int)channels.size(); ++i) - swizzle->addImmediateOperand(channels[i]); - buildPoint->addInstruction(std::unique_ptr(swizzle)); - - return setPrecision(swizzle->getResultId(), precision); -} - -// Comments in header -Id Builder::createLvalueSwizzle(Id typeId, Id target, Id source, const std::vector& channels) -{ - if (channels.size() == 1 && getNumComponents(source) == 1) - return createCompositeInsert(source, target, typeId, channels.front()); - - Instruction* swizzle = new Instruction(getUniqueId(), typeId, OpVectorShuffle); - - assert(isVector(target)); - swizzle->addIdOperand(target); - - assert(getNumComponents(source) == (int)channels.size()); - assert(isVector(source)); - swizzle->addIdOperand(source); - - // Set up an identity shuffle from the base value to the result value - unsigned int components[4]; - int numTargetComponents = getNumComponents(target); - for (int i = 0; i < numTargetComponents; ++i) - components[i] = i; - - // Punch in the l-value swizzle - for (int i = 0; i < (int)channels.size(); ++i) - components[channels[i]] = numTargetComponents + i; - - // finish the instruction with these components selectors - for (int i = 0; i < numTargetComponents; ++i) - swizzle->addImmediateOperand(components[i]); - buildPoint->addInstruction(std::unique_ptr(swizzle)); - - return swizzle->getResultId(); -} - -// Comments in header -void Builder::promoteScalar(Decoration precision, Id& left, Id& right) -{ - int direction = getNumComponents(right) - getNumComponents(left); - - if (direction > 0) - left = smearScalar(precision, left, makeVectorType(getTypeId(left), getNumComponents(right))); - else if (direction < 
0) - right = smearScalar(precision, right, makeVectorType(getTypeId(right), getNumComponents(left))); - - return; -} - -// Comments in header -Id Builder::smearScalar(Decoration precision, Id scalar, Id vectorType) -{ - assert(getNumComponents(scalar) == 1); - assert(getTypeId(scalar) == getScalarTypeId(vectorType)); - - int numComponents = getNumTypeComponents(vectorType); - if (numComponents == 1) - return scalar; - - Instruction* smear = nullptr; - if (generatingOpCodeForSpecConst) { - auto members = std::vector(numComponents, scalar); - // Sometime even in spec-constant-op mode, the temporary vector created by - // promoting a scalar might not be a spec constant. This should depend on - // the scalar. - // e.g.: - // const vec2 spec_const_result = a_spec_const_vec2 + a_front_end_const_scalar; - // In such cases, the temporary vector created from a_front_end_const_scalar - // is not a spec constant vector, even though the binary operation node is marked - // as 'specConstant' and we are in spec-constant-op mode. - auto result_id = makeCompositeConstant(vectorType, members, isSpecConstant(scalar)); - smear = module.getInstruction(result_id); - } else { - smear = new Instruction(getUniqueId(), vectorType, OpCompositeConstruct); - for (int c = 0; c < numComponents; ++c) - smear->addIdOperand(scalar); - buildPoint->addInstruction(std::unique_ptr(smear)); - } - - return setPrecision(smear->getResultId(), precision); -} - -// Comments in header -Id Builder::createBuiltinCall(Id resultType, Id builtins, int entryPoint, const std::vector& args) -{ - Instruction* inst = new Instruction(getUniqueId(), resultType, OpExtInst); - inst->addIdOperand(builtins); - inst->addImmediateOperand(entryPoint); - for (int arg = 0; arg < (int)args.size(); ++arg) - inst->addIdOperand(args[arg]); - - buildPoint->addInstruction(std::unique_ptr(inst)); - - return inst->getResultId(); -} - -// Accept all parameters needed to create a texture instruction. 
-// Create the correct instruction based on the inputs, and make the call. -Id Builder::createTextureCall(Decoration precision, Id resultType, bool sparse, bool fetch, bool proj, bool gather, bool noImplicitLod, const TextureParameters& parameters) -{ - static const int maxTextureArgs = 10; - Id texArgs[maxTextureArgs] = {}; - - // - // Set up the fixed arguments - // - int numArgs = 0; - bool explicitLod = false; - texArgs[numArgs++] = parameters.sampler; - texArgs[numArgs++] = parameters.coords; - if (parameters.Dref != NoResult) - texArgs[numArgs++] = parameters.Dref; - if (parameters.component != NoResult) - texArgs[numArgs++] = parameters.component; - - // - // Set up the optional arguments - // - int optArgNum = numArgs; // track which operand, if it exists, is the mask of optional arguments - ++numArgs; // speculatively make room for the mask operand - ImageOperandsMask mask = ImageOperandsMaskNone; // the mask operand - if (parameters.bias) { - mask = (ImageOperandsMask)(mask | ImageOperandsBiasMask); - texArgs[numArgs++] = parameters.bias; - } - if (parameters.lod) { - mask = (ImageOperandsMask)(mask | ImageOperandsLodMask); - texArgs[numArgs++] = parameters.lod; - explicitLod = true; - } else if (parameters.gradX) { - mask = (ImageOperandsMask)(mask | ImageOperandsGradMask); - texArgs[numArgs++] = parameters.gradX; - texArgs[numArgs++] = parameters.gradY; - explicitLod = true; - } else if (noImplicitLod && ! fetch && ! 
gather) { - // have to explicitly use lod of 0 if not allowed to have them be implicit, and - // we would otherwise be about to issue an implicit instruction - mask = (ImageOperandsMask)(mask | ImageOperandsLodMask); - texArgs[numArgs++] = makeFloatConstant(0.0); - explicitLod = true; - } - if (parameters.offset) { - if (isConstant(parameters.offset)) - mask = (ImageOperandsMask)(mask | ImageOperandsConstOffsetMask); - else { - addCapability(CapabilityImageGatherExtended); - mask = (ImageOperandsMask)(mask | ImageOperandsOffsetMask); - } - texArgs[numArgs++] = parameters.offset; - } - if (parameters.offsets) { - mask = (ImageOperandsMask)(mask | ImageOperandsConstOffsetsMask); - texArgs[numArgs++] = parameters.offsets; - } - if (parameters.sample) { - mask = (ImageOperandsMask)(mask | ImageOperandsSampleMask); - texArgs[numArgs++] = parameters.sample; - } - if (parameters.lodClamp) { - // capability if this bit is used - addCapability(CapabilityMinLod); - - mask = (ImageOperandsMask)(mask | ImageOperandsMinLodMask); - texArgs[numArgs++] = parameters.lodClamp; - } - if (mask == ImageOperandsMaskNone) - --numArgs; // undo speculative reservation for the mask argument - else - texArgs[optArgNum] = mask; - - // - // Set up the instruction - // - Op opCode = OpNop; // All paths below need to set this - if (fetch) { - if (sparse) - opCode = OpImageSparseFetch; - else - opCode = OpImageFetch; - } else if (gather) { - if (parameters.Dref) - if (sparse) - opCode = OpImageSparseDrefGather; - else - opCode = OpImageDrefGather; - else - if (sparse) - opCode = OpImageSparseGather; - else - opCode = OpImageGather; - } else if (explicitLod) { - if (parameters.Dref) { - if (proj) - if (sparse) - opCode = OpImageSparseSampleProjDrefExplicitLod; - else - opCode = OpImageSampleProjDrefExplicitLod; - else - if (sparse) - opCode = OpImageSparseSampleDrefExplicitLod; - else - opCode = OpImageSampleDrefExplicitLod; - } else { - if (proj) - if (sparse) - opCode = 
OpImageSparseSampleProjExplicitLod; - else - opCode = OpImageSampleProjExplicitLod; - else - if (sparse) - opCode = OpImageSparseSampleExplicitLod; - else - opCode = OpImageSampleExplicitLod; - } - } else { - if (parameters.Dref) { - if (proj) - if (sparse) - opCode = OpImageSparseSampleProjDrefImplicitLod; - else - opCode = OpImageSampleProjDrefImplicitLod; - else - if (sparse) - opCode = OpImageSparseSampleDrefImplicitLod; - else - opCode = OpImageSampleDrefImplicitLod; - } else { - if (proj) - if (sparse) - opCode = OpImageSparseSampleProjImplicitLod; - else - opCode = OpImageSampleProjImplicitLod; - else - if (sparse) - opCode = OpImageSparseSampleImplicitLod; - else - opCode = OpImageSampleImplicitLod; - } - } - - // See if the result type is expecting a smeared result. - // This happens when a legacy shadow*() call is made, which - // gets a vec4 back instead of a float. - Id smearedType = resultType; - if (! isScalarType(resultType)) { - switch (opCode) { - case OpImageSampleDrefImplicitLod: - case OpImageSampleDrefExplicitLod: - case OpImageSampleProjDrefImplicitLod: - case OpImageSampleProjDrefExplicitLod: - resultType = getScalarTypeId(resultType); - break; - default: - break; - } - } - - Id typeId0 = 0; - Id typeId1 = 0; - - if (sparse) { - typeId0 = resultType; - typeId1 = getDerefTypeId(parameters.texelOut); - resultType = makeStructResultType(typeId0, typeId1); - } - - // Build the SPIR-V instruction - Instruction* textureInst = new Instruction(getUniqueId(), resultType, opCode); - for (int op = 0; op < optArgNum; ++op) - textureInst->addIdOperand(texArgs[op]); - if (optArgNum < numArgs) - textureInst->addImmediateOperand(texArgs[optArgNum]); - for (int op = optArgNum + 1; op < numArgs; ++op) - textureInst->addIdOperand(texArgs[op]); - setPrecision(textureInst->getResultId(), precision); - buildPoint->addInstruction(std::unique_ptr(textureInst)); - - Id resultId = textureInst->getResultId(); - - if (sparse) { - // set capability - 
addCapability(CapabilitySparseResidency); - - // Decode the return type that was a special structure - createStore(createCompositeExtract(resultId, typeId1, 1), parameters.texelOut); - resultId = createCompositeExtract(resultId, typeId0, 0); - setPrecision(resultId, precision); - } else { - // When a smear is needed, do it, as per what was computed - // above when resultType was changed to a scalar type. - if (resultType != smearedType) - resultId = smearScalar(precision, resultId, smearedType); - } - - return resultId; -} - -// Comments in header -Id Builder::createTextureQueryCall(Op opCode, const TextureParameters& parameters, bool isUnsignedResult) -{ - // All these need a capability - addCapability(CapabilityImageQuery); - - // Figure out the result type - Id resultType = 0; - switch (opCode) { - case OpImageQuerySize: - case OpImageQuerySizeLod: - { - int numComponents = 0; - switch (getTypeDimensionality(getImageType(parameters.sampler))) { - case Dim1D: - case DimBuffer: - numComponents = 1; - break; - case Dim2D: - case DimCube: - case DimRect: - case DimSubpassData: - numComponents = 2; - break; - case Dim3D: - numComponents = 3; - break; - - default: - assert(0); - break; - } - if (isArrayedImageType(getImageType(parameters.sampler))) - ++numComponents; - - Id intType = isUnsignedResult ? makeUintType(32) : makeIntType(32); - if (numComponents == 1) - resultType = intType; - else - resultType = makeVectorType(intType, numComponents); - - break; - } - case OpImageQueryLod: - resultType = makeVectorType(makeFloatType(32), 2); - break; - case OpImageQueryLevels: - case OpImageQuerySamples: - resultType = isUnsignedResult ? 
makeUintType(32) : makeIntType(32); - break; - default: - assert(0); - break; - } - - Instruction* query = new Instruction(getUniqueId(), resultType, opCode); - query->addIdOperand(parameters.sampler); - if (parameters.coords) - query->addIdOperand(parameters.coords); - if (parameters.lod) - query->addIdOperand(parameters.lod); - buildPoint->addInstruction(std::unique_ptr(query)); - - return query->getResultId(); -} - -// External comments in header. -// Operates recursively to visit the composite's hierarchy. -Id Builder::createCompositeCompare(Decoration precision, Id value1, Id value2, bool equal) -{ - Id boolType = makeBoolType(); - Id valueType = getTypeId(value1); - - Id resultId = NoResult; - - int numConstituents = getNumTypeConstituents(valueType); - - // Scalars and Vectors - - if (isScalarType(valueType) || isVectorType(valueType)) { - assert(valueType == getTypeId(value2)); - // These just need a single comparison, just have - // to figure out what it is. - Op op; - switch (getMostBasicTypeClass(valueType)) { - case OpTypeFloat: - op = equal ? OpFOrdEqual : OpFOrdNotEqual; - break; - case OpTypeInt: - default: - op = equal ? OpIEqual : OpINotEqual; - break; - case OpTypeBool: - op = equal ? OpLogicalEqual : OpLogicalNotEqual; - precision = NoPrecision; - break; - } - - if (isScalarType(valueType)) { - // scalar - resultId = createBinOp(op, boolType, value1, value2); - } else { - // vector - resultId = createBinOp(op, makeVectorType(boolType, numConstituents), value1, value2); - setPrecision(resultId, precision); - // reduce vector compares... - resultId = createUnaryOp(equal ? OpAll : OpAny, boolType, resultId); - } - - return setPrecision(resultId, precision); - } - - // Only structs, arrays, and matrices should be left. - // They share in common the reduction operation across their constituents. 
- assert(isAggregateType(valueType) || isMatrixType(valueType)); - - // Compare each pair of constituents - for (int constituent = 0; constituent < numConstituents; ++constituent) { - std::vector indexes(1, constituent); - Id constituentType1 = getContainedTypeId(getTypeId(value1), constituent); - Id constituentType2 = getContainedTypeId(getTypeId(value2), constituent); - Id constituent1 = createCompositeExtract(value1, constituentType1, indexes); - Id constituent2 = createCompositeExtract(value2, constituentType2, indexes); - - Id subResultId = createCompositeCompare(precision, constituent1, constituent2, equal); - - if (constituent == 0) - resultId = subResultId; - else - resultId = setPrecision(createBinOp(equal ? OpLogicalAnd : OpLogicalOr, boolType, resultId, subResultId), precision); - } - - return resultId; -} - -// OpCompositeConstruct -Id Builder::createCompositeConstruct(Id typeId, const std::vector& constituents) -{ - assert(isAggregateType(typeId) || (getNumTypeConstituents(typeId) > 1 && getNumTypeConstituents(typeId) == (int)constituents.size())); - - if (generatingOpCodeForSpecConst) { - // Sometime, even in spec-constant-op mode, the constant composite to be - // constructed may not be a specialization constant. - // e.g.: - // const mat2 m2 = mat2(a_spec_const, a_front_end_const, another_front_end_const, third_front_end_const); - // The first column vector should be a spec constant one, as a_spec_const is a spec constant. - // The second column vector should NOT be spec constant, as it does not contain any spec constants. - // To handle such cases, we check the constituents of the constant vector to determine whether this - // vector should be created as a spec constant. 
- return makeCompositeConstant(typeId, constituents, - std::any_of(constituents.begin(), constituents.end(), - [&](spv::Id id) { return isSpecConstant(id); })); - } - - Instruction* op = new Instruction(getUniqueId(), typeId, OpCompositeConstruct); - for (int c = 0; c < (int)constituents.size(); ++c) - op->addIdOperand(constituents[c]); - buildPoint->addInstruction(std::unique_ptr(op)); - - return op->getResultId(); -} - -// Vector or scalar constructor -Id Builder::createConstructor(Decoration precision, const std::vector& sources, Id resultTypeId) -{ - Id result = NoResult; - unsigned int numTargetComponents = getNumTypeComponents(resultTypeId); - unsigned int targetComponent = 0; - - // Special case: when calling a vector constructor with a single scalar - // argument, smear the scalar - if (sources.size() == 1 && isScalar(sources[0]) && numTargetComponents > 1) - return smearScalar(precision, sources[0], resultTypeId); - - // accumulate the arguments for OpCompositeConstruct - std::vector constituents; - Id scalarTypeId = getScalarTypeId(resultTypeId); - - // lambda to store the result of visiting an argument component - const auto latchResult = [&](Id comp) { - if (numTargetComponents > 1) - constituents.push_back(comp); - else - result = comp; - ++targetComponent; - }; - - // lambda to visit a vector argument's components - const auto accumulateVectorConstituents = [&](Id sourceArg) { - unsigned int sourceSize = getNumComponents(sourceArg); - unsigned int sourcesToUse = sourceSize; - if (sourcesToUse + targetComponent > numTargetComponents) - sourcesToUse = numTargetComponents - targetComponent; - - for (unsigned int s = 0; s < sourcesToUse; ++s) { - std::vector swiz; - swiz.push_back(s); - latchResult(createRvalueSwizzle(precision, scalarTypeId, sourceArg, swiz)); - } - }; - - // lambda to visit a matrix argument's components - const auto accumulateMatrixConstituents = [&](Id sourceArg) { - unsigned int sourceSize = getNumColumns(sourceArg) * 
getNumRows(sourceArg); - unsigned int sourcesToUse = sourceSize; - if (sourcesToUse + targetComponent > numTargetComponents) - sourcesToUse = numTargetComponents - targetComponent; - - int col = 0; - int row = 0; - for (unsigned int s = 0; s < sourcesToUse; ++s) { - if (row >= getNumRows(sourceArg)) { - row = 0; - col++; - } - std::vector indexes; - indexes.push_back(col); - indexes.push_back(row); - latchResult(createCompositeExtract(sourceArg, scalarTypeId, indexes)); - row++; - } - }; - - // Go through the source arguments, each one could have either - // a single or multiple components to contribute. - for (unsigned int i = 0; i < sources.size(); ++i) { - if (isScalar(sources[i])) - latchResult(sources[i]); - else if (isVector(sources[i])) - accumulateVectorConstituents(sources[i]); - else if (isMatrix(sources[i])) - accumulateMatrixConstituents(sources[i]); - else - assert(0); - - if (targetComponent >= numTargetComponents) - break; - } - - // If the result is a vector, make it from the gathered constituents. - if (constituents.size() > 0) - result = createCompositeConstruct(resultTypeId, constituents); - - return setPrecision(result, precision); -} - -// Comments in header -Id Builder::createMatrixConstructor(Decoration precision, const std::vector& sources, Id resultTypeId) -{ - Id componentTypeId = getScalarTypeId(resultTypeId); - int numCols = getTypeNumColumns(resultTypeId); - int numRows = getTypeNumRows(resultTypeId); - - Instruction* instr = module.getInstruction(componentTypeId); - Id bitCount = instr->getIdOperand(0); - - // Will use a two step process - // 1. make a compile-time 2D array of values - // 2. construct a matrix from that array - - // Step 1. - - // initialize the array to the identity matrix - Id ids[maxMatrixSize][maxMatrixSize]; - Id one = (bitCount == 64 ? makeDoubleConstant(1.0) : makeFloatConstant(1.0)); - Id zero = (bitCount == 64 ? 
makeDoubleConstant(0.0) : makeFloatConstant(0.0)); - for (int col = 0; col < 4; ++col) { - for (int row = 0; row < 4; ++row) { - if (col == row) - ids[col][row] = one; - else - ids[col][row] = zero; - } - } - - // modify components as dictated by the arguments - if (sources.size() == 1 && isScalar(sources[0])) { - // a single scalar; resets the diagonals - for (int col = 0; col < 4; ++col) - ids[col][col] = sources[0]; - } else if (isMatrix(sources[0])) { - // constructing from another matrix; copy over the parts that exist in both the argument and constructee - Id matrix = sources[0]; - int minCols = std::min(numCols, getNumColumns(matrix)); - int minRows = std::min(numRows, getNumRows(matrix)); - for (int col = 0; col < minCols; ++col) { - std::vector indexes; - indexes.push_back(col); - for (int row = 0; row < minRows; ++row) { - indexes.push_back(row); - ids[col][row] = createCompositeExtract(matrix, componentTypeId, indexes); - indexes.pop_back(); - setPrecision(ids[col][row], precision); - } - } - } else { - // fill in the matrix in column-major order with whatever argument components are available - int row = 0; - int col = 0; - - for (int arg = 0; arg < (int)sources.size(); ++arg) { - Id argComp = sources[arg]; - for (int comp = 0; comp < getNumComponents(sources[arg]); ++comp) { - if (getNumComponents(sources[arg]) > 1) { - argComp = createCompositeExtract(sources[arg], componentTypeId, comp); - setPrecision(argComp, precision); - } - ids[col][row++] = argComp; - if (row == numRows) { - row = 0; - col++; - } - } - } - } - - // Step 2: Construct a matrix from that array. - // First make the column vectors, then make the matrix. 
- - // make the column vectors - Id columnTypeId = getContainedTypeId(resultTypeId); - std::vector matrixColumns; - for (int col = 0; col < numCols; ++col) { - std::vector vectorComponents; - for (int row = 0; row < numRows; ++row) - vectorComponents.push_back(ids[col][row]); - Id column = createCompositeConstruct(columnTypeId, vectorComponents); - setPrecision(column, precision); - matrixColumns.push_back(column); - } - - // make the matrix - return setPrecision(createCompositeConstruct(resultTypeId, matrixColumns), precision); -} - -// Comments in header -Builder::If::If(Id cond, unsigned int ctrl, Builder& gb) : - builder(gb), - condition(cond), - control(ctrl), - elseBlock(0) -{ - function = &builder.getBuildPoint()->getParent(); - - // make the blocks, but only put the then-block into the function, - // the else-block and merge-block will be added later, in order, after - // earlier code is emitted - thenBlock = new Block(builder.getUniqueId(), *function); - mergeBlock = new Block(builder.getUniqueId(), *function); - - // Save the current block, so that we can add in the flow control split when - // makeEndIf is called. 
- headerBlock = builder.getBuildPoint(); - - function->addBlock(thenBlock); - builder.setBuildPoint(thenBlock); -} - -// Comments in header -void Builder::If::makeBeginElse() -{ - // Close out the "then" by having it jump to the mergeBlock - builder.createBranch(mergeBlock); - - // Make the first else block and add it to the function - elseBlock = new Block(builder.getUniqueId(), *function); - function->addBlock(elseBlock); - - // Start building the else block - builder.setBuildPoint(elseBlock); -} - -// Comments in header -void Builder::If::makeEndIf() -{ - // jump to the merge block - builder.createBranch(mergeBlock); - - // Go back to the headerBlock and make the flow control split - builder.setBuildPoint(headerBlock); - builder.createSelectionMerge(mergeBlock, control); - if (elseBlock) - builder.createConditionalBranch(condition, thenBlock, elseBlock); - else - builder.createConditionalBranch(condition, thenBlock, mergeBlock); - - // add the merge block to the function - function->addBlock(mergeBlock); - builder.setBuildPoint(mergeBlock); -} - -// Comments in header -void Builder::makeSwitch(Id selector, unsigned int control, int numSegments, const std::vector& caseValues, - const std::vector& valueIndexToSegment, int defaultSegment, - std::vector& segmentBlocks) -{ - Function& function = buildPoint->getParent(); - - // make all the blocks - for (int s = 0; s < numSegments; ++s) - segmentBlocks.push_back(new Block(getUniqueId(), function)); - - Block* mergeBlock = new Block(getUniqueId(), function); - - // make and insert the switch's selection-merge instruction - createSelectionMerge(mergeBlock, control); - - // make the switch instruction - Instruction* switchInst = new Instruction(NoResult, NoType, OpSwitch); - switchInst->addIdOperand(selector); - auto defaultOrMerge = (defaultSegment >= 0) ? 
segmentBlocks[defaultSegment] : mergeBlock; - switchInst->addIdOperand(defaultOrMerge->getId()); - defaultOrMerge->addPredecessor(buildPoint); - for (int i = 0; i < (int)caseValues.size(); ++i) { - switchInst->addImmediateOperand(caseValues[i]); - switchInst->addIdOperand(segmentBlocks[valueIndexToSegment[i]]->getId()); - segmentBlocks[valueIndexToSegment[i]]->addPredecessor(buildPoint); - } - buildPoint->addInstruction(std::unique_ptr(switchInst)); - - // push the merge block - switchMerges.push(mergeBlock); -} - -// Comments in header -void Builder::addSwitchBreak() -{ - // branch to the top of the merge block stack - createBranch(switchMerges.top()); - createAndSetNoPredecessorBlock("post-switch-break"); -} - -// Comments in header -void Builder::nextSwitchSegment(std::vector& segmentBlock, int nextSegment) -{ - int lastSegment = nextSegment - 1; - if (lastSegment >= 0) { - // Close out previous segment by jumping, if necessary, to next segment - if (! buildPoint->isTerminated()) - createBranch(segmentBlock[nextSegment]); - } - Block* block = segmentBlock[nextSegment]; - block->getParent().addBlock(block); - setBuildPoint(block); -} - -// Comments in header -void Builder::endSwitch(std::vector& /*segmentBlock*/) -{ - // Close out previous segment by jumping, if necessary, to next segment - if (! buildPoint->isTerminated()) - addSwitchBreak(); - - switchMerges.top()->getParent().addBlock(switchMerges.top()); - setBuildPoint(switchMerges.top()); - - switchMerges.pop(); -} - -Block& Builder::makeNewBlock() -{ - Function& function = buildPoint->getParent(); - auto block = new Block(getUniqueId(), function); - function.addBlock(block); - return *block; -} - -Builder::LoopBlocks& Builder::makeNewLoop() -{ - // This verbosity is needed to simultaneously get the same behavior - // everywhere (id's in the same order), have a syntax that works - // across lots of versions of C++, have no warnings from pedantic - // compilation modes, and leave the rest of the code alone. 
- Block& head = makeNewBlock(); - Block& body = makeNewBlock(); - Block& merge = makeNewBlock(); - Block& continue_target = makeNewBlock(); - LoopBlocks blocks(head, body, merge, continue_target); - loops.push(blocks); - return loops.top(); -} - -void Builder::createLoopContinue() -{ - createBranch(&loops.top().continue_target); - // Set up a block for dead code. - createAndSetNoPredecessorBlock("post-loop-continue"); -} - -void Builder::createLoopExit() -{ - createBranch(&loops.top().merge); - // Set up a block for dead code. - createAndSetNoPredecessorBlock("post-loop-break"); -} - -void Builder::closeLoop() -{ - loops.pop(); -} - -void Builder::clearAccessChain() -{ - accessChain.base = NoResult; - accessChain.indexChain.clear(); - accessChain.instr = NoResult; - accessChain.swizzle.clear(); - accessChain.component = NoResult; - accessChain.preSwizzleBaseType = NoType; - accessChain.isRValue = false; -} - -// Comments in header -void Builder::accessChainPushSwizzle(std::vector& swizzle, Id preSwizzleBaseType) -{ - // swizzles can be stacked in GLSL, but simplified to a single - // one here; the base type doesn't change - if (accessChain.preSwizzleBaseType == NoType) - accessChain.preSwizzleBaseType = preSwizzleBaseType; - - // if needed, propagate the swizzle for the current access chain - if (accessChain.swizzle.size() > 0) { - std::vector oldSwizzle = accessChain.swizzle; - accessChain.swizzle.resize(0); - for (unsigned int i = 0; i < swizzle.size(); ++i) { - assert(swizzle[i] < oldSwizzle.size()); - accessChain.swizzle.push_back(oldSwizzle[swizzle[i]]); - } - } else - accessChain.swizzle = swizzle; - - // determine if we need to track this swizzle anymore - simplifyAccessChainSwizzle(); -} - -// Comments in header -void Builder::accessChainStore(Id rvalue) -{ - assert(accessChain.isRValue == false); - - transferAccessChainSwizzle(true); - Id base = collapseAccessChain(); - Id source = rvalue; - - // dynamic component should be gone - 
assert(accessChain.component == NoResult); - - // If swizzle still exists, it is out-of-order or not full, we must load the target vector, - // extract and insert elements to perform writeMask and/or swizzle. - if (accessChain.swizzle.size() > 0) { - Id tempBaseId = createLoad(base); - source = createLvalueSwizzle(getTypeId(tempBaseId), tempBaseId, source, accessChain.swizzle); - } - - createStore(source, base); -} - -// Comments in header -Id Builder::accessChainLoad(Decoration precision, Id resultType) -{ - Id id; - - if (accessChain.isRValue) { - // transfer access chain, but try to stay in registers - transferAccessChainSwizzle(false); - if (accessChain.indexChain.size() > 0) { - Id swizzleBase = accessChain.preSwizzleBaseType != NoType ? accessChain.preSwizzleBaseType : resultType; - - // if all the accesses are constants, we can use OpCompositeExtract - std::vector indexes; - bool constant = true; - for (int i = 0; i < (int)accessChain.indexChain.size(); ++i) { - if (isConstantScalar(accessChain.indexChain[i])) - indexes.push_back(getConstantScalar(accessChain.indexChain[i])); - else { - constant = false; - break; - } - } - - if (constant) - id = createCompositeExtract(accessChain.base, swizzleBase, indexes); - else { - // make a new function variable for this r-value - Id lValue = createVariable(StorageClassFunction, getTypeId(accessChain.base), "indexable"); - - // store into it - createStore(accessChain.base, lValue); - - // move base to the new variable - accessChain.base = lValue; - accessChain.isRValue = false; - - // load through the access chain - id = createLoad(collapseAccessChain()); - } - setPrecision(id, precision); - } else - id = accessChain.base; // no precision, it was set when this was defined - } else { - transferAccessChainSwizzle(true); - // load through the access chain - id = createLoad(collapseAccessChain()); - setPrecision(id, precision); - } - - // Done, unless there are swizzles to do - if (accessChain.swizzle.size() == 0 && 
accessChain.component == NoResult) - return id; - - // Do remaining swizzling - - // Do the basic swizzle - if (accessChain.swizzle.size() > 0) { - Id swizzledType = getScalarTypeId(getTypeId(id)); - if (accessChain.swizzle.size() > 1) - swizzledType = makeVectorType(swizzledType, (int)accessChain.swizzle.size()); - id = createRvalueSwizzle(precision, swizzledType, id, accessChain.swizzle); - } - - // Do the dynamic component - if (accessChain.component != NoResult) - id = setPrecision(createVectorExtractDynamic(id, resultType, accessChain.component), precision); - - return id; -} - -Id Builder::accessChainGetLValue() -{ - assert(accessChain.isRValue == false); - - transferAccessChainSwizzle(true); - Id lvalue = collapseAccessChain(); - - // If swizzle exists, it is out-of-order or not full, we must load the target vector, - // extract and insert elements to perform writeMask and/or swizzle. This does not - // go with getting a direct l-value pointer. - assert(accessChain.swizzle.size() == 0); - assert(accessChain.component == NoResult); - - return lvalue; -} - -// comment in header -Id Builder::accessChainGetInferredType() -{ - // anything to operate on? - if (accessChain.base == NoResult) - return NoType; - Id type = getTypeId(accessChain.base); - - // do initial dereference - if (! 
accessChain.isRValue) - type = getContainedTypeId(type); - - // dereference each index - for (auto it = accessChain.indexChain.cbegin(); it != accessChain.indexChain.cend(); ++it) { - if (isStructType(type)) - type = getContainedTypeId(type, getConstantScalar(*it)); - else - type = getContainedTypeId(type); - } - - // dereference swizzle - if (accessChain.swizzle.size() == 1) - type = getContainedTypeId(type); - else if (accessChain.swizzle.size() > 1) - type = makeVectorType(getContainedTypeId(type), (int)accessChain.swizzle.size()); - - // dereference component selection - if (accessChain.component) - type = getContainedTypeId(type); - - return type; -} - -// comment in header -void Builder::eliminateDeadDecorations() { - std::unordered_set reachable_blocks; - std::unordered_set unreachable_definitions; - // Collect IDs defined in unreachable blocks. For each function, label the - // reachable blocks first. Then for each unreachable block, collect the - // result IDs of the instructions in it. 
- for (std::vector::const_iterator fi = module.getFunctions().cbegin(); - fi != module.getFunctions().cend(); fi++) { - Function* f = *fi; - Block* entry = f->getEntryBlock(); - inReadableOrder(entry, [&reachable_blocks](const Block* b) { - reachable_blocks.insert(b); - }); - for (std::vector::const_iterator bi = f->getBlocks().cbegin(); - bi != f->getBlocks().cend(); bi++) { - Block* b = *bi; - if (!reachable_blocks.count(b)) { - for (std::vector >::const_iterator - ii = b->getInstructions().cbegin(); - ii != b->getInstructions().cend(); ii++) { - Instruction* i = ii->get(); - unreachable_definitions.insert(i->getResultId()); - } - } - } - } - decorations.erase(std::remove_if(decorations.begin(), decorations.end(), - [&unreachable_definitions](std::unique_ptr& I) -> bool { - Instruction* inst = I.get(); - Id decoration_id = inst->getIdOperand(0); - return unreachable_definitions.count(decoration_id) != 0; - }), - decorations.end()); -} - -void Builder::dump(std::vector& out) const -{ - // Header, before first instructions: - out.push_back(MagicNumber); - out.push_back(spvVersion); - out.push_back(builderNumber); - out.push_back(uniqueId + 1); - out.push_back(0); - - // Capabilities - for (auto it = capabilities.cbegin(); it != capabilities.cend(); ++it) { - Instruction capInst(0, 0, OpCapability); - capInst.addImmediateOperand(*it); - capInst.dump(out); - } - - for (auto it = extensions.cbegin(); it != extensions.cend(); ++it) { - Instruction extInst(0, 0, OpExtension); - extInst.addStringOperand(it->c_str()); - extInst.dump(out); - } - - dumpInstructions(out, imports); - Instruction memInst(0, 0, OpMemoryModel); - memInst.addImmediateOperand(addressModel); - memInst.addImmediateOperand(memoryModel); - memInst.dump(out); - - // Instructions saved up while building: - dumpInstructions(out, entryPoints); - dumpInstructions(out, executionModes); - - // Debug instructions - dumpInstructions(out, strings); - dumpModuleProcesses(out); - dumpSourceInstructions(out); - 
for (int e = 0; e < (int)sourceExtensions.size(); ++e) { - Instruction sourceExtInst(0, 0, OpSourceExtension); - sourceExtInst.addStringOperand(sourceExtensions[e]); - sourceExtInst.dump(out); - } - dumpInstructions(out, names); - dumpInstructions(out, lines); - - // Annotation instructions - dumpInstructions(out, decorations); - - dumpInstructions(out, constantsTypesGlobals); - dumpInstructions(out, externals); - - // The functions - module.dump(out); -} - -// -// Protected methods. -// - -// Turn the described access chain in 'accessChain' into an instruction(s) -// computing its address. This *cannot* include complex swizzles, which must -// be handled after this is called. -// -// Can generate code. -Id Builder::collapseAccessChain() -{ - assert(accessChain.isRValue == false); - - // did we already emit an access chain for this? - if (accessChain.instr != NoResult) - return accessChain.instr; - - // If we have a dynamic component, we can still transfer - // that into a final operand to the access chain. We need to remap the - // dynamic component through the swizzle to get a new dynamic component to - // update. - // - // This was not done in transferAccessChainSwizzle() because it might - // generate code. - remapDynamicSwizzle(); - if (accessChain.component != NoResult) { - // transfer the dynamic component to the access chain - accessChain.indexChain.push_back(accessChain.component); - accessChain.component = NoResult; - } - - // note that non-trivial swizzling is left pending - - // do we have an access chain? - if (accessChain.indexChain.size() == 0) - return accessChain.base; - - // emit the access chain - StorageClass storageClass = (StorageClass)module.getStorageClass(getTypeId(accessChain.base)); - accessChain.instr = createAccessChain(storageClass, accessChain.base, accessChain.indexChain); - - return accessChain.instr; -} - -// For a dynamic component selection of a swizzle. 
-// -// Turn the swizzle and dynamic component into just a dynamic component. -// -// Generates code. -void Builder::remapDynamicSwizzle() -{ - // do we have a swizzle to remap a dynamic component through? - if (accessChain.component != NoResult && accessChain.swizzle.size() > 1) { - // build a vector of the swizzle for the component to map into - std::vector components; - for (int c = 0; c < accessChain.swizzle.size(); ++c) - components.push_back(makeUintConstant(accessChain.swizzle[c])); - Id mapType = makeVectorType(makeUintType(32), (int)accessChain.swizzle.size()); - Id map = makeCompositeConstant(mapType, components); - - // use it - accessChain.component = createVectorExtractDynamic(map, makeUintType(32), accessChain.component); - accessChain.swizzle.clear(); - } -} - -// clear out swizzle if it is redundant, that is reselecting the same components -// that would be present without the swizzle. -void Builder::simplifyAccessChainSwizzle() -{ - // If the swizzle has fewer components than the vector, it is subsetting, and must stay - // to preserve that fact. - if (getNumTypeComponents(accessChain.preSwizzleBaseType) > (int)accessChain.swizzle.size()) - return; - - // if components are out of order, it is a swizzle - for (unsigned int i = 0; i < accessChain.swizzle.size(); ++i) { - if (i != accessChain.swizzle[i]) - return; - } - - // otherwise, there is no need to track this swizzle - accessChain.swizzle.clear(); - if (accessChain.component == NoResult) - accessChain.preSwizzleBaseType = NoType; -} - -// To the extent any swizzling can become part of the chain -// of accesses instead of a post operation, make it so. -// If 'dynamic' is true, include transferring the dynamic component, -// otherwise, leave it pending. -// -// Does not generate code. just updates the access chain. -void Builder::transferAccessChainSwizzle(bool dynamic) -{ - // non existent? - if (accessChain.swizzle.size() == 0 && accessChain.component == NoResult) - return; - - // too complex? 
- // (this requires either a swizzle, or generating code for a dynamic component) - if (accessChain.swizzle.size() > 1) - return; - - // single component, either in the swizzle and/or dynamic component - if (accessChain.swizzle.size() == 1) { - assert(accessChain.component == NoResult); - // handle static component selection - accessChain.indexChain.push_back(makeUintConstant(accessChain.swizzle.front())); - accessChain.swizzle.clear(); - accessChain.preSwizzleBaseType = NoType; - } else if (dynamic && accessChain.component != NoResult) { - assert(accessChain.swizzle.size() == 0); - // handle dynamic component - accessChain.indexChain.push_back(accessChain.component); - accessChain.preSwizzleBaseType = NoType; - accessChain.component = NoResult; - } -} - -// Utility method for creating a new block and setting the insert point to -// be in it. This is useful for flow-control operations that need a "dummy" -// block proceeding them (e.g. instructions after a discard, etc). -void Builder::createAndSetNoPredecessorBlock(const char* /*name*/) -{ - Block* block = new Block(getUniqueId(), buildPoint->getParent()); - block->setUnreachable(); - buildPoint->getParent().addBlock(block); - setBuildPoint(block); - - // if (name) - // addName(block->getId(), name); -} - -// Comments in header -void Builder::createBranch(Block* block) -{ - Instruction* branch = new Instruction(OpBranch); - branch->addIdOperand(block->getId()); - buildPoint->addInstruction(std::unique_ptr(branch)); - block->addPredecessor(buildPoint); -} - -void Builder::createSelectionMerge(Block* mergeBlock, unsigned int control) -{ - Instruction* merge = new Instruction(OpSelectionMerge); - merge->addIdOperand(mergeBlock->getId()); - merge->addImmediateOperand(control); - buildPoint->addInstruction(std::unique_ptr(merge)); -} - -void Builder::createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control, - unsigned int dependencyLength) -{ - Instruction* merge = new Instruction(OpLoopMerge); - 
merge->addIdOperand(mergeBlock->getId()); - merge->addIdOperand(continueBlock->getId()); - merge->addImmediateOperand(control); - if ((control & LoopControlDependencyLengthMask) != 0) - merge->addImmediateOperand(dependencyLength); - buildPoint->addInstruction(std::unique_ptr(merge)); -} - -void Builder::createConditionalBranch(Id condition, Block* thenBlock, Block* elseBlock) -{ - Instruction* branch = new Instruction(OpBranchConditional); - branch->addIdOperand(condition); - branch->addIdOperand(thenBlock->getId()); - branch->addIdOperand(elseBlock->getId()); - buildPoint->addInstruction(std::unique_ptr(branch)); - thenBlock->addPredecessor(buildPoint); - elseBlock->addPredecessor(buildPoint); -} - -// OpSource -// [OpSourceContinued] -// ... -void Builder::dumpSourceInstructions(std::vector& out) const -{ - const int maxWordCount = 0xFFFF; - const int opSourceWordCount = 4; - const int nonNullBytesPerInstruction = 4 * (maxWordCount - opSourceWordCount) - 1; - - if (source != SourceLanguageUnknown) { - // OpSource Language Version File Source - Instruction sourceInst(NoResult, NoType, OpSource); - sourceInst.addImmediateOperand(source); - sourceInst.addImmediateOperand(sourceVersion); - // File operand - if (sourceFileStringId != NoResult) { - sourceInst.addIdOperand(sourceFileStringId); - // Source operand - if (sourceText.size() > 0) { - int nextByte = 0; - std::string subString; - while ((int)sourceText.size() - nextByte > 0) { - subString = sourceText.substr(nextByte, nonNullBytesPerInstruction); - if (nextByte == 0) { - // OpSource - sourceInst.addStringOperand(subString.c_str()); - sourceInst.dump(out); - } else { - // OpSourcContinued - Instruction sourceContinuedInst(OpSourceContinued); - sourceContinuedInst.addStringOperand(subString.c_str()); - sourceContinuedInst.dump(out); - } - nextByte += nonNullBytesPerInstruction; - } - } else - sourceInst.dump(out); - } else - sourceInst.dump(out); - } -} - -void Builder::dumpInstructions(std::vector& out, const 
std::vector >& instructions) const -{ - for (int i = 0; i < (int)instructions.size(); ++i) { - instructions[i]->dump(out); - } -} - -void Builder::dumpModuleProcesses(std::vector& out) const -{ - for (int i = 0; i < (int)moduleProcesses.size(); ++i) { - Instruction moduleProcessed(OpModuleProcessed); - moduleProcessed.addStringOperand(moduleProcesses[i]); - moduleProcessed.dump(out); - } -} - -}; // end spv namespace diff --git a/third_party/glslang-spirv/SpvBuilder.h b/third_party/glslang-spirv/SpvBuilder.h deleted file mode 100644 index 173d10ef2..000000000 --- a/third_party/glslang-spirv/SpvBuilder.h +++ /dev/null @@ -1,641 +0,0 @@ -// -// Copyright (C) 2014-2015 LunarG, Inc. -// Copyright (C) 2015-2016 Google, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -// -// "Builder" is an interface to fully build SPIR-V IR. Allocate one of -// these to build (a thread safe) internal SPIR-V representation (IR), -// and then dump it as a binary stream according to the SPIR-V specification. -// -// A Builder has a 1:1 relationship with a SPIR-V module. -// - -#pragma once -#ifndef SpvBuilder_H -#define SpvBuilder_H - -#include "Logger.h" -#include "spirv.hpp" -#include "spvIR.h" - -#include -#include -#include -#include -#include -#include - -namespace spv { - -class Builder { -public: - Builder(unsigned int spvVersion, unsigned int userNumber, SpvBuildLogger* logger); - virtual ~Builder(); - - static const int maxMatrixSize = 4; - - void setSource(spv::SourceLanguage lang, int version) - { - source = lang; - sourceVersion = version; - } - void setSourceFile(const std::string& file) - { - Instruction* fileString = new Instruction(getUniqueId(), NoType, OpString); - fileString->addStringOperand(file.c_str()); - sourceFileStringId = fileString->getResultId(); - strings.push_back(std::unique_ptr(fileString)); - } - void setSourceText(const std::string& text) { sourceText = text; } - void addSourceExtension(const char* ext) { sourceExtensions.push_back(ext); } - void addModuleProcessed(const std::string& p) { moduleProcesses.push_back(p.c_str()); } - void setEmitOpLines() { emitOpLines = true; } - void addExtension(const char* ext) { extensions.insert(ext); } - Id import(const char*); - void 
setMemoryModel(spv::AddressingModel addr, spv::MemoryModel mem) - { - addressModel = addr; - memoryModel = mem; - } - - void addCapability(spv::Capability cap) { capabilities.insert(cap); } - - // To get a new for anything needing a new one. - Id getUniqueId() { return ++uniqueId; } - - // To get a set of new s, e.g., for a set of function parameters - Id getUniqueIds(int numIds) - { - Id id = uniqueId + 1; - uniqueId += numIds; - return id; - } - - // Log the current line, and if different than the last one, - // issue a new OpLine, using the current file name. - void setLine(int line); - // Low-level OpLine. See setLine() for a layered helper. - void addLine(Id fileName, int line, int column); - - // For creating new types (will return old type if the requested one was already made). - Id makeVoidType(); - Id makeBoolType(); - Id makePointer(StorageClass, Id type); - Id makeIntegerType(int width, bool hasSign); // generic - Id makeIntType(int width) { return makeIntegerType(width, true); } - Id makeUintType(int width) { return makeIntegerType(width, false); } - Id makeFloatType(int width); - Id makeStructType(const std::vector& members, const char*); - Id makeStructResultType(Id type0, Id type1); - Id makeVectorType(Id component, int size); - Id makeMatrixType(Id component, int cols, int rows); - Id makeArrayType(Id element, Id sizeId, int stride); // 0 stride means no stride decoration - Id makeRuntimeArray(Id element); - Id makeFunctionType(Id returnType, const std::vector& paramTypes); - Id makeImageType(Id sampledType, Dim, bool depth, bool arrayed, bool ms, unsigned sampled, ImageFormat format); - Id makeSamplerType(); - Id makeSampledImageType(Id imageType); - - // For querying about types. 
- Id getTypeId(Id resultId) const { return module.getTypeId(resultId); } - Id getDerefTypeId(Id resultId) const; - Op getOpCode(Id id) const { return module.getInstruction(id)->getOpCode(); } - Op getTypeClass(Id typeId) const { return getOpCode(typeId); } - Op getMostBasicTypeClass(Id typeId) const; - int getNumComponents(Id resultId) const { return getNumTypeComponents(getTypeId(resultId)); } - int getNumTypeConstituents(Id typeId) const; - int getNumTypeComponents(Id typeId) const { return getNumTypeConstituents(typeId); } - Id getScalarTypeId(Id typeId) const; - Id getContainedTypeId(Id typeId) const; - Id getContainedTypeId(Id typeId, int) const; - StorageClass getTypeStorageClass(Id typeId) const { return module.getStorageClass(typeId); } - ImageFormat getImageTypeFormat(Id typeId) const { return (ImageFormat)module.getInstruction(typeId)->getImmediateOperand(6); } - - bool isPointer(Id resultId) const { return isPointerType(getTypeId(resultId)); } - bool isScalar(Id resultId) const { return isScalarType(getTypeId(resultId)); } - bool isVector(Id resultId) const { return isVectorType(getTypeId(resultId)); } - bool isMatrix(Id resultId) const { return isMatrixType(getTypeId(resultId)); } - bool isAggregate(Id resultId) const { return isAggregateType(getTypeId(resultId)); } - bool isSampledImage(Id resultId) const { return isSampledImageType(getTypeId(resultId)); } - - bool isBoolType(Id typeId) const { return groupedTypes[OpTypeBool].size() > 0 && typeId == groupedTypes[OpTypeBool].back()->getResultId(); } - bool isIntType(Id typeId) const { return getTypeClass(typeId) == OpTypeInt && module.getInstruction(typeId)->getImmediateOperand(1) != 0; } - bool isUintType(Id typeId) const { return getTypeClass(typeId) == OpTypeInt && module.getInstruction(typeId)->getImmediateOperand(1) == 0; } - bool isFloatType(Id typeId) const { return getTypeClass(typeId) == OpTypeFloat; } - bool isPointerType(Id typeId) const { return getTypeClass(typeId) == OpTypePointer; } - 
bool isScalarType(Id typeId) const { return getTypeClass(typeId) == OpTypeFloat || getTypeClass(typeId) == OpTypeInt || getTypeClass(typeId) == OpTypeBool; } - bool isVectorType(Id typeId) const { return getTypeClass(typeId) == OpTypeVector; } - bool isMatrixType(Id typeId) const { return getTypeClass(typeId) == OpTypeMatrix; } - bool isStructType(Id typeId) const { return getTypeClass(typeId) == OpTypeStruct; } - bool isArrayType(Id typeId) const { return getTypeClass(typeId) == OpTypeArray; } - bool isAggregateType(Id typeId) const { return isArrayType(typeId) || isStructType(typeId); } - bool isImageType(Id typeId) const { return getTypeClass(typeId) == OpTypeImage; } - bool isSamplerType(Id typeId) const { return getTypeClass(typeId) == OpTypeSampler; } - bool isSampledImageType(Id typeId) const { return getTypeClass(typeId) == OpTypeSampledImage; } - - bool isConstantOpCode(Op opcode) const; - bool isSpecConstantOpCode(Op opcode) const; - bool isConstant(Id resultId) const { return isConstantOpCode(getOpCode(resultId)); } - bool isConstantScalar(Id resultId) const { return getOpCode(resultId) == OpConstant; } - bool isSpecConstant(Id resultId) const { return isSpecConstantOpCode(getOpCode(resultId)); } - unsigned int getConstantScalar(Id resultId) const { return module.getInstruction(resultId)->getImmediateOperand(0); } - StorageClass getStorageClass(Id resultId) const { return getTypeStorageClass(getTypeId(resultId)); } - - int getScalarTypeWidth(Id typeId) const - { - Id scalarTypeId = getScalarTypeId(typeId); - assert(getTypeClass(scalarTypeId) == OpTypeInt || getTypeClass(scalarTypeId) == OpTypeFloat); - return module.getInstruction(scalarTypeId)->getImmediateOperand(0); - } - - int getTypeNumColumns(Id typeId) const - { - assert(isMatrixType(typeId)); - return getNumTypeConstituents(typeId); - } - int getNumColumns(Id resultId) const { return getTypeNumColumns(getTypeId(resultId)); } - int getTypeNumRows(Id typeId) const - { - 
assert(isMatrixType(typeId)); - return getNumTypeComponents(getContainedTypeId(typeId)); - } - int getNumRows(Id resultId) const { return getTypeNumRows(getTypeId(resultId)); } - - Dim getTypeDimensionality(Id typeId) const - { - assert(isImageType(typeId)); - return (Dim)module.getInstruction(typeId)->getImmediateOperand(1); - } - Id getImageType(Id resultId) const - { - Id typeId = getTypeId(resultId); - assert(isImageType(typeId) || isSampledImageType(typeId)); - return isSampledImageType(typeId) ? module.getInstruction(typeId)->getIdOperand(0) : typeId; - } - bool isArrayedImageType(Id typeId) const - { - assert(isImageType(typeId)); - return module.getInstruction(typeId)->getImmediateOperand(3) != 0; - } - - // For making new constants (will return old constant if the requested one was already made). - Id makeBoolConstant(bool b, bool specConstant = false); - Id makeIntConstant(int i, bool specConstant = false) { return makeIntConstant(makeIntType(32), (unsigned)i, specConstant); } - Id makeUintConstant(unsigned u, bool specConstant = false) { return makeIntConstant(makeUintType(32), u, specConstant); } - Id makeInt64Constant(long long i, bool specConstant = false) { return makeInt64Constant(makeIntType(64), (unsigned long long)i, specConstant); } - Id makeUint64Constant(unsigned long long u, bool specConstant = false) { return makeInt64Constant(makeUintType(64), u, specConstant); } -#ifdef AMD_EXTENSIONS - Id makeInt16Constant(short i, bool specConstant = false) { return makeIntConstant(makeIntType(16), (unsigned)((unsigned short)i), specConstant); } - Id makeUint16Constant(unsigned short u, bool specConstant = false) { return makeIntConstant(makeUintType(16), (unsigned)u, specConstant); } -#endif - Id makeFloatConstant(float f, bool specConstant = false); - Id makeDoubleConstant(double d, bool specConstant = false); -#ifdef AMD_EXTENSIONS - Id makeFloat16Constant(float f16, bool specConstant = false); -#endif - - // Turn the array of constants into a proper 
spv constant of the requested type. - Id makeCompositeConstant(Id type, const std::vector& comps, bool specConst = false); - - // Methods for adding information outside the CFG. - Instruction* addEntryPoint(ExecutionModel, Function*, const char* name); - void addExecutionMode(Function*, ExecutionMode mode, int value1 = -1, int value2 = -1, int value3 = -1); - void addName(Id, const char* name); - void addMemberName(Id, int member, const char* name); - void addDecoration(Id, Decoration, int num = -1); - void addMemberDecoration(Id, unsigned int member, Decoration, int num = -1); - - // At the end of what block do the next create*() instructions go? - void setBuildPoint(Block* bp) { buildPoint = bp; } - Block* getBuildPoint() const { return buildPoint; } - - // Make the entry-point function. The returned pointer is only valid - // for the lifetime of this builder. - Function* makeEntryPoint(const char*); - - // Make a shader-style function, and create its entry block if entry is non-zero. - // Return the function, pass back the entry. - // The returned pointer is only valid for the lifetime of this builder. - Function* makeFunctionEntry(Decoration precision, Id returnType, const char* name, const std::vector& paramTypes, - const std::vector>& precisions, Block **entry = 0); - - // Create a return. An 'implicit' return is one not appearing in the source - // code. In the case of an implicit return, no post-return block is inserted. - void makeReturn(bool implicit, Id retVal = 0); - - // Generate all the code needed to finish up a function. - void leaveFunction(); - - // Create a discard. - void makeDiscard(); - - // Create a global or function local or IO variable. - Id createVariable(StorageClass, Id type, const char* name = 0); - - // Create an intermediate with an undefined value. 
- Id createUndefined(Id type); - - // Store into an Id and return the l-value - void createStore(Id rValue, Id lValue); - - // Load from an Id and return it - Id createLoad(Id lValue); - - // Create an OpAccessChain instruction - Id createAccessChain(StorageClass, Id base, const std::vector& offsets); - - // Create an OpArrayLength instruction - Id createArrayLength(Id base, unsigned int member); - - // Create an OpCompositeExtract instruction - Id createCompositeExtract(Id composite, Id typeId, unsigned index); - Id createCompositeExtract(Id composite, Id typeId, const std::vector& indexes); - Id createCompositeInsert(Id object, Id composite, Id typeId, unsigned index); - Id createCompositeInsert(Id object, Id composite, Id typeId, const std::vector& indexes); - - Id createVectorExtractDynamic(Id vector, Id typeId, Id componentIndex); - Id createVectorInsertDynamic(Id vector, Id typeId, Id component, Id componentIndex); - - void createNoResultOp(Op); - void createNoResultOp(Op, Id operand); - void createNoResultOp(Op, const std::vector& operands); - void createControlBarrier(Scope execution, Scope memory, MemorySemanticsMask); - void createMemoryBarrier(unsigned executionScope, unsigned memorySemantics); - Id createUnaryOp(Op, Id typeId, Id operand); - Id createBinOp(Op, Id typeId, Id operand1, Id operand2); - Id createTriOp(Op, Id typeId, Id operand1, Id operand2, Id operand3); - Id createOp(Op, Id typeId, const std::vector& operands); - Id createFunctionCall(spv::Function*, const std::vector&); - Id createSpecConstantOp(Op, Id typeId, const std::vector& operands, const std::vector& literals); - - // Take an rvalue (source) and a set of channels to extract from it to - // make a new rvalue, which is returned. - Id createRvalueSwizzle(Decoration precision, Id typeId, Id source, const std::vector& channels); - - // Take a copy of an lvalue (target) and a source of components, and set the - // source components into the lvalue where the 'channels' say to put them. 
- // An updated version of the target is returned. - // (No true lvalue or stores are used.) - Id createLvalueSwizzle(Id typeId, Id target, Id source, const std::vector& channels); - - // If both the id and precision are valid, the id - // gets tagged with the requested precision. - // The passed in id is always the returned id, to simplify use patterns. - Id setPrecision(Id id, Decoration precision) - { - if (precision != NoPrecision && id != NoResult) - addDecoration(id, precision); - - return id; - } - - // Can smear a scalar to a vector for the following forms: - // - promoteScalar(scalar, vector) // smear scalar to width of vector - // - promoteScalar(vector, scalar) // smear scalar to width of vector - // - promoteScalar(pointer, scalar) // smear scalar to width of what pointer points to - // - promoteScalar(scalar, scalar) // do nothing - // Other forms are not allowed. - // - // Generally, the type of 'scalar' does not need to be the same type as the components in 'vector'. - // The type of the created vector is a vector of components of the same type as the scalar. - // - // Note: One of the arguments will change, with the result coming back that way rather than - // through the return value. - void promoteScalar(Decoration precision, Id& left, Id& right); - - // Make a value by smearing the scalar to fill the type. - // vectorType should be the correct type for making a vector of scalarVal. - // (No conversions are done.) - Id smearScalar(Decoration precision, Id scalarVal, Id vectorType); - - // Create a call to a built-in function. 
- Id createBuiltinCall(Id resultType, Id builtins, int entryPoint, const std::vector& args); - - // List of parameters used to create a texture operation - struct TextureParameters { - Id sampler; - Id coords; - Id bias; - Id lod; - Id Dref; - Id offset; - Id offsets; - Id gradX; - Id gradY; - Id sample; - Id component; - Id texelOut; - Id lodClamp; - }; - - // Select the correct texture operation based on all inputs, and emit the correct instruction - Id createTextureCall(Decoration precision, Id resultType, bool sparse, bool fetch, bool proj, bool gather, bool noImplicit, const TextureParameters&); - - // Emit the OpTextureQuery* instruction that was passed in. - // Figure out the right return value and type, and return it. - Id createTextureQueryCall(Op, const TextureParameters&, bool isUnsignedResult); - - Id createSamplePositionCall(Decoration precision, Id, Id); - - Id createBitFieldExtractCall(Decoration precision, Id, Id, Id, bool isSigned); - Id createBitFieldInsertCall(Decoration precision, Id, Id, Id, Id); - - // Reduction comparison for composites: For equal and not-equal resulting in a scalar. - Id createCompositeCompare(Decoration precision, Id, Id, bool /* true if for equal, false if for not-equal */); - - // OpCompositeConstruct - Id createCompositeConstruct(Id typeId, const std::vector& constituents); - - // vector or scalar constructor - Id createConstructor(Decoration precision, const std::vector& sources, Id resultTypeId); - - // matrix constructor - Id createMatrixConstructor(Decoration precision, const std::vector& sources, Id constructee); - - // Helper to use for building nested control flow with if-then-else. 
- class If { - public: - If(Id condition, unsigned int ctrl, Builder& builder); - ~If() {} - - void makeBeginElse(); - void makeEndIf(); - - private: - If(const If&); - If& operator=(If&); - - Builder& builder; - Id condition; - unsigned int control; - Function* function; - Block* headerBlock; - Block* thenBlock; - Block* elseBlock; - Block* mergeBlock; - }; - - // Make a switch statement. A switch has 'numSegments' of pieces of code, not containing - // any case/default labels, all separated by one or more case/default labels. Each possible - // case value v is a jump to the caseValues[v] segment. The defaultSegment is also in this - // number space. How to compute the value is given by 'condition', as in switch(condition). - // - // The SPIR-V Builder will maintain the stack of post-switch merge blocks for nested switches. - // - // Use a defaultSegment < 0 if there is no default segment (to branch to post switch). - // - // Returns the right set of basic blocks to start each code segment with, so that the caller's - // recursion stack can hold the memory for it. - // - void makeSwitch(Id condition, unsigned int control, int numSegments, const std::vector& caseValues, - const std::vector& valueToSegment, int defaultSegment, std::vector& segmentBB); // return argument - - // Add a branch to the innermost switch's merge block. - void addSwitchBreak(); - - // Move to the next code segment, passing in the return argument in makeSwitch() - void nextSwitchSegment(std::vector& segmentBB, int segment); - - // Finish off the innermost switch. - void endSwitch(std::vector& segmentBB); - - struct LoopBlocks { - LoopBlocks(Block& head, Block& body, Block& merge, Block& continue_target) : - head(head), body(body), merge(merge), continue_target(continue_target) { } - Block &head, &body, &merge, &continue_target; - private: - LoopBlocks(); - LoopBlocks& operator=(const LoopBlocks&); - }; - - // Start a new loop and prepare the builder to generate code for it. 
Until - // closeLoop() is called for this loop, createLoopContinue() and - // createLoopExit() will target its corresponding blocks. - LoopBlocks& makeNewLoop(); - - // Create a new block in the function containing the build point. Memory is - // owned by the function object. - Block& makeNewBlock(); - - // Add a branch to the continue_target of the current (innermost) loop. - void createLoopContinue(); - - // Add an exit (e.g. "break") from the innermost loop that we're currently - // in. - void createLoopExit(); - - // Close the innermost loop that you're in - void closeLoop(); - - // - // Access chain design for an R-Value vs. L-Value: - // - // There is a single access chain the builder is building at - // any particular time. Such a chain can be used to either to a load or - // a store, when desired. - // - // Expressions can be r-values, l-values, or both, or only r-values: - // a[b.c].d = .... // l-value - // ... = a[b.c].d; // r-value, that also looks like an l-value - // ++a[b.c].d; // r-value and l-value - // (x + y)[2]; // r-value only, can't possibly be l-value - // - // Computing an r-value means generating code. Hence, - // r-values should only be computed when they are needed, not speculatively. - // - // Computing an l-value means saving away information for later use in the compiler, - // no code is generated until the l-value is later dereferenced. It is okay - // to speculatively generate an l-value, just not okay to speculatively dereference it. - // - // The base of the access chain (the left-most variable or expression - // from which everything is based) can be set either as an l-value - // or as an r-value. Most efficient would be to set an l-value if one - // is available. If an expression was evaluated, the resulting r-value - // can be set as the chain base. - // - // The users of this single access chain can save and restore if they - // want to nest or manage multiple chains. 
- // - - struct AccessChain { - Id base; // for l-values, pointer to the base object, for r-values, the base object - std::vector indexChain; - Id instr; // cache the instruction that generates this access chain - std::vector swizzle; // each std::vector element selects the next GLSL component number - Id component; // a dynamic component index, can coexist with a swizzle, done after the swizzle, NoResult if not present - Id preSwizzleBaseType; // dereferenced type, before swizzle or component is applied; NoType unless a swizzle or component is present - bool isRValue; // true if 'base' is an r-value, otherwise, base is an l-value - }; - - // - // the SPIR-V builder maintains a single active chain that - // the following methods operate on - // - - // for external save and restore - AccessChain getAccessChain() { return accessChain; } - void setAccessChain(AccessChain newChain) { accessChain = newChain; } - - // clear accessChain - void clearAccessChain(); - - // set new base as an l-value base - void setAccessChainLValue(Id lValue) - { - assert(isPointer(lValue)); - accessChain.base = lValue; - } - - // set new base value as an r-value - void setAccessChainRValue(Id rValue) - { - accessChain.isRValue = true; - accessChain.base = rValue; - } - - // push offset onto the end of the chain - void accessChainPush(Id offset) - { - accessChain.indexChain.push_back(offset); - } - - // push new swizzle onto the end of any existing swizzle, merging into a single swizzle - void accessChainPushSwizzle(std::vector& swizzle, Id preSwizzleBaseType); - - // push a dynamic component selection onto the access chain, only applicable with a - // non-trivial swizzle or no swizzle - void accessChainPushComponent(Id component, Id preSwizzleBaseType) - { - if (accessChain.swizzle.size() != 1) { - accessChain.component = component; - if (accessChain.preSwizzleBaseType == NoType) - accessChain.preSwizzleBaseType = preSwizzleBaseType; - } - } - - // use accessChain and swizzle to store value 
- void accessChainStore(Id rvalue); - - // use accessChain and swizzle to load an r-value - Id accessChainLoad(Decoration precision, Id ResultType); - - // get the direct pointer for an l-value - Id accessChainGetLValue(); - - // Get the inferred SPIR-V type of the result of the current access chain, - // based on the type of the base and the chain of dereferences. - Id accessChainGetInferredType(); - - // Remove OpDecorate instructions whose operands are defined in unreachable - // blocks. - void eliminateDeadDecorations(); - void dump(std::vector&) const; - - void createBranch(Block* block); - void createConditionalBranch(Id condition, Block* thenBlock, Block* elseBlock); - void createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control, unsigned int dependencyLength); - void createSelectionMerge(Block* mergeBlock, unsigned int control); - - // Sets to generate opcode for specialization constants. - void setToSpecConstCodeGenMode() { generatingOpCodeForSpecConst = true; } - // Sets to generate opcode for non-specialization constants (normal mode). - void setToNormalCodeGenMode() { generatingOpCodeForSpecConst = false; } - // Check if the builder is generating code for spec constants. 
- bool isInSpecConstCodeGenMode() { return generatingOpCodeForSpecConst; } - - protected: - Id makeIntConstant(Id typeId, unsigned value, bool specConstant); - Id makeInt64Constant(Id typeId, unsigned long long value, bool specConstant); - Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned value) const; - Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned v1, unsigned v2) const; - Id findCompositeConstant(Op typeClass, const std::vector& comps) const; - Id collapseAccessChain(); - void remapDynamicSwizzle(); - void transferAccessChainSwizzle(bool dynamic); - void simplifyAccessChainSwizzle(); - void createAndSetNoPredecessorBlock(const char*); - void dumpSourceInstructions(std::vector&) const; - void dumpInstructions(std::vector&, const std::vector >&) const; - void dumpModuleProcesses(std::vector&) const; - - unsigned int spvVersion; // the version of SPIR-V to emit in the header - SourceLanguage source; - int sourceVersion; - spv::Id sourceFileStringId; - std::string sourceText; - int currentLine; - bool emitOpLines; - std::set extensions; - std::vector sourceExtensions; - std::vector moduleProcesses; - AddressingModel addressModel; - MemoryModel memoryModel; - std::set capabilities; - int builderNumber; - Module module; - Block* buildPoint; - Id uniqueId; - Function* entryPointFunction; - bool generatingOpCodeForSpecConst; - AccessChain accessChain; - - // special blocks of instructions for output - std::vector > strings; - std::vector > imports; - std::vector > entryPoints; - std::vector > executionModes; - std::vector > names; - std::vector > lines; - std::vector > decorations; - std::vector > constantsTypesGlobals; - std::vector > externals; - std::vector > functions; - - // not output, internally used for quick & dirty canonical (unique) creation - std::vector groupedConstants[OpConstant]; // all types appear before OpConstant - std::vector groupedTypes[OpConstant]; - - // stack of switches - std::stack switchMerges; - - // 
Our loop stack. - std::stack loops; - - // The stream for outputting warnings and errors. - SpvBuildLogger* logger; -}; // end Builder class - -}; // end spv namespace - -#endif // SpvBuilder_H diff --git a/third_party/glslang-spirv/bitutils.h b/third_party/glslang-spirv/bitutils.h deleted file mode 100644 index 31288ab69..000000000 --- a/third_party/glslang-spirv/bitutils.h +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2015-2016 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef LIBSPIRV_UTIL_BITUTILS_H_ -#define LIBSPIRV_UTIL_BITUTILS_H_ - -#include -#include - -namespace spvutils { - -// Performs a bitwise copy of source to the destination type Dest. -template -Dest BitwiseCast(Src source) { - Dest dest; - static_assert(sizeof(source) == sizeof(dest), - "BitwiseCast: Source and destination must have the same size"); - std::memcpy(&dest, &source, sizeof(dest)); - return dest; -} - -// SetBits returns an integer of type with bits set -// for position through , counting from the least -// significant bit. In particular when Num == 0, no positions are set to 1. -// A static assert will be triggered if First + Num > sizeof(T) * 8, that is, -// a bit that will not fit in the underlying type is set. 
-template -struct SetBits { - static_assert(First < sizeof(T) * 8, - "Tried to set a bit that is shifted too far."); - const static T get = (T(1) << First) | SetBits::get; -}; - -template -struct SetBits { - const static T get = T(0); -}; - -// This is all compile-time so we can put our tests right here. -static_assert(SetBits::get == uint32_t(0x00000000), - "SetBits failed"); -static_assert(SetBits::get == uint32_t(0x00000001), - "SetBits failed"); -static_assert(SetBits::get == uint32_t(0x80000000), - "SetBits failed"); -static_assert(SetBits::get == uint32_t(0x00000006), - "SetBits failed"); -static_assert(SetBits::get == uint32_t(0xc0000000), - "SetBits failed"); -static_assert(SetBits::get == uint32_t(0x7FFFFFFF), - "SetBits failed"); -static_assert(SetBits::get == uint32_t(0xFFFFFFFF), - "SetBits failed"); -static_assert(SetBits::get == uint32_t(0xFFFF0000), - "SetBits failed"); - -static_assert(SetBits::get == uint64_t(0x0000000000000001LL), - "SetBits failed"); -static_assert(SetBits::get == uint64_t(0x8000000000000000LL), - "SetBits failed"); -static_assert(SetBits::get == uint64_t(0xc000000000000000LL), - "SetBits failed"); -static_assert(SetBits::get == uint64_t(0x0000000080000000LL), - "SetBits failed"); -static_assert(SetBits::get == uint64_t(0x00000000FFFF0000LL), - "SetBits failed"); - -} // namespace spvutils - -#endif // LIBSPIRV_UTIL_BITUTILS_H_ diff --git a/third_party/glslang-spirv/disassemble.cpp b/third_party/glslang-spirv/disassemble.cpp deleted file mode 100644 index c950a66a0..000000000 --- a/third_party/glslang-spirv/disassemble.cpp +++ /dev/null @@ -1,695 +0,0 @@ -// -// Copyright (C) 2014-2015 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -// -// Disassembler for SPIR-V. 
-// - -#include -#include -#include -#include -#include -#include -#include - -#include "disassemble.h" -#include "doc.h" - -namespace spv { - extern "C" { - // Include C-based headers that don't have a namespace - #include "GLSL.std.450.h" -#ifdef AMD_EXTENSIONS - #include "GLSL.ext.AMD.h" -#endif -#ifdef NV_EXTENSIONS - #include "GLSL.ext.NV.h" -#endif - } -} -const char* GlslStd450DebugNames[spv::GLSLstd450Count]; - -namespace spv { - -#ifdef AMD_EXTENSIONS -static const char* GLSLextAMDGetDebugNames(const char*, unsigned); -#endif - -#ifdef NV_EXTENSIONS -static const char* GLSLextNVGetDebugNames(const char*, unsigned); -#endif - -static void Kill(std::ostream& out, const char* message) -{ - out << std::endl << "Disassembly failed: " << message << std::endl; - exit(1); -} - -// used to identify the extended instruction library imported when printing -enum ExtInstSet { - GLSL450Inst, -#ifdef AMD_EXTENSIONS - GLSLextAMDInst, -#endif -#ifdef NV_EXTENSIONS - GLSLextNVInst, -#endif - OpenCLExtInst, -}; - -// Container class for a single instance of a SPIR-V stream, with methods for disassembly. -class SpirvStream { -public: - SpirvStream(std::ostream& out, const std::vector& stream) : out(out), stream(stream), word(0), nextNestedControl(0) { } - virtual ~SpirvStream() { } - - void validate(); - void processInstructions(); - -protected: - SpirvStream(const SpirvStream&); - SpirvStream& operator=(const SpirvStream&); - Op getOpCode(int id) const { return idInstruction[id] ? 
(Op)(stream[idInstruction[id]] & OpCodeMask) : OpNop; } - - // Output methods - void outputIndent(); - void formatId(Id id, std::stringstream&); - void outputResultId(Id id); - void outputTypeId(Id id); - void outputId(Id id); - void outputMask(OperandClass operandClass, unsigned mask); - void disassembleImmediates(int numOperands); - void disassembleIds(int numOperands); - int disassembleString(); - void disassembleInstruction(Id resultId, Id typeId, Op opCode, int numOperands); - - // Data - std::ostream& out; // where to write the disassembly - const std::vector& stream; // the actual word stream - int size; // the size of the word stream - int word; // the next word of the stream to read - - // map each to the instruction that created it - Id bound; - std::vector idInstruction; // the word offset into the stream where the instruction for result [id] starts; 0 if not yet seen (forward reference or function parameter) - - std::vector idDescriptor; // the best text string known for explaining the - - // schema - unsigned int schema; - - // stack of structured-merge points - std::stack nestedControl; - Id nextNestedControl; // need a slight delay for when we are nested -}; - -void SpirvStream::validate() -{ - size = (int)stream.size(); - if (size < 4) - Kill(out, "stream is too short"); - - // Magic number - if (stream[word++] != MagicNumber) { - out << "Bad magic number"; - return; - } - - // Version - out << "// Module Version " << std::hex << stream[word++] << std::endl; - - // Generator's magic number - out << "// Generated by (magic number): " << std::hex << stream[word++] << std::dec << std::endl; - - // Result bound - bound = stream[word++]; - idInstruction.resize(bound); - idDescriptor.resize(bound); - out << "// Id's are bound by " << bound << std::endl; - out << std::endl; - - // Reserved schema, must be 0 for now - schema = stream[word++]; - if (schema != 0) - Kill(out, "bad schema, must be 0"); -} - -// Loop over all the instructions, in order, 
processing each. -// Boiler plate for each is handled here directly, the rest is dispatched. -void SpirvStream::processInstructions() -{ - // Instructions - while (word < size) { - int instructionStart = word; - - // Instruction wordCount and opcode - unsigned int firstWord = stream[word]; - unsigned wordCount = firstWord >> WordCountShift; - Op opCode = (Op)(firstWord & OpCodeMask); - int nextInst = word + wordCount; - ++word; - - // Presence of full instruction - if (nextInst > size) - Kill(out, "stream instruction terminated too early"); - - // Base for computing number of operands; will be updated as more is learned - unsigned numOperands = wordCount - 1; - - // Type - Id typeId = 0; - if (InstructionDesc[opCode].hasType()) { - typeId = stream[word++]; - --numOperands; - } - - // Result - Id resultId = 0; - if (InstructionDesc[opCode].hasResult()) { - resultId = stream[word++]; - --numOperands; - - // save instruction for future reference - idInstruction[resultId] = instructionStart; - } - - outputResultId(resultId); - outputTypeId(typeId); - outputIndent(); - - // Hand off the Op and all its operands - disassembleInstruction(resultId, typeId, opCode, numOperands); - if (word != nextInst) { - out << " ERROR, incorrect number of operands consumed. At " << word << " instead of " << nextInst << " instruction start was " << instructionStart; - word = nextInst; - } - out << std::endl; - } -} - -void SpirvStream::outputIndent() -{ - for (int i = 0; i < (int)nestedControl.size(); ++i) - out << " "; -} - -void SpirvStream::formatId(Id id, std::stringstream& idStream) -{ - if (id != 0) { - // On instructions with no IDs, this is called with "0", which does not - // have to be within ID bounds on null shaders. 
- if (id >= bound) - Kill(out, "Bad "); - - idStream << id; - if (idDescriptor[id].size() > 0) - idStream << "(" << idDescriptor[id] << ")"; - } -} - -void SpirvStream::outputResultId(Id id) -{ - const int width = 16; - std::stringstream idStream; - formatId(id, idStream); - out << std::setw(width) << std::right << idStream.str(); - if (id != 0) - out << ":"; - else - out << " "; - - if (nestedControl.size() && id == nestedControl.top()) - nestedControl.pop(); -} - -void SpirvStream::outputTypeId(Id id) -{ - const int width = 12; - std::stringstream idStream; - formatId(id, idStream); - out << std::setw(width) << std::right << idStream.str() << " "; -} - -void SpirvStream::outputId(Id id) -{ - if (id >= bound) - Kill(out, "Bad "); - - out << id; - if (idDescriptor[id].size() > 0) - out << "(" << idDescriptor[id] << ")"; -} - -void SpirvStream::outputMask(OperandClass operandClass, unsigned mask) -{ - if (mask == 0) - out << "None"; - else { - for (int m = 0; m < OperandClassParams[operandClass].ceiling; ++m) { - if (mask & (1 << m)) - out << OperandClassParams[operandClass].getName(m) << " "; - } - } -} - -void SpirvStream::disassembleImmediates(int numOperands) -{ - for (int i = 0; i < numOperands; ++i) { - out << stream[word++]; - if (i < numOperands - 1) - out << " "; - } -} - -void SpirvStream::disassembleIds(int numOperands) -{ - for (int i = 0; i < numOperands; ++i) { - outputId(stream[word++]); - if (i < numOperands - 1) - out << " "; - } -} - -// return the number of operands consumed by the string -int SpirvStream::disassembleString() -{ - int startWord = word; - - out << " \""; - - const char* wordString; - bool done = false; - do { - unsigned int content = stream[word]; - wordString = (const char*)&content; - for (int charCount = 0; charCount < 4; ++charCount) { - if (*wordString == 0) { - done = true; - break; - } - out << *(wordString++); - } - ++word; - } while (! 
done); - - out << "\""; - - return word - startWord; -} - -void SpirvStream::disassembleInstruction(Id resultId, Id /*typeId*/, Op opCode, int numOperands) -{ - // Process the opcode - - out << (OpcodeString(opCode) + 2); // leave out the "Op" - - if (opCode == OpLoopMerge || opCode == OpSelectionMerge) - nextNestedControl = stream[word]; - else if (opCode == OpBranchConditional || opCode == OpSwitch) { - if (nextNestedControl) { - nestedControl.push(nextNestedControl); - nextNestedControl = 0; - } - } else if (opCode == OpExtInstImport) { - idDescriptor[resultId] = (const char*)(&stream[word]); - } - else { - if (resultId != 0 && idDescriptor[resultId].size() == 0) { - switch (opCode) { - case OpTypeInt: - idDescriptor[resultId] = "int"; - break; - case OpTypeFloat: - idDescriptor[resultId] = "float"; - break; - case OpTypeBool: - idDescriptor[resultId] = "bool"; - break; - case OpTypeStruct: - idDescriptor[resultId] = "struct"; - break; - case OpTypePointer: - idDescriptor[resultId] = "ptr"; - break; - case OpTypeVector: - if (idDescriptor[stream[word]].size() > 0) - idDescriptor[resultId].append(idDescriptor[stream[word]].begin(), idDescriptor[stream[word]].begin() + 1); - idDescriptor[resultId].append("vec"); - switch (stream[word + 1]) { - case 2: idDescriptor[resultId].append("2"); break; - case 3: idDescriptor[resultId].append("3"); break; - case 4: idDescriptor[resultId].append("4"); break; - case 8: idDescriptor[resultId].append("8"); break; - case 16: idDescriptor[resultId].append("16"); break; - case 32: idDescriptor[resultId].append("32"); break; - default: break; - } - break; - default: - break; - } - } - } - - // Process the operands. Note, a new context-dependent set could be - // swapped in mid-traversal. - - // Handle images specially, so can put out helpful strings. - if (opCode == OpTypeImage) { - out << " "; - disassembleIds(1); - out << " " << DimensionString((Dim)stream[word++]); - out << (stream[word++] != 0 ? 
" depth" : ""); - out << (stream[word++] != 0 ? " array" : ""); - out << (stream[word++] != 0 ? " multi-sampled" : ""); - switch (stream[word++]) { - case 0: out << " runtime"; break; - case 1: out << " sampled"; break; - case 2: out << " nonsampled"; break; - } - out << " format:" << ImageFormatString((ImageFormat)stream[word++]); - - if (numOperands == 8) { - out << " " << AccessQualifierString(stream[word++]); - } - return; - } - - // Handle all the parameterized operands - for (int op = 0; op < InstructionDesc[opCode].operands.getNum() && numOperands > 0; ++op) { - out << " "; - OperandClass operandClass = InstructionDesc[opCode].operands.getClass(op); - switch (operandClass) { - case OperandId: - case OperandScope: - case OperandMemorySemantics: - disassembleIds(1); - --numOperands; - // Get names for printing "(XXX)" for readability, *after* this id - if (opCode == OpName) - idDescriptor[stream[word - 1]] = (const char*)(&stream[word]); - break; - case OperandVariableIds: - disassembleIds(numOperands); - return; - case OperandImageOperands: - outputMask(OperandImageOperands, stream[word++]); - --numOperands; - disassembleIds(numOperands); - return; - case OperandOptionalLiteral: - case OperandVariableLiterals: - if ((opCode == OpDecorate && stream[word - 1] == DecorationBuiltIn) || - (opCode == OpMemberDecorate && stream[word - 1] == DecorationBuiltIn)) { - out << BuiltInString(stream[word++]); - --numOperands; - ++op; - } - disassembleImmediates(numOperands); - return; - case OperandVariableIdLiteral: - while (numOperands > 0) { - out << std::endl; - outputResultId(0); - outputTypeId(0); - outputIndent(); - out << " Type "; - disassembleIds(1); - out << ", member "; - disassembleImmediates(1); - numOperands -= 2; - } - return; - case OperandVariableLiteralId: - while (numOperands > 0) { - out << std::endl; - outputResultId(0); - outputTypeId(0); - outputIndent(); - out << " case "; - disassembleImmediates(1); - out << ": "; - disassembleIds(1); - numOperands 
-= 2; - } - return; - case OperandLiteralNumber: - disassembleImmediates(1); - --numOperands; - if (opCode == OpExtInst) { - ExtInstSet extInstSet = GLSL450Inst; - const char* name = idDescriptor[stream[word - 2]].c_str(); - if (0 == memcmp("OpenCL", name, 6)) { - extInstSet = OpenCLExtInst; -#ifdef AMD_EXTENSIONS - } else if (strcmp(spv::E_SPV_AMD_shader_ballot, name) == 0 || - strcmp(spv::E_SPV_AMD_shader_trinary_minmax, name) == 0 || - strcmp(spv::E_SPV_AMD_shader_explicit_vertex_parameter, name) == 0 || - strcmp(spv::E_SPV_AMD_gcn_shader, name) == 0) { - extInstSet = GLSLextAMDInst; -#endif -#ifdef NV_EXTENSIONS - }else if (strcmp(spv::E_SPV_NV_sample_mask_override_coverage, name) == 0 || - strcmp(spv::E_SPV_NV_geometry_shader_passthrough, name) == 0 || - strcmp(spv::E_SPV_NV_viewport_array2, name) == 0 || - strcmp(spv::E_SPV_NVX_multiview_per_view_attributes, name) == 0) { - extInstSet = GLSLextNVInst; -#endif - } - unsigned entrypoint = stream[word - 1]; - if (extInstSet == GLSL450Inst) { - if (entrypoint < GLSLstd450Count) { - out << "(" << GlslStd450DebugNames[entrypoint] << ")"; - } -#ifdef AMD_EXTENSIONS - } else if (extInstSet == GLSLextAMDInst) { - out << "(" << GLSLextAMDGetDebugNames(name, entrypoint) << ")"; -#endif -#ifdef NV_EXTENSIONS - } - else if (extInstSet == GLSLextNVInst) { - out << "(" << GLSLextNVGetDebugNames(name, entrypoint) << ")"; -#endif - } - } - break; - case OperandOptionalLiteralString: - case OperandLiteralString: - numOperands -= disassembleString(); - break; - default: - assert(operandClass >= OperandSource && operandClass < OperandOpcode); - - if (OperandClassParams[operandClass].bitmask) - outputMask(operandClass, stream[word++]); - else - out << OperandClassParams[operandClass].getName(stream[word++]); - --numOperands; - - break; - } - } - - return; -} - -static void GLSLstd450GetDebugNames(const char** names) -{ - for (int i = 0; i < GLSLstd450Count; ++i) - names[i] = "Unknown"; - - names[GLSLstd450Round] = "Round"; - 
names[GLSLstd450RoundEven] = "RoundEven"; - names[GLSLstd450Trunc] = "Trunc"; - names[GLSLstd450FAbs] = "FAbs"; - names[GLSLstd450SAbs] = "SAbs"; - names[GLSLstd450FSign] = "FSign"; - names[GLSLstd450SSign] = "SSign"; - names[GLSLstd450Floor] = "Floor"; - names[GLSLstd450Ceil] = "Ceil"; - names[GLSLstd450Fract] = "Fract"; - names[GLSLstd450Radians] = "Radians"; - names[GLSLstd450Degrees] = "Degrees"; - names[GLSLstd450Sin] = "Sin"; - names[GLSLstd450Cos] = "Cos"; - names[GLSLstd450Tan] = "Tan"; - names[GLSLstd450Asin] = "Asin"; - names[GLSLstd450Acos] = "Acos"; - names[GLSLstd450Atan] = "Atan"; - names[GLSLstd450Sinh] = "Sinh"; - names[GLSLstd450Cosh] = "Cosh"; - names[GLSLstd450Tanh] = "Tanh"; - names[GLSLstd450Asinh] = "Asinh"; - names[GLSLstd450Acosh] = "Acosh"; - names[GLSLstd450Atanh] = "Atanh"; - names[GLSLstd450Atan2] = "Atan2"; - names[GLSLstd450Pow] = "Pow"; - names[GLSLstd450Exp] = "Exp"; - names[GLSLstd450Log] = "Log"; - names[GLSLstd450Exp2] = "Exp2"; - names[GLSLstd450Log2] = "Log2"; - names[GLSLstd450Sqrt] = "Sqrt"; - names[GLSLstd450InverseSqrt] = "InverseSqrt"; - names[GLSLstd450Determinant] = "Determinant"; - names[GLSLstd450MatrixInverse] = "MatrixInverse"; - names[GLSLstd450Modf] = "Modf"; - names[GLSLstd450ModfStruct] = "ModfStruct"; - names[GLSLstd450FMin] = "FMin"; - names[GLSLstd450SMin] = "SMin"; - names[GLSLstd450UMin] = "UMin"; - names[GLSLstd450FMax] = "FMax"; - names[GLSLstd450SMax] = "SMax"; - names[GLSLstd450UMax] = "UMax"; - names[GLSLstd450FClamp] = "FClamp"; - names[GLSLstd450SClamp] = "SClamp"; - names[GLSLstd450UClamp] = "UClamp"; - names[GLSLstd450FMix] = "FMix"; - names[GLSLstd450Step] = "Step"; - names[GLSLstd450SmoothStep] = "SmoothStep"; - names[GLSLstd450Fma] = "Fma"; - names[GLSLstd450Frexp] = "Frexp"; - names[GLSLstd450FrexpStruct] = "FrexpStruct"; - names[GLSLstd450Ldexp] = "Ldexp"; - names[GLSLstd450PackSnorm4x8] = "PackSnorm4x8"; - names[GLSLstd450PackUnorm4x8] = "PackUnorm4x8"; - names[GLSLstd450PackSnorm2x16] = 
"PackSnorm2x16"; - names[GLSLstd450PackUnorm2x16] = "PackUnorm2x16"; - names[GLSLstd450PackHalf2x16] = "PackHalf2x16"; - names[GLSLstd450PackDouble2x32] = "PackDouble2x32"; - names[GLSLstd450UnpackSnorm2x16] = "UnpackSnorm2x16"; - names[GLSLstd450UnpackUnorm2x16] = "UnpackUnorm2x16"; - names[GLSLstd450UnpackHalf2x16] = "UnpackHalf2x16"; - names[GLSLstd450UnpackSnorm4x8] = "UnpackSnorm4x8"; - names[GLSLstd450UnpackUnorm4x8] = "UnpackUnorm4x8"; - names[GLSLstd450UnpackDouble2x32] = "UnpackDouble2x32"; - names[GLSLstd450Length] = "Length"; - names[GLSLstd450Distance] = "Distance"; - names[GLSLstd450Cross] = "Cross"; - names[GLSLstd450Normalize] = "Normalize"; - names[GLSLstd450FaceForward] = "FaceForward"; - names[GLSLstd450Reflect] = "Reflect"; - names[GLSLstd450Refract] = "Refract"; - names[GLSLstd450FindILsb] = "FindILsb"; - names[GLSLstd450FindSMsb] = "FindSMsb"; - names[GLSLstd450FindUMsb] = "FindUMsb"; - names[GLSLstd450InterpolateAtCentroid] = "InterpolateAtCentroid"; - names[GLSLstd450InterpolateAtSample] = "InterpolateAtSample"; - names[GLSLstd450InterpolateAtOffset] = "InterpolateAtOffset"; -} - -#ifdef AMD_EXTENSIONS -static const char* GLSLextAMDGetDebugNames(const char* name, unsigned entrypoint) -{ - if (strcmp(name, spv::E_SPV_AMD_shader_ballot) == 0) { - switch (entrypoint) { - case SwizzleInvocationsAMD: return "SwizzleInvocationsAMD"; - case SwizzleInvocationsMaskedAMD: return "SwizzleInvocationsMaskedAMD"; - case WriteInvocationAMD: return "WriteInvocationAMD"; - case MbcntAMD: return "MbcntAMD"; - default: return "Bad"; - } - } else if (strcmp(name, spv::E_SPV_AMD_shader_trinary_minmax) == 0) { - switch (entrypoint) { - case FMin3AMD: return "FMin3AMD"; - case UMin3AMD: return "UMin3AMD"; - case SMin3AMD: return "SMin3AMD"; - case FMax3AMD: return "FMax3AMD"; - case UMax3AMD: return "UMax3AMD"; - case SMax3AMD: return "SMax3AMD"; - case FMid3AMD: return "FMid3AMD"; - case UMid3AMD: return "UMid3AMD"; - case SMid3AMD: return "SMid3AMD"; - default: 
return "Bad"; - } - } else if (strcmp(name, spv::E_SPV_AMD_shader_explicit_vertex_parameter) == 0) { - switch (entrypoint) { - case InterpolateAtVertexAMD: return "InterpolateAtVertexAMD"; - default: return "Bad"; - } - } - else if (strcmp(name, spv::E_SPV_AMD_gcn_shader) == 0) { - switch (entrypoint) { - case CubeFaceIndexAMD: return "CubeFaceIndexAMD"; - case CubeFaceCoordAMD: return "CubeFaceCoordAMD"; - case TimeAMD: return "TimeAMD"; - default: - break; - } - } - - return "Bad"; -} -#endif - - -#ifdef NV_EXTENSIONS -static const char* GLSLextNVGetDebugNames(const char* name, unsigned entrypoint) -{ - if (strcmp(name, spv::E_SPV_NV_sample_mask_override_coverage) == 0 || - strcmp(name, spv::E_SPV_NV_geometry_shader_passthrough) == 0 || - strcmp(name, spv::E_ARB_shader_viewport_layer_array) == 0 || - strcmp(name, spv::E_SPV_NV_viewport_array2) == 0 || - strcmp(spv::E_SPV_NVX_multiview_per_view_attributes, name) == 0) { - switch (entrypoint) { - case DecorationOverrideCoverageNV: return "OverrideCoverageNV"; - case DecorationPassthroughNV: return "PassthroughNV"; - case CapabilityGeometryShaderPassthroughNV: return "GeometryShaderPassthroughNV"; - case DecorationViewportRelativeNV: return "ViewportRelativeNV"; - case BuiltInViewportMaskNV: return "ViewportMaskNV"; - case CapabilityShaderViewportMaskNV: return "ShaderViewportMaskNV"; - case DecorationSecondaryViewportRelativeNV: return "SecondaryViewportRelativeNV"; - case BuiltInSecondaryPositionNV: return "SecondaryPositionNV"; - case BuiltInSecondaryViewportMaskNV: return "SecondaryViewportMaskNV"; - case CapabilityShaderStereoViewNV: return "ShaderStereoViewNV"; - case BuiltInPositionPerViewNV: return "PositionPerViewNV"; - case BuiltInViewportMaskPerViewNV: return "ViewportMaskPerViewNV"; - case CapabilityPerViewAttributesNV: return "PerViewAttributesNV"; - default: return "Bad"; - } - } - return "Bad"; -} -#endif - -void Disassemble(std::ostream& out, const std::vector& stream) -{ - SpirvStream 
SpirvStream(out, stream); - spv::Parameterize(); - GLSLstd450GetDebugNames(GlslStd450DebugNames); - SpirvStream.validate(); - SpirvStream.processInstructions(); -} - -}; // end namespace spv diff --git a/third_party/glslang-spirv/disassemble.h b/third_party/glslang-spirv/disassemble.h deleted file mode 100644 index 47cef65a5..000000000 --- a/third_party/glslang-spirv/disassemble.h +++ /dev/null @@ -1,52 +0,0 @@ -// -// Copyright (C) 2014-2015 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -// -// Disassembler for SPIR-V. -// - -#pragma once -#ifndef disassembler_H -#define disassembler_H - -#include -#include - -namespace spv { - - void Disassemble(std::ostream& out, const std::vector&); - -}; // end namespace spv - -#endif // disassembler_H diff --git a/third_party/glslang-spirv/doc.cpp b/third_party/glslang-spirv/doc.cpp deleted file mode 100644 index 809af4c1c..000000000 --- a/third_party/glslang-spirv/doc.cpp +++ /dev/null @@ -1,2894 +0,0 @@ -// -// Copyright (C) 2014-2015 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -// -// 1) Programmatically fill in instruction/operand information. -// This can be used for disassembly, printing documentation, etc. -// -// 2) Print documentation from this parameterization. -// - -#include "doc.h" - -#include -#include -#include - -namespace spv { - extern "C" { - // Include C-based headers that don't have a namespace - #include "GLSL.ext.KHR.h" - #include "GLSL.ext.EXT.h" -#ifdef AMD_EXTENSIONS - #include "GLSL.ext.AMD.h" -#endif -#ifdef NV_EXTENSIONS - #include "GLSL.ext.NV.h" -#endif - } -} - -namespace spv { - -// -// Whole set of functions that translate enumerants to their text strings for -// the specification (or their sanitized versions for auto-generating the -// spirv headers. -// -// Also, the ceilings are declared next to these, to help keep them in sync. 
-// Ceilings should be -// - one more than the maximum value an enumerant takes on, for non-mask enumerants -// (for non-sparse enums, this is the number of enumerants) -// - the number of bits consumed by the set of masks -// (for non-sparse mask enums, this is the number of enumerants) -// - -const int SourceLanguageCeiling = 6; // HLSL todo: need official enumerant - -const char* SourceString(int source) -{ - switch (source) { - case 0: return "Unknown"; - case 1: return "ESSL"; - case 2: return "GLSL"; - case 3: return "OpenCL_C"; - case 4: return "OpenCL_CPP"; - case 5: return "HLSL"; - - case SourceLanguageCeiling: - default: return "Bad"; - } -} - -const int ExecutionModelCeiling = 7; - -const char* ExecutionModelString(int model) -{ - switch (model) { - case 0: return "Vertex"; - case 1: return "TessellationControl"; - case 2: return "TessellationEvaluation"; - case 3: return "Geometry"; - case 4: return "Fragment"; - case 5: return "GLCompute"; - case 6: return "Kernel"; - - case ExecutionModelCeiling: - default: return "Bad"; - } -} - -const int AddressingModelCeiling = 3; - -const char* AddressingString(int addr) -{ - switch (addr) { - case 0: return "Logical"; - case 1: return "Physical32"; - case 2: return "Physical64"; - - case AddressingModelCeiling: - default: return "Bad"; - } -} - -const int MemoryModelCeiling = 3; - -const char* MemoryString(int mem) -{ - switch (mem) { - case 0: return "Simple"; - case 1: return "GLSL450"; - case 2: return "OpenCL"; - - case MemoryModelCeiling: - default: return "Bad"; - } -} - -const int ExecutionModeCeiling = 33; - -const char* ExecutionModeString(int mode) -{ - switch (mode) { - case 0: return "Invocations"; - case 1: return "SpacingEqual"; - case 2: return "SpacingFractionalEven"; - case 3: return "SpacingFractionalOdd"; - case 4: return "VertexOrderCw"; - case 5: return "VertexOrderCcw"; - case 6: return "PixelCenterInteger"; - case 7: return "OriginUpperLeft"; - case 8: return "OriginLowerLeft"; - case 9: 
return "EarlyFragmentTests"; - case 10: return "PointMode"; - case 11: return "Xfb"; - case 12: return "DepthReplacing"; - case 13: return "Bad"; - case 14: return "DepthGreater"; - case 15: return "DepthLess"; - case 16: return "DepthUnchanged"; - case 17: return "LocalSize"; - case 18: return "LocalSizeHint"; - case 19: return "InputPoints"; - case 20: return "InputLines"; - case 21: return "InputLinesAdjacency"; - case 22: return "Triangles"; - case 23: return "InputTrianglesAdjacency"; - case 24: return "Quads"; - case 25: return "Isolines"; - case 26: return "OutputVertices"; - case 27: return "OutputPoints"; - case 28: return "OutputLineStrip"; - case 29: return "OutputTriangleStrip"; - case 30: return "VecTypeHint"; - case 31: return "ContractionOff"; - case 32: return "Bad"; - - case 4446: return "PostDepthCoverage"; - case ExecutionModeCeiling: - default: return "Bad"; - } -} - -const int StorageClassCeiling = 13; - -const char* StorageClassString(int StorageClass) -{ - switch (StorageClass) { - case 0: return "UniformConstant"; - case 1: return "Input"; - case 2: return "Uniform"; - case 3: return "Output"; - case 4: return "Workgroup"; - case 5: return "CrossWorkgroup"; - case 6: return "Private"; - case 7: return "Function"; - case 8: return "Generic"; - case 9: return "PushConstant"; - case 10: return "AtomicCounter"; - case 11: return "Image"; - case 12: return "StorageBuffer"; - - case StorageClassCeiling: - default: return "Bad"; - } -} - -const int DecorationCeiling = 45; - -const char* DecorationString(int decoration) -{ - switch (decoration) { - case 0: return "RelaxedPrecision"; - case 1: return "SpecId"; - case 2: return "Block"; - case 3: return "BufferBlock"; - case 4: return "RowMajor"; - case 5: return "ColMajor"; - case 6: return "ArrayStride"; - case 7: return "MatrixStride"; - case 8: return "GLSLShared"; - case 9: return "GLSLPacked"; - case 10: return "CPacked"; - case 11: return "BuiltIn"; - case 12: return "Bad"; - case 13: return 
"NoPerspective"; - case 14: return "Flat"; - case 15: return "Patch"; - case 16: return "Centroid"; - case 17: return "Sample"; - case 18: return "Invariant"; - case 19: return "Restrict"; - case 20: return "Aliased"; - case 21: return "Volatile"; - case 22: return "Constant"; - case 23: return "Coherent"; - case 24: return "NonWritable"; - case 25: return "NonReadable"; - case 26: return "Uniform"; - case 27: return "Bad"; - case 28: return "SaturatedConversion"; - case 29: return "Stream"; - case 30: return "Location"; - case 31: return "Component"; - case 32: return "Index"; - case 33: return "Binding"; - case 34: return "DescriptorSet"; - case 35: return "Offset"; - case 36: return "XfbBuffer"; - case 37: return "XfbStride"; - case 38: return "FuncParamAttr"; - case 39: return "FP Rounding Mode"; - case 40: return "FP Fast Math Mode"; - case 41: return "Linkage Attributes"; - case 42: return "NoContraction"; - case 43: return "InputAttachmentIndex"; - case 44: return "Alignment"; - - case DecorationCeiling: - default: return "Bad"; - -#ifdef AMD_EXTENSIONS - case 4999: return "ExplicitInterpAMD"; -#endif -#ifdef NV_EXTENSIONS - case 5248: return "OverrideCoverageNV"; - case 5250: return "PassthroughNV"; - case 5252: return "ViewportRelativeNV"; - case 5256: return "SecondaryViewportRelativeNV"; -#endif - } -} - -const int BuiltInCeiling = 44; - -const char* BuiltInString(int builtIn) -{ - switch (builtIn) { - case 0: return "Position"; - case 1: return "PointSize"; - case 2: return "Bad"; - case 3: return "ClipDistance"; - case 4: return "CullDistance"; - case 5: return "VertexId"; - case 6: return "InstanceId"; - case 7: return "PrimitiveId"; - case 8: return "InvocationId"; - case 9: return "Layer"; - case 10: return "ViewportIndex"; - case 11: return "TessLevelOuter"; - case 12: return "TessLevelInner"; - case 13: return "TessCoord"; - case 14: return "PatchVertices"; - case 15: return "FragCoord"; - case 16: return "PointCoord"; - case 17: return 
"FrontFacing"; - case 18: return "SampleId"; - case 19: return "SamplePosition"; - case 20: return "SampleMask"; - case 21: return "Bad"; - case 22: return "FragDepth"; - case 23: return "HelperInvocation"; - case 24: return "NumWorkgroups"; - case 25: return "WorkgroupSize"; - case 26: return "WorkgroupId"; - case 27: return "LocalInvocationId"; - case 28: return "GlobalInvocationId"; - case 29: return "LocalInvocationIndex"; - case 30: return "WorkDim"; - case 31: return "GlobalSize"; - case 32: return "EnqueuedWorkgroupSize"; - case 33: return "GlobalOffset"; - case 34: return "GlobalLinearId"; - case 35: return "Bad"; - case 36: return "SubgroupSize"; - case 37: return "SubgroupMaxSize"; - case 38: return "NumSubgroups"; - case 39: return "NumEnqueuedSubgroups"; - case 40: return "SubgroupId"; - case 41: return "SubgroupLocalInvocationId"; - case 42: return "VertexIndex"; // TBD: put next to VertexId? - case 43: return "InstanceIndex"; // TBD: put next to InstanceId? - - case 4416: return "SubgroupEqMaskKHR"; - case 4417: return "SubgroupGeMaskKHR"; - case 4418: return "SubgroupGtMaskKHR"; - case 4419: return "SubgroupLeMaskKHR"; - case 4420: return "SubgroupLtMaskKHR"; - case 4438: return "DeviceIndex"; - case 4440: return "ViewIndex"; - case 4424: return "BaseVertex"; - case 4425: return "BaseInstance"; - case 4426: return "DrawIndex"; - case 5014: return "FragStencilRefEXT"; - -#ifdef AMD_EXTENSIONS - case 4992: return "BaryCoordNoPerspAMD"; - case 4993: return "BaryCoordNoPerspCentroidAMD"; - case 4994: return "BaryCoordNoPerspSampleAMD"; - case 4995: return "BaryCoordSmoothAMD"; - case 4996: return "BaryCoordSmoothCentroidAMD"; - case 4997: return "BaryCoordSmoothSampleAMD"; - case 4998: return "BaryCoordPullModelAMD"; -#endif - -#ifdef NV_EXTENSIONS - case 5253: return "ViewportMaskNV"; - case 5257: return "SecondaryPositionNV"; - case 5258: return "SecondaryViewportMaskNV"; - case 5261: return "PositionPerViewNV"; - case 5262: return 
"ViewportMaskPerViewNV"; -#endif - - case 5264: return "FullyCoveredEXT"; - - case BuiltInCeiling: - default: return "Bad"; - } -} - -const int DimensionCeiling = 7; - -const char* DimensionString(int dim) -{ - switch (dim) { - case 0: return "1D"; - case 1: return "2D"; - case 2: return "3D"; - case 3: return "Cube"; - case 4: return "Rect"; - case 5: return "Buffer"; - case 6: return "SubpassData"; - - case DimensionCeiling: - default: return "Bad"; - } -} - -const int SamplerAddressingModeCeiling = 5; - -const char* SamplerAddressingModeString(int mode) -{ - switch (mode) { - case 0: return "None"; - case 1: return "ClampToEdge"; - case 2: return "Clamp"; - case 3: return "Repeat"; - case 4: return "RepeatMirrored"; - - case SamplerAddressingModeCeiling: - default: return "Bad"; - } -} - -const int SamplerFilterModeCeiling = 2; - -const char* SamplerFilterModeString(int mode) -{ - switch (mode) { - case 0: return "Nearest"; - case 1: return "Linear"; - - case SamplerFilterModeCeiling: - default: return "Bad"; - } -} - -const int ImageFormatCeiling = 40; - -const char* ImageFormatString(int format) -{ - switch (format) { - case 0: return "Unknown"; - - // ES/Desktop float - case 1: return "Rgba32f"; - case 2: return "Rgba16f"; - case 3: return "R32f"; - case 4: return "Rgba8"; - case 5: return "Rgba8Snorm"; - - // Desktop float - case 6: return "Rg32f"; - case 7: return "Rg16f"; - case 8: return "R11fG11fB10f"; - case 9: return "R16f"; - case 10: return "Rgba16"; - case 11: return "Rgb10A2"; - case 12: return "Rg16"; - case 13: return "Rg8"; - case 14: return "R16"; - case 15: return "R8"; - case 16: return "Rgba16Snorm"; - case 17: return "Rg16Snorm"; - case 18: return "Rg8Snorm"; - case 19: return "R16Snorm"; - case 20: return "R8Snorm"; - - // ES/Desktop int - case 21: return "Rgba32i"; - case 22: return "Rgba16i"; - case 23: return "Rgba8i"; - case 24: return "R32i"; - - // Desktop int - case 25: return "Rg32i"; - case 26: return "Rg16i"; - case 27: return 
"Rg8i"; - case 28: return "R16i"; - case 29: return "R8i"; - - // ES/Desktop uint - case 30: return "Rgba32ui"; - case 31: return "Rgba16ui"; - case 32: return "Rgba8ui"; - case 33: return "R32ui"; - - // Desktop uint - case 34: return "Rgb10a2ui"; - case 35: return "Rg32ui"; - case 36: return "Rg16ui"; - case 37: return "Rg8ui"; - case 38: return "R16ui"; - case 39: return "R8ui"; - - case ImageFormatCeiling: - default: - return "Bad"; - } -} - -const int ImageChannelOrderCeiling = 19; - -const char* ImageChannelOrderString(int format) -{ - switch (format) { - case 0: return "R"; - case 1: return "A"; - case 2: return "RG"; - case 3: return "RA"; - case 4: return "RGB"; - case 5: return "RGBA"; - case 6: return "BGRA"; - case 7: return "ARGB"; - case 8: return "Intensity"; - case 9: return "Luminance"; - case 10: return "Rx"; - case 11: return "RGx"; - case 12: return "RGBx"; - case 13: return "Depth"; - case 14: return "DepthStencil"; - case 15: return "sRGB"; - case 16: return "sRGBx"; - case 17: return "sRGBA"; - case 18: return "sBGRA"; - - case ImageChannelOrderCeiling: - default: - return "Bad"; - } -} - -const int ImageChannelDataTypeCeiling = 17; - -const char* ImageChannelDataTypeString(int type) -{ - switch (type) - { - case 0: return "SnormInt8"; - case 1: return "SnormInt16"; - case 2: return "UnormInt8"; - case 3: return "UnormInt16"; - case 4: return "UnormShort565"; - case 5: return "UnormShort555"; - case 6: return "UnormInt101010"; - case 7: return "SignedInt8"; - case 8: return "SignedInt16"; - case 9: return "SignedInt32"; - case 10: return "UnsignedInt8"; - case 11: return "UnsignedInt16"; - case 12: return "UnsignedInt32"; - case 13: return "HalfFloat"; - case 14: return "Float"; - case 15: return "UnormInt24"; - case 16: return "UnormInt101010_2"; - - case ImageChannelDataTypeCeiling: - default: - return "Bad"; - } -} - -const int ImageOperandsCeiling = 8; - -const char* ImageOperandsString(int format) -{ - switch (format) { - case 0: return 
"Bias"; - case 1: return "Lod"; - case 2: return "Grad"; - case 3: return "ConstOffset"; - case 4: return "Offset"; - case 5: return "ConstOffsets"; - case 6: return "Sample"; - case 7: return "MinLod"; - - case ImageOperandsCeiling: - default: - return "Bad"; - } -} - -const int FPFastMathCeiling = 5; - -const char* FPFastMathString(int mode) -{ - switch (mode) { - case 0: return "NotNaN"; - case 1: return "NotInf"; - case 2: return "NSZ"; - case 3: return "AllowRecip"; - case 4: return "Fast"; - - case FPFastMathCeiling: - default: return "Bad"; - } -} - -const int FPRoundingModeCeiling = 4; - -const char* FPRoundingModeString(int mode) -{ - switch (mode) { - case 0: return "RTE"; - case 1: return "RTZ"; - case 2: return "RTP"; - case 3: return "RTN"; - - case FPRoundingModeCeiling: - default: return "Bad"; - } -} - -const int LinkageTypeCeiling = 2; - -const char* LinkageTypeString(int type) -{ - switch (type) { - case 0: return "Export"; - case 1: return "Import"; - - case LinkageTypeCeiling: - default: return "Bad"; - } -} - -const int FuncParamAttrCeiling = 8; - -const char* FuncParamAttrString(int attr) -{ - switch (attr) { - case 0: return "Zext"; - case 1: return "Sext"; - case 2: return "ByVal"; - case 3: return "Sret"; - case 4: return "NoAlias"; - case 5: return "NoCapture"; - case 6: return "NoWrite"; - case 7: return "NoReadWrite"; - - case FuncParamAttrCeiling: - default: return "Bad"; - } -} - -const int AccessQualifierCeiling = 3; - -const char* AccessQualifierString(int attr) -{ - switch (attr) { - case 0: return "ReadOnly"; - case 1: return "WriteOnly"; - case 2: return "ReadWrite"; - - case AccessQualifierCeiling: - default: return "Bad"; - } -} - -const int SelectControlCeiling = 2; - -const char* SelectControlString(int cont) -{ - switch (cont) { - case 0: return "Flatten"; - case 1: return "DontFlatten"; - - case SelectControlCeiling: - default: return "Bad"; - } -} - -const int LoopControlCeiling = 4; - -const char* LoopControlString(int 
cont) -{ - switch (cont) { - case 0: return "Unroll"; - case 1: return "DontUnroll"; - case 2: return "DependencyInfinite"; - case 3: return "DependencyLength"; - - case LoopControlCeiling: - default: return "Bad"; - } -} - -const int FunctionControlCeiling = 4; - -const char* FunctionControlString(int cont) -{ - switch (cont) { - case 0: return "Inline"; - case 1: return "DontInline"; - case 2: return "Pure"; - case 3: return "Const"; - - case FunctionControlCeiling: - default: return "Bad"; - } -} - -const int MemorySemanticsCeiling = 12; - -const char* MemorySemanticsString(int mem) -{ - // Note: No bits set (None) means "Relaxed" - switch (mem) { - case 0: return "Bad"; // Note: this is a placeholder for 'Consume' - case 1: return "Acquire"; - case 2: return "Release"; - case 3: return "AcquireRelease"; - case 4: return "SequentiallyConsistent"; - case 5: return "Bad"; // Note: reserved for future expansion - case 6: return "UniformMemory"; - case 7: return "SubgroupMemory"; - case 8: return "WorkgroupMemory"; - case 9: return "CrossWorkgroupMemory"; - case 10: return "AtomicCounterMemory"; - case 11: return "ImageMemory"; - - case MemorySemanticsCeiling: - default: return "Bad"; - } -} - -const int MemoryAccessCeiling = 3; - -const char* MemoryAccessString(int mem) -{ - switch (mem) { - case 0: return "Volatile"; - case 1: return "Aligned"; - case 2: return "Nontemporal"; - - case MemoryAccessCeiling: - default: return "Bad"; - } -} - -const int ScopeCeiling = 5; - -const char* ScopeString(int mem) -{ - switch (mem) { - case 0: return "CrossDevice"; - case 1: return "Device"; - case 2: return "Workgroup"; - case 3: return "Subgroup"; - case 4: return "Invocation"; - - case ScopeCeiling: - default: return "Bad"; - } -} - -const int GroupOperationCeiling = 3; - -const char* GroupOperationString(int gop) -{ - - switch (gop) - { - case 0: return "Reduce"; - case 1: return "InclusiveScan"; - case 2: return "ExclusiveScan"; - - case GroupOperationCeiling: - default: 
return "Bad"; - } -} - -const int KernelEnqueueFlagsCeiling = 3; - -const char* KernelEnqueueFlagsString(int flag) -{ - switch (flag) - { - case 0: return "NoWait"; - case 1: return "WaitKernel"; - case 2: return "WaitWorkGroup"; - - case KernelEnqueueFlagsCeiling: - default: return "Bad"; - } -} - -const int KernelProfilingInfoCeiling = 1; - -const char* KernelProfilingInfoString(int info) -{ - switch (info) - { - case 0: return "CmdExecTime"; - - case KernelProfilingInfoCeiling: - default: return "Bad"; - } -} - -const int CapabilityCeiling = 58; - -const char* CapabilityString(int info) -{ - switch (info) - { - case 0: return "Matrix"; - case 1: return "Shader"; - case 2: return "Geometry"; - case 3: return "Tessellation"; - case 4: return "Addresses"; - case 5: return "Linkage"; - case 6: return "Kernel"; - case 7: return "Vector16"; - case 8: return "Float16Buffer"; - case 9: return "Float16"; - case 10: return "Float64"; - case 11: return "Int64"; - case 12: return "Int64Atomics"; - case 13: return "ImageBasic"; - case 14: return "ImageReadWrite"; - case 15: return "ImageMipmap"; - case 16: return "Bad"; - case 17: return "Pipes"; - case 18: return "Groups"; - case 19: return "DeviceEnqueue"; - case 20: return "LiteralSampler"; - case 21: return "AtomicStorage"; - case 22: return "Int16"; - case 23: return "TessellationPointSize"; - case 24: return "GeometryPointSize"; - case 25: return "ImageGatherExtended"; - case 26: return "Bad"; - case 27: return "StorageImageMultisample"; - case 28: return "UniformBufferArrayDynamicIndexing"; - case 29: return "SampledImageArrayDynamicIndexing"; - case 30: return "StorageBufferArrayDynamicIndexing"; - case 31: return "StorageImageArrayDynamicIndexing"; - case 32: return "ClipDistance"; - case 33: return "CullDistance"; - case 34: return "ImageCubeArray"; - case 35: return "SampleRateShading"; - case 36: return "ImageRect"; - case 37: return "SampledRect"; - case 38: return "GenericPointer"; - case 39: return "Int8"; - 
case 40: return "InputAttachment"; - case 41: return "SparseResidency"; - case 42: return "MinLod"; - case 43: return "Sampled1D"; - case 44: return "Image1D"; - case 45: return "SampledCubeArray"; - case 46: return "SampledBuffer"; - case 47: return "ImageBuffer"; - case 48: return "ImageMSArray"; - case 49: return "StorageImageExtendedFormats"; - case 50: return "ImageQuery"; - case 51: return "DerivativeControl"; - case 52: return "InterpolationFunction"; - case 53: return "TransformFeedback"; - case 54: return "GeometryStreams"; - case 55: return "StorageImageReadWithoutFormat"; - case 56: return "StorageImageWriteWithoutFormat"; - case 57: return "MultiViewport"; - - case 4423: return "SubgroupBallotKHR"; - case 4427: return "DrawParameters"; - case 4431: return "SubgroupVoteKHR"; - - case 4433: return "StorageUniformBufferBlock16"; - case 4434: return "StorageUniform16"; - case 4435: return "StoragePushConstant16"; - case 4436: return "StorageInputOutput16"; - - case 4437: return "DeviceGroup"; - case 4439: return "MultiView"; - - case 5013: return "StencilExportEXT"; - -#ifdef AMD_EXTENSIONS - case 5009: return "ImageGatherBiasLodAMD"; - case 5010: return "FragmentMaskAMD"; - case 5015: return "ImageReadWriteLodAMD"; -#endif - - case 4445: return "AtomicStorageOps"; - - case 4447: return "SampleMaskPostDepthCoverage"; -#ifdef NV_EXTENSIONS - case 5251: return "GeometryShaderPassthroughNV"; - case 5254: return "ShaderViewportIndexLayerNV"; - case 5255: return "ShaderViewportMaskNV"; - case 5259: return "ShaderStereoViewNV"; - case 5260: return "PerViewAttributesNV"; -#endif - - case 5265: return "FragmentFullyCoveredEXT"; - - case CapabilityCeiling: - default: return "Bad"; - } -} - -const char* OpcodeString(int op) -{ - switch (op) { - case 0: return "OpNop"; - case 1: return "OpUndef"; - case 2: return "OpSourceContinued"; - case 3: return "OpSource"; - case 4: return "OpSourceExtension"; - case 5: return "OpName"; - case 6: return "OpMemberName"; - case 7: 
return "OpString"; - case 8: return "OpLine"; - case 9: return "Bad"; - case 10: return "OpExtension"; - case 11: return "OpExtInstImport"; - case 12: return "OpExtInst"; - case 13: return "Bad"; - case 14: return "OpMemoryModel"; - case 15: return "OpEntryPoint"; - case 16: return "OpExecutionMode"; - case 17: return "OpCapability"; - case 18: return "Bad"; - case 19: return "OpTypeVoid"; - case 20: return "OpTypeBool"; - case 21: return "OpTypeInt"; - case 22: return "OpTypeFloat"; - case 23: return "OpTypeVector"; - case 24: return "OpTypeMatrix"; - case 25: return "OpTypeImage"; - case 26: return "OpTypeSampler"; - case 27: return "OpTypeSampledImage"; - case 28: return "OpTypeArray"; - case 29: return "OpTypeRuntimeArray"; - case 30: return "OpTypeStruct"; - case 31: return "OpTypeOpaque"; - case 32: return "OpTypePointer"; - case 33: return "OpTypeFunction"; - case 34: return "OpTypeEvent"; - case 35: return "OpTypeDeviceEvent"; - case 36: return "OpTypeReserveId"; - case 37: return "OpTypeQueue"; - case 38: return "OpTypePipe"; - case 39: return "OpTypeForwardPointer"; - case 40: return "Bad"; - case 41: return "OpConstantTrue"; - case 42: return "OpConstantFalse"; - case 43: return "OpConstant"; - case 44: return "OpConstantComposite"; - case 45: return "OpConstantSampler"; - case 46: return "OpConstantNull"; - case 47: return "Bad"; - case 48: return "OpSpecConstantTrue"; - case 49: return "OpSpecConstantFalse"; - case 50: return "OpSpecConstant"; - case 51: return "OpSpecConstantComposite"; - case 52: return "OpSpecConstantOp"; - case 53: return "Bad"; - case 54: return "OpFunction"; - case 55: return "OpFunctionParameter"; - case 56: return "OpFunctionEnd"; - case 57: return "OpFunctionCall"; - case 58: return "Bad"; - case 59: return "OpVariable"; - case 60: return "OpImageTexelPointer"; - case 61: return "OpLoad"; - case 62: return "OpStore"; - case 63: return "OpCopyMemory"; - case 64: return "OpCopyMemorySized"; - case 65: return "OpAccessChain"; - 
case 66: return "OpInBoundsAccessChain"; - case 67: return "OpPtrAccessChain"; - case 68: return "OpArrayLength"; - case 69: return "OpGenericPtrMemSemantics"; - case 70: return "OpInBoundsPtrAccessChain"; - case 71: return "OpDecorate"; - case 72: return "OpMemberDecorate"; - case 73: return "OpDecorationGroup"; - case 74: return "OpGroupDecorate"; - case 75: return "OpGroupMemberDecorate"; - case 76: return "Bad"; - case 77: return "OpVectorExtractDynamic"; - case 78: return "OpVectorInsertDynamic"; - case 79: return "OpVectorShuffle"; - case 80: return "OpCompositeConstruct"; - case 81: return "OpCompositeExtract"; - case 82: return "OpCompositeInsert"; - case 83: return "OpCopyObject"; - case 84: return "OpTranspose"; - case 85: return "Bad"; - case 86: return "OpSampledImage"; - case 87: return "OpImageSampleImplicitLod"; - case 88: return "OpImageSampleExplicitLod"; - case 89: return "OpImageSampleDrefImplicitLod"; - case 90: return "OpImageSampleDrefExplicitLod"; - case 91: return "OpImageSampleProjImplicitLod"; - case 92: return "OpImageSampleProjExplicitLod"; - case 93: return "OpImageSampleProjDrefImplicitLod"; - case 94: return "OpImageSampleProjDrefExplicitLod"; - case 95: return "OpImageFetch"; - case 96: return "OpImageGather"; - case 97: return "OpImageDrefGather"; - case 98: return "OpImageRead"; - case 99: return "OpImageWrite"; - case 100: return "OpImage"; - case 101: return "OpImageQueryFormat"; - case 102: return "OpImageQueryOrder"; - case 103: return "OpImageQuerySizeLod"; - case 104: return "OpImageQuerySize"; - case 105: return "OpImageQueryLod"; - case 106: return "OpImageQueryLevels"; - case 107: return "OpImageQuerySamples"; - case 108: return "Bad"; - case 109: return "OpConvertFToU"; - case 110: return "OpConvertFToS"; - case 111: return "OpConvertSToF"; - case 112: return "OpConvertUToF"; - case 113: return "OpUConvert"; - case 114: return "OpSConvert"; - case 115: return "OpFConvert"; - case 116: return "OpQuantizeToF16"; - case 117: 
return "OpConvertPtrToU"; - case 118: return "OpSatConvertSToU"; - case 119: return "OpSatConvertUToS"; - case 120: return "OpConvertUToPtr"; - case 121: return "OpPtrCastToGeneric"; - case 122: return "OpGenericCastToPtr"; - case 123: return "OpGenericCastToPtrExplicit"; - case 124: return "OpBitcast"; - case 125: return "Bad"; - case 126: return "OpSNegate"; - case 127: return "OpFNegate"; - case 128: return "OpIAdd"; - case 129: return "OpFAdd"; - case 130: return "OpISub"; - case 131: return "OpFSub"; - case 132: return "OpIMul"; - case 133: return "OpFMul"; - case 134: return "OpUDiv"; - case 135: return "OpSDiv"; - case 136: return "OpFDiv"; - case 137: return "OpUMod"; - case 138: return "OpSRem"; - case 139: return "OpSMod"; - case 140: return "OpFRem"; - case 141: return "OpFMod"; - case 142: return "OpVectorTimesScalar"; - case 143: return "OpMatrixTimesScalar"; - case 144: return "OpVectorTimesMatrix"; - case 145: return "OpMatrixTimesVector"; - case 146: return "OpMatrixTimesMatrix"; - case 147: return "OpOuterProduct"; - case 148: return "OpDot"; - case 149: return "OpIAddCarry"; - case 150: return "OpISubBorrow"; - case 151: return "OpUMulExtended"; - case 152: return "OpSMulExtended"; - case 153: return "Bad"; - case 154: return "OpAny"; - case 155: return "OpAll"; - case 156: return "OpIsNan"; - case 157: return "OpIsInf"; - case 158: return "OpIsFinite"; - case 159: return "OpIsNormal"; - case 160: return "OpSignBitSet"; - case 161: return "OpLessOrGreater"; - case 162: return "OpOrdered"; - case 163: return "OpUnordered"; - case 164: return "OpLogicalEqual"; - case 165: return "OpLogicalNotEqual"; - case 166: return "OpLogicalOr"; - case 167: return "OpLogicalAnd"; - case 168: return "OpLogicalNot"; - case 169: return "OpSelect"; - case 170: return "OpIEqual"; - case 171: return "OpINotEqual"; - case 172: return "OpUGreaterThan"; - case 173: return "OpSGreaterThan"; - case 174: return "OpUGreaterThanEqual"; - case 175: return 
"OpSGreaterThanEqual"; - case 176: return "OpULessThan"; - case 177: return "OpSLessThan"; - case 178: return "OpULessThanEqual"; - case 179: return "OpSLessThanEqual"; - case 180: return "OpFOrdEqual"; - case 181: return "OpFUnordEqual"; - case 182: return "OpFOrdNotEqual"; - case 183: return "OpFUnordNotEqual"; - case 184: return "OpFOrdLessThan"; - case 185: return "OpFUnordLessThan"; - case 186: return "OpFOrdGreaterThan"; - case 187: return "OpFUnordGreaterThan"; - case 188: return "OpFOrdLessThanEqual"; - case 189: return "OpFUnordLessThanEqual"; - case 190: return "OpFOrdGreaterThanEqual"; - case 191: return "OpFUnordGreaterThanEqual"; - case 192: return "Bad"; - case 193: return "Bad"; - case 194: return "OpShiftRightLogical"; - case 195: return "OpShiftRightArithmetic"; - case 196: return "OpShiftLeftLogical"; - case 197: return "OpBitwiseOr"; - case 198: return "OpBitwiseXor"; - case 199: return "OpBitwiseAnd"; - case 200: return "OpNot"; - case 201: return "OpBitFieldInsert"; - case 202: return "OpBitFieldSExtract"; - case 203: return "OpBitFieldUExtract"; - case 204: return "OpBitReverse"; - case 205: return "OpBitCount"; - case 206: return "Bad"; - case 207: return "OpDPdx"; - case 208: return "OpDPdy"; - case 209: return "OpFwidth"; - case 210: return "OpDPdxFine"; - case 211: return "OpDPdyFine"; - case 212: return "OpFwidthFine"; - case 213: return "OpDPdxCoarse"; - case 214: return "OpDPdyCoarse"; - case 215: return "OpFwidthCoarse"; - case 216: return "Bad"; - case 217: return "Bad"; - case 218: return "OpEmitVertex"; - case 219: return "OpEndPrimitive"; - case 220: return "OpEmitStreamVertex"; - case 221: return "OpEndStreamPrimitive"; - case 222: return "Bad"; - case 223: return "Bad"; - case 224: return "OpControlBarrier"; - case 225: return "OpMemoryBarrier"; - case 226: return "Bad"; - case 227: return "OpAtomicLoad"; - case 228: return "OpAtomicStore"; - case 229: return "OpAtomicExchange"; - case 230: return "OpAtomicCompareExchange"; - 
case 231: return "OpAtomicCompareExchangeWeak"; - case 232: return "OpAtomicIIncrement"; - case 233: return "OpAtomicIDecrement"; - case 234: return "OpAtomicIAdd"; - case 235: return "OpAtomicISub"; - case 236: return "OpAtomicSMin"; - case 237: return "OpAtomicUMin"; - case 238: return "OpAtomicSMax"; - case 239: return "OpAtomicUMax"; - case 240: return "OpAtomicAnd"; - case 241: return "OpAtomicOr"; - case 242: return "OpAtomicXor"; - case 243: return "Bad"; - case 244: return "Bad"; - case 245: return "OpPhi"; - case 246: return "OpLoopMerge"; - case 247: return "OpSelectionMerge"; - case 248: return "OpLabel"; - case 249: return "OpBranch"; - case 250: return "OpBranchConditional"; - case 251: return "OpSwitch"; - case 252: return "OpKill"; - case 253: return "OpReturn"; - case 254: return "OpReturnValue"; - case 255: return "OpUnreachable"; - case 256: return "OpLifetimeStart"; - case 257: return "OpLifetimeStop"; - case 258: return "Bad"; - case 259: return "OpGroupAsyncCopy"; - case 260: return "OpGroupWaitEvents"; - case 261: return "OpGroupAll"; - case 262: return "OpGroupAny"; - case 263: return "OpGroupBroadcast"; - case 264: return "OpGroupIAdd"; - case 265: return "OpGroupFAdd"; - case 266: return "OpGroupFMin"; - case 267: return "OpGroupUMin"; - case 268: return "OpGroupSMin"; - case 269: return "OpGroupFMax"; - case 270: return "OpGroupUMax"; - case 271: return "OpGroupSMax"; - case 272: return "Bad"; - case 273: return "Bad"; - case 274: return "OpReadPipe"; - case 275: return "OpWritePipe"; - case 276: return "OpReservedReadPipe"; - case 277: return "OpReservedWritePipe"; - case 278: return "OpReserveReadPipePackets"; - case 279: return "OpReserveWritePipePackets"; - case 280: return "OpCommitReadPipe"; - case 281: return "OpCommitWritePipe"; - case 282: return "OpIsValidReserveId"; - case 283: return "OpGetNumPipePackets"; - case 284: return "OpGetMaxPipePackets"; - case 285: return "OpGroupReserveReadPipePackets"; - case 286: return 
"OpGroupReserveWritePipePackets"; - case 287: return "OpGroupCommitReadPipe"; - case 288: return "OpGroupCommitWritePipe"; - case 289: return "Bad"; - case 290: return "Bad"; - case 291: return "OpEnqueueMarker"; - case 292: return "OpEnqueueKernel"; - case 293: return "OpGetKernelNDrangeSubGroupCount"; - case 294: return "OpGetKernelNDrangeMaxSubGroupSize"; - case 295: return "OpGetKernelWorkGroupSize"; - case 296: return "OpGetKernelPreferredWorkGroupSizeMultiple"; - case 297: return "OpRetainEvent"; - case 298: return "OpReleaseEvent"; - case 299: return "OpCreateUserEvent"; - case 300: return "OpIsValidEvent"; - case 301: return "OpSetUserEventStatus"; - case 302: return "OpCaptureEventProfilingInfo"; - case 303: return "OpGetDefaultQueue"; - case 304: return "OpBuildNDRange"; - case 305: return "OpImageSparseSampleImplicitLod"; - case 306: return "OpImageSparseSampleExplicitLod"; - case 307: return "OpImageSparseSampleDrefImplicitLod"; - case 308: return "OpImageSparseSampleDrefExplicitLod"; - case 309: return "OpImageSparseSampleProjImplicitLod"; - case 310: return "OpImageSparseSampleProjExplicitLod"; - case 311: return "OpImageSparseSampleProjDrefImplicitLod"; - case 312: return "OpImageSparseSampleProjDrefExplicitLod"; - case 313: return "OpImageSparseFetch"; - case 314: return "OpImageSparseGather"; - case 315: return "OpImageSparseDrefGather"; - case 316: return "OpImageSparseTexelsResident"; - case 317: return "OpNoLine"; - case 318: return "OpAtomicFlagTestAndSet"; - case 319: return "OpAtomicFlagClear"; - case 320: return "OpImageSparseRead"; - - case 4421: return "OpSubgroupBallotKHR"; - case 4422: return "OpSubgroupFirstInvocationKHR"; - case 4428: return "OpSubgroupAllKHR"; - case 4429: return "OpSubgroupAnyKHR"; - case 4430: return "OpSubgroupAllEqualKHR"; - case 4432: return "OpSubgroupReadInvocationKHR"; - -#ifdef AMD_EXTENSIONS - case 5000: return "OpGroupIAddNonUniformAMD"; - case 5001: return "OpGroupFAddNonUniformAMD"; - case 5002: return 
"OpGroupFMinNonUniformAMD"; - case 5003: return "OpGroupUMinNonUniformAMD"; - case 5004: return "OpGroupSMinNonUniformAMD"; - case 5005: return "OpGroupFMaxNonUniformAMD"; - case 5006: return "OpGroupUMaxNonUniformAMD"; - case 5007: return "OpGroupSMaxNonUniformAMD"; - - case 5011: return "OpFragmentMaskFetchAMD"; - case 5012: return "OpFragmentFetchAMD"; -#endif - - case OpcodeCeiling: - default: - return "Bad"; - } -} - -// The set of objects that hold all the instruction/operand -// parameterization information. -InstructionParameters InstructionDesc[OpCodeMask + 1]; -OperandParameters ExecutionModeOperands[ExecutionModeCeiling]; -OperandParameters DecorationOperands[DecorationCeiling]; - -EnumDefinition OperandClassParams[OperandCount]; -EnumParameters ExecutionModelParams[ExecutionModelCeiling]; -EnumParameters AddressingParams[AddressingModelCeiling]; -EnumParameters MemoryParams[MemoryModelCeiling]; -EnumParameters ExecutionModeParams[ExecutionModeCeiling]; -EnumParameters StorageParams[StorageClassCeiling]; -EnumParameters SamplerAddressingModeParams[SamplerAddressingModeCeiling]; -EnumParameters SamplerFilterModeParams[SamplerFilterModeCeiling]; -EnumParameters ImageFormatParams[ImageFormatCeiling]; -EnumParameters ImageChannelOrderParams[ImageChannelOrderCeiling]; -EnumParameters ImageChannelDataTypeParams[ImageChannelDataTypeCeiling]; -EnumParameters ImageOperandsParams[ImageOperandsCeiling]; -EnumParameters FPFastMathParams[FPFastMathCeiling]; -EnumParameters FPRoundingModeParams[FPRoundingModeCeiling]; -EnumParameters LinkageTypeParams[LinkageTypeCeiling]; -EnumParameters DecorationParams[DecorationCeiling]; -EnumParameters BuiltInParams[BuiltInCeiling]; -EnumParameters DimensionalityParams[DimensionCeiling]; -EnumParameters FuncParamAttrParams[FuncParamAttrCeiling]; -EnumParameters AccessQualifierParams[AccessQualifierCeiling]; -EnumParameters GroupOperationParams[GroupOperationCeiling]; -EnumParameters LoopControlParams[FunctionControlCeiling]; 
-EnumParameters SelectionControlParams[SelectControlCeiling]; -EnumParameters FunctionControlParams[FunctionControlCeiling]; -EnumParameters MemorySemanticsParams[MemorySemanticsCeiling]; -EnumParameters MemoryAccessParams[MemoryAccessCeiling]; -EnumParameters ScopeParams[ScopeCeiling]; -EnumParameters KernelEnqueueFlagsParams[KernelEnqueueFlagsCeiling]; -EnumParameters KernelProfilingInfoParams[KernelProfilingInfoCeiling]; -EnumParameters CapabilityParams[CapabilityCeiling]; - -// Set up all the parameterizing descriptions of the opcodes, operands, etc. -void Parameterize() -{ - // only do this once. - static bool initialized = false; - if (initialized) - return; - initialized = true; - - // Exceptions to having a result and a resulting type . - // (Everything is initialized to have both). - - InstructionDesc[OpNop].setResultAndType(false, false); - InstructionDesc[OpSource].setResultAndType(false, false); - InstructionDesc[OpSourceContinued].setResultAndType(false, false); - InstructionDesc[OpSourceExtension].setResultAndType(false, false); - InstructionDesc[OpExtension].setResultAndType(false, false); - InstructionDesc[OpExtInstImport].setResultAndType(true, false); - InstructionDesc[OpCapability].setResultAndType(false, false); - InstructionDesc[OpMemoryModel].setResultAndType(false, false); - InstructionDesc[OpEntryPoint].setResultAndType(false, false); - InstructionDesc[OpExecutionMode].setResultAndType(false, false); - InstructionDesc[OpTypeVoid].setResultAndType(true, false); - InstructionDesc[OpTypeBool].setResultAndType(true, false); - InstructionDesc[OpTypeInt].setResultAndType(true, false); - InstructionDesc[OpTypeFloat].setResultAndType(true, false); - InstructionDesc[OpTypeVector].setResultAndType(true, false); - InstructionDesc[OpTypeMatrix].setResultAndType(true, false); - InstructionDesc[OpTypeImage].setResultAndType(true, false); - InstructionDesc[OpTypeSampler].setResultAndType(true, false); - 
InstructionDesc[OpTypeSampledImage].setResultAndType(true, false); - InstructionDesc[OpTypeArray].setResultAndType(true, false); - InstructionDesc[OpTypeRuntimeArray].setResultAndType(true, false); - InstructionDesc[OpTypeStruct].setResultAndType(true, false); - InstructionDesc[OpTypeOpaque].setResultAndType(true, false); - InstructionDesc[OpTypePointer].setResultAndType(true, false); - InstructionDesc[OpTypeForwardPointer].setResultAndType(false, false); - InstructionDesc[OpTypeFunction].setResultAndType(true, false); - InstructionDesc[OpTypeEvent].setResultAndType(true, false); - InstructionDesc[OpTypeDeviceEvent].setResultAndType(true, false); - InstructionDesc[OpTypeReserveId].setResultAndType(true, false); - InstructionDesc[OpTypeQueue].setResultAndType(true, false); - InstructionDesc[OpTypePipe].setResultAndType(true, false); - InstructionDesc[OpFunctionEnd].setResultAndType(false, false); - InstructionDesc[OpStore].setResultAndType(false, false); - InstructionDesc[OpImageWrite].setResultAndType(false, false); - InstructionDesc[OpDecorationGroup].setResultAndType(true, false); - InstructionDesc[OpDecorate].setResultAndType(false, false); - InstructionDesc[OpMemberDecorate].setResultAndType(false, false); - InstructionDesc[OpGroupDecorate].setResultAndType(false, false); - InstructionDesc[OpGroupMemberDecorate].setResultAndType(false, false); - InstructionDesc[OpName].setResultAndType(false, false); - InstructionDesc[OpMemberName].setResultAndType(false, false); - InstructionDesc[OpString].setResultAndType(true, false); - InstructionDesc[OpLine].setResultAndType(false, false); - InstructionDesc[OpNoLine].setResultAndType(false, false); - InstructionDesc[OpCopyMemory].setResultAndType(false, false); - InstructionDesc[OpCopyMemorySized].setResultAndType(false, false); - InstructionDesc[OpEmitVertex].setResultAndType(false, false); - InstructionDesc[OpEndPrimitive].setResultAndType(false, false); - InstructionDesc[OpEmitStreamVertex].setResultAndType(false, 
false); - InstructionDesc[OpEndStreamPrimitive].setResultAndType(false, false); - InstructionDesc[OpControlBarrier].setResultAndType(false, false); - InstructionDesc[OpMemoryBarrier].setResultAndType(false, false); - InstructionDesc[OpAtomicStore].setResultAndType(false, false); - InstructionDesc[OpLoopMerge].setResultAndType(false, false); - InstructionDesc[OpSelectionMerge].setResultAndType(false, false); - InstructionDesc[OpLabel].setResultAndType(true, false); - InstructionDesc[OpBranch].setResultAndType(false, false); - InstructionDesc[OpBranchConditional].setResultAndType(false, false); - InstructionDesc[OpSwitch].setResultAndType(false, false); - InstructionDesc[OpKill].setResultAndType(false, false); - InstructionDesc[OpReturn].setResultAndType(false, false); - InstructionDesc[OpReturnValue].setResultAndType(false, false); - InstructionDesc[OpUnreachable].setResultAndType(false, false); - InstructionDesc[OpLifetimeStart].setResultAndType(false, false); - InstructionDesc[OpLifetimeStop].setResultAndType(false, false); - InstructionDesc[OpCommitReadPipe].setResultAndType(false, false); - InstructionDesc[OpCommitWritePipe].setResultAndType(false, false); - InstructionDesc[OpGroupCommitWritePipe].setResultAndType(false, false); - InstructionDesc[OpGroupCommitReadPipe].setResultAndType(false, false); - InstructionDesc[OpCaptureEventProfilingInfo].setResultAndType(false, false); - InstructionDesc[OpSetUserEventStatus].setResultAndType(false, false); - InstructionDesc[OpRetainEvent].setResultAndType(false, false); - InstructionDesc[OpReleaseEvent].setResultAndType(false, false); - InstructionDesc[OpGroupWaitEvents].setResultAndType(false, false); - InstructionDesc[OpAtomicFlagClear].setResultAndType(false, false); - - // Specific additional context-dependent operands - - ExecutionModeOperands[ExecutionModeInvocations].push(OperandLiteralNumber, "'Number of <>'"); - - ExecutionModeOperands[ExecutionModeLocalSize].push(OperandLiteralNumber, "'x size'"); - 
ExecutionModeOperands[ExecutionModeLocalSize].push(OperandLiteralNumber, "'y size'"); - ExecutionModeOperands[ExecutionModeLocalSize].push(OperandLiteralNumber, "'z size'"); - - ExecutionModeOperands[ExecutionModeLocalSizeHint].push(OperandLiteralNumber, "'x size'"); - ExecutionModeOperands[ExecutionModeLocalSizeHint].push(OperandLiteralNumber, "'y size'"); - ExecutionModeOperands[ExecutionModeLocalSizeHint].push(OperandLiteralNumber, "'z size'"); - - ExecutionModeOperands[ExecutionModeOutputVertices].push(OperandLiteralNumber, "'Vertex count'"); - ExecutionModeOperands[ExecutionModeVecTypeHint].push(OperandLiteralNumber, "'Vector type'"); - - DecorationOperands[DecorationStream].push(OperandLiteralNumber, "'Stream Number'"); - DecorationOperands[DecorationLocation].push(OperandLiteralNumber, "'Location'"); - DecorationOperands[DecorationComponent].push(OperandLiteralNumber, "'Component'"); - DecorationOperands[DecorationIndex].push(OperandLiteralNumber, "'Index'"); - DecorationOperands[DecorationBinding].push(OperandLiteralNumber, "'Binding Point'"); - DecorationOperands[DecorationDescriptorSet].push(OperandLiteralNumber, "'Descriptor Set'"); - DecorationOperands[DecorationOffset].push(OperandLiteralNumber, "'Byte Offset'"); - DecorationOperands[DecorationXfbBuffer].push(OperandLiteralNumber, "'XFB Buffer Number'"); - DecorationOperands[DecorationXfbStride].push(OperandLiteralNumber, "'XFB Stride'"); - DecorationOperands[DecorationArrayStride].push(OperandLiteralNumber, "'Array Stride'"); - DecorationOperands[DecorationMatrixStride].push(OperandLiteralNumber, "'Matrix Stride'"); - DecorationOperands[DecorationBuiltIn].push(OperandLiteralNumber, "See <>"); - DecorationOperands[DecorationFPRoundingMode].push(OperandFPRoundingMode, "'Floating-Point Rounding Mode'"); - DecorationOperands[DecorationFPFastMathMode].push(OperandFPFastMath, "'Fast-Math Mode'"); - DecorationOperands[DecorationLinkageAttributes].push(OperandLiteralString, "'Name'"); - 
DecorationOperands[DecorationLinkageAttributes].push(OperandLinkageType, "'Linkage Type'"); - DecorationOperands[DecorationFuncParamAttr].push(OperandFuncParamAttr, "'Function Parameter Attribute'"); - DecorationOperands[DecorationSpecId].push(OperandLiteralNumber, "'Specialization Constant ID'"); - DecorationOperands[DecorationInputAttachmentIndex].push(OperandLiteralNumber, "'Attachment Index'"); - DecorationOperands[DecorationAlignment].push(OperandLiteralNumber, "'Alignment'"); - - OperandClassParams[OperandSource].set(SourceLanguageCeiling, SourceString, 0); - OperandClassParams[OperandExecutionModel].set(ExecutionModelCeiling, ExecutionModelString, ExecutionModelParams); - OperandClassParams[OperandAddressing].set(AddressingModelCeiling, AddressingString, AddressingParams); - OperandClassParams[OperandMemory].set(MemoryModelCeiling, MemoryString, MemoryParams); - OperandClassParams[OperandExecutionMode].set(ExecutionModeCeiling, ExecutionModeString, ExecutionModeParams); - OperandClassParams[OperandExecutionMode].setOperands(ExecutionModeOperands); - OperandClassParams[OperandStorage].set(StorageClassCeiling, StorageClassString, StorageParams); - OperandClassParams[OperandDimensionality].set(DimensionCeiling, DimensionString, DimensionalityParams); - OperandClassParams[OperandSamplerAddressingMode].set(SamplerAddressingModeCeiling, SamplerAddressingModeString, SamplerAddressingModeParams); - OperandClassParams[OperandSamplerFilterMode].set(SamplerFilterModeCeiling, SamplerFilterModeString, SamplerFilterModeParams); - OperandClassParams[OperandSamplerImageFormat].set(ImageFormatCeiling, ImageFormatString, ImageFormatParams); - OperandClassParams[OperandImageChannelOrder].set(ImageChannelOrderCeiling, ImageChannelOrderString, ImageChannelOrderParams); - OperandClassParams[OperandImageChannelDataType].set(ImageChannelDataTypeCeiling, ImageChannelDataTypeString, ImageChannelDataTypeParams); - OperandClassParams[OperandImageOperands].set(ImageOperandsCeiling, 
ImageOperandsString, ImageOperandsParams, true); - OperandClassParams[OperandFPFastMath].set(FPFastMathCeiling, FPFastMathString, FPFastMathParams, true); - OperandClassParams[OperandFPRoundingMode].set(FPRoundingModeCeiling, FPRoundingModeString, FPRoundingModeParams); - OperandClassParams[OperandLinkageType].set(LinkageTypeCeiling, LinkageTypeString, LinkageTypeParams); - OperandClassParams[OperandFuncParamAttr].set(FuncParamAttrCeiling, FuncParamAttrString, FuncParamAttrParams); - OperandClassParams[OperandAccessQualifier].set(AccessQualifierCeiling, AccessQualifierString, AccessQualifierParams); - OperandClassParams[OperandDecoration].set(DecorationCeiling, DecorationString, DecorationParams); - OperandClassParams[OperandDecoration].setOperands(DecorationOperands); - OperandClassParams[OperandBuiltIn].set(BuiltInCeiling, BuiltInString, BuiltInParams); - OperandClassParams[OperandSelect].set(SelectControlCeiling, SelectControlString, SelectionControlParams, true); - OperandClassParams[OperandLoop].set(LoopControlCeiling, LoopControlString, LoopControlParams, true); - OperandClassParams[OperandFunction].set(FunctionControlCeiling, FunctionControlString, FunctionControlParams, true); - OperandClassParams[OperandMemorySemantics].set(MemorySemanticsCeiling, MemorySemanticsString, MemorySemanticsParams, true); - OperandClassParams[OperandMemoryAccess].set(MemoryAccessCeiling, MemoryAccessString, MemoryAccessParams, true); - OperandClassParams[OperandScope].set(ScopeCeiling, ScopeString, ScopeParams); - OperandClassParams[OperandGroupOperation].set(GroupOperationCeiling, GroupOperationString, GroupOperationParams); - OperandClassParams[OperandKernelEnqueueFlags].set(KernelEnqueueFlagsCeiling, KernelEnqueueFlagsString, KernelEnqueueFlagsParams); - OperandClassParams[OperandKernelProfilingInfo].set(KernelProfilingInfoCeiling, KernelProfilingInfoString, KernelProfilingInfoParams, true); - OperandClassParams[OperandCapability].set(CapabilityCeiling, CapabilityString, 
CapabilityParams); - OperandClassParams[OperandOpcode].set(OpcodeCeiling, OpcodeString, 0); - - CapabilityParams[CapabilityShader].caps.push_back(CapabilityMatrix); - CapabilityParams[CapabilityGeometry].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityTessellation].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityVector16].caps.push_back(CapabilityKernel); - CapabilityParams[CapabilityFloat16Buffer].caps.push_back(CapabilityKernel); - CapabilityParams[CapabilityInt64Atomics].caps.push_back(CapabilityInt64); - CapabilityParams[CapabilityImageBasic].caps.push_back(CapabilityKernel); - CapabilityParams[CapabilityImageReadWrite].caps.push_back(CapabilityImageBasic); - CapabilityParams[CapabilityImageMipmap].caps.push_back(CapabilityImageBasic); - CapabilityParams[CapabilityPipes].caps.push_back(CapabilityKernel); - CapabilityParams[CapabilityDeviceEnqueue].caps.push_back(CapabilityKernel); - CapabilityParams[CapabilityLiteralSampler].caps.push_back(CapabilityKernel); - CapabilityParams[CapabilityAtomicStorage].caps.push_back(CapabilityShader); - CapabilityParams[CapabilitySampleRateShading].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityTessellationPointSize].caps.push_back(CapabilityTessellation); - CapabilityParams[CapabilityGeometryPointSize].caps.push_back(CapabilityGeometry); - CapabilityParams[CapabilityImageGatherExtended].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityStorageImageExtendedFormats].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityStorageImageMultisample].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityUniformBufferArrayDynamicIndexing].caps.push_back(CapabilityShader); - CapabilityParams[CapabilitySampledImageArrayDynamicIndexing].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityStorageBufferArrayDynamicIndexing].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityStorageImageArrayDynamicIndexing].caps.push_back(CapabilityShader); - 
CapabilityParams[CapabilityClipDistance].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityCullDistance].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityGenericPointer].caps.push_back(CapabilityAddresses); - CapabilityParams[CapabilityInt8].caps.push_back(CapabilityKernel); - CapabilityParams[CapabilityInputAttachment].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityMinLod].caps.push_back(CapabilityShader); - CapabilityParams[CapabilitySparseResidency].caps.push_back(CapabilityShader); - CapabilityParams[CapabilitySampled1D].caps.push_back(CapabilityShader); - CapabilityParams[CapabilitySampledRect].caps.push_back(CapabilityShader); - CapabilityParams[CapabilitySampledBuffer].caps.push_back(CapabilityShader); - CapabilityParams[CapabilitySampledCubeArray].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityImageMSArray].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityImage1D].caps.push_back(CapabilitySampled1D); - CapabilityParams[CapabilityImageRect].caps.push_back(CapabilitySampledRect); - CapabilityParams[CapabilityImageBuffer].caps.push_back(CapabilitySampledBuffer); - CapabilityParams[CapabilityImageCubeArray].caps.push_back(CapabilitySampledCubeArray); - CapabilityParams[CapabilityImageQuery].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityDerivativeControl].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityInterpolationFunction].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityTransformFeedback].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityGeometryStreams].caps.push_back(CapabilityGeometry); - CapabilityParams[CapabilityStorageImageReadWithoutFormat].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityStorageImageWriteWithoutFormat].caps.push_back(CapabilityShader); - CapabilityParams[CapabilityMultiViewport].caps.push_back(CapabilityGeometry); - - 
AddressingParams[AddressingModelPhysical32].caps.push_back(CapabilityAddresses); - AddressingParams[AddressingModelPhysical64].caps.push_back(CapabilityAddresses); - - MemoryParams[MemoryModelSimple].caps.push_back(CapabilityShader); - MemoryParams[MemoryModelGLSL450].caps.push_back(CapabilityShader); - MemoryParams[MemoryModelOpenCL].caps.push_back(CapabilityKernel); - - MemorySemanticsParams[MemorySemanticsUniformMemoryShift].caps.push_back(CapabilityShader); - MemorySemanticsParams[MemorySemanticsAtomicCounterMemoryShift].caps.push_back(CapabilityAtomicStorage); - - ExecutionModelParams[ExecutionModelVertex].caps.push_back(CapabilityShader); - ExecutionModelParams[ExecutionModelTessellationControl].caps.push_back(CapabilityTessellation); - ExecutionModelParams[ExecutionModelTessellationEvaluation].caps.push_back(CapabilityTessellation); - ExecutionModelParams[ExecutionModelGeometry].caps.push_back(CapabilityGeometry); - ExecutionModelParams[ExecutionModelFragment].caps.push_back(CapabilityShader); - ExecutionModelParams[ExecutionModelGLCompute].caps.push_back(CapabilityShader); - ExecutionModelParams[ExecutionModelKernel].caps.push_back(CapabilityKernel); - - // Storage capabilites - StorageParams[StorageClassInput].caps.push_back(CapabilityShader); - StorageParams[StorageClassUniform].caps.push_back(CapabilityShader); - StorageParams[StorageClassOutput].caps.push_back(CapabilityShader); - StorageParams[StorageClassPrivate].caps.push_back(CapabilityShader); - StorageParams[StorageClassGeneric].caps.push_back(CapabilityKernel); - StorageParams[StorageClassAtomicCounter].caps.push_back(CapabilityAtomicStorage); - StorageParams[StorageClassPushConstant].caps.push_back(CapabilityShader); - - // Sampler Filter & Addressing mode capabilities - SamplerAddressingModeParams[SamplerAddressingModeNone].caps.push_back(CapabilityKernel); - SamplerAddressingModeParams[SamplerAddressingModeClampToEdge].caps.push_back(CapabilityKernel); - 
SamplerAddressingModeParams[SamplerAddressingModeClamp].caps.push_back(CapabilityKernel); - SamplerAddressingModeParams[SamplerAddressingModeRepeat].caps.push_back(CapabilityKernel); - SamplerAddressingModeParams[SamplerAddressingModeRepeatMirrored].caps.push_back(CapabilityKernel); - - SamplerFilterModeParams[SamplerFilterModeNearest].caps.push_back(CapabilityKernel); - SamplerFilterModeParams[SamplerFilterModeLinear].caps.push_back(CapabilityKernel); - - // image format capabilities - - // ES/Desktop float - ImageFormatParams[ImageFormatRgba32f].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatRgba16f].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatR32f].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatRgba8].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatRgba8Snorm].caps.push_back(CapabilityShader); - - // Desktop float - ImageFormatParams[ImageFormatRg32f].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg16f].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR11fG11fB10f].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR16f].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRgba16].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRgb10A2].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg16].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg8].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR16].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR8].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRgba16Snorm].caps.push_back(CapabilityStorageImageExtendedFormats); - 
ImageFormatParams[ImageFormatRg16Snorm].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg8Snorm].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR16Snorm].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR8Snorm].caps.push_back(CapabilityStorageImageExtendedFormats); - - // ES/Desktop int - ImageFormatParams[ImageFormatRgba32i].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatRgba16i].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatRgba8i].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatR32i].caps.push_back(CapabilityShader); - - // Desktop int - ImageFormatParams[ImageFormatRg32i].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg16i].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg8i].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR16i].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR8i].caps.push_back(CapabilityStorageImageExtendedFormats); - - // ES/Desktop uint - ImageFormatParams[ImageFormatRgba32ui].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatRgba16ui].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatRgba8ui].caps.push_back(CapabilityShader); - ImageFormatParams[ImageFormatR32ui].caps.push_back(CapabilityShader); - - // Desktop uint - ImageFormatParams[ImageFormatRgb10a2ui].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg32ui].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg16ui].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatRg8ui].caps.push_back(CapabilityStorageImageExtendedFormats); - ImageFormatParams[ImageFormatR16ui].caps.push_back(CapabilityStorageImageExtendedFormats); - 
ImageFormatParams[ImageFormatR8ui].caps.push_back(CapabilityStorageImageExtendedFormats); - - // image channel order capabilities - for (int i = 0; i < ImageChannelOrderCeiling; ++i) { - ImageChannelOrderParams[i].caps.push_back(CapabilityKernel); - } - - // image channel type capabilities - for (int i = 0; i < ImageChannelDataTypeCeiling; ++i) { - ImageChannelDataTypeParams[i].caps.push_back(CapabilityKernel); - } - - // image lookup operands - ImageOperandsParams[ImageOperandsBiasShift].caps.push_back(CapabilityShader); - ImageOperandsParams[ImageOperandsOffsetShift].caps.push_back(CapabilityImageGatherExtended); - ImageOperandsParams[ImageOperandsMinLodShift].caps.push_back(CapabilityMinLod); - - // fast math flags capabilities - for (int i = 0; i < FPFastMathCeiling; ++i) { - FPFastMathParams[i].caps.push_back(CapabilityKernel); - } - - // fp rounding mode capabilities - for (int i = 0; i < FPRoundingModeCeiling; ++i) { - FPRoundingModeParams[i].caps.push_back(CapabilityKernel); - } - - // linkage types - for (int i = 0; i < LinkageTypeCeiling; ++i) { - LinkageTypeParams[i].caps.push_back(CapabilityLinkage); - } - - // function argument types - for (int i = 0; i < FuncParamAttrCeiling; ++i) { - FuncParamAttrParams[i].caps.push_back(CapabilityKernel); - } - - // function argument types - for (int i = 0; i < AccessQualifierCeiling; ++i) { - AccessQualifierParams[i].caps.push_back(CapabilityKernel); - } - - ExecutionModeParams[ExecutionModeInvocations].caps.push_back(CapabilityGeometry); - ExecutionModeParams[ExecutionModeSpacingEqual].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeSpacingFractionalEven].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeSpacingFractionalOdd].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeVertexOrderCw].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeVertexOrderCcw].caps.push_back(CapabilityTessellation); - 
ExecutionModeParams[ExecutionModePixelCenterInteger].caps.push_back(CapabilityShader); - ExecutionModeParams[ExecutionModeOriginUpperLeft].caps.push_back(CapabilityShader); - ExecutionModeParams[ExecutionModeOriginLowerLeft].caps.push_back(CapabilityShader); - ExecutionModeParams[ExecutionModeEarlyFragmentTests].caps.push_back(CapabilityShader); - ExecutionModeParams[ExecutionModePointMode].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeXfb].caps.push_back(CapabilityTransformFeedback); - ExecutionModeParams[ExecutionModeDepthReplacing].caps.push_back(CapabilityShader); - ExecutionModeParams[ExecutionModeDepthGreater].caps.push_back(CapabilityShader); - ExecutionModeParams[ExecutionModeDepthLess].caps.push_back(CapabilityShader); - ExecutionModeParams[ExecutionModeDepthUnchanged].caps.push_back(CapabilityShader); - ExecutionModeParams[ExecutionModeLocalSizeHint].caps.push_back(CapabilityKernel); - ExecutionModeParams[ExecutionModeInputPoints].caps.push_back(CapabilityGeometry); - ExecutionModeParams[ExecutionModeInputLines].caps.push_back(CapabilityGeometry); - ExecutionModeParams[ExecutionModeInputLinesAdjacency].caps.push_back(CapabilityGeometry); - ExecutionModeParams[ExecutionModeTriangles].caps.push_back(CapabilityGeometry); - ExecutionModeParams[ExecutionModeTriangles].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeInputTrianglesAdjacency].caps.push_back(CapabilityGeometry); - ExecutionModeParams[ExecutionModeQuads].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeIsolines].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeOutputVertices].caps.push_back(CapabilityGeometry); - ExecutionModeParams[ExecutionModeOutputVertices].caps.push_back(CapabilityTessellation); - ExecutionModeParams[ExecutionModeOutputPoints].caps.push_back(CapabilityGeometry); - ExecutionModeParams[ExecutionModeOutputLineStrip].caps.push_back(CapabilityGeometry); - 
ExecutionModeParams[ExecutionModeOutputTriangleStrip].caps.push_back(CapabilityGeometry); - ExecutionModeParams[ExecutionModeVecTypeHint].caps.push_back(CapabilityKernel); - ExecutionModeParams[ExecutionModeContractionOff].caps.push_back(CapabilityKernel); - - DecorationParams[DecorationRelaxedPrecision].caps.push_back(CapabilityShader); - DecorationParams[DecorationBlock].caps.push_back(CapabilityShader); - DecorationParams[DecorationBufferBlock].caps.push_back(CapabilityShader); - DecorationParams[DecorationRowMajor].caps.push_back(CapabilityMatrix); - DecorationParams[DecorationColMajor].caps.push_back(CapabilityMatrix); - DecorationParams[DecorationGLSLShared].caps.push_back(CapabilityShader); - DecorationParams[DecorationGLSLPacked].caps.push_back(CapabilityShader); - DecorationParams[DecorationNoPerspective].caps.push_back(CapabilityShader); - DecorationParams[DecorationFlat].caps.push_back(CapabilityShader); - DecorationParams[DecorationPatch].caps.push_back(CapabilityTessellation); - DecorationParams[DecorationCentroid].caps.push_back(CapabilityShader); - DecorationParams[DecorationSample].caps.push_back(CapabilitySampleRateShading); - DecorationParams[DecorationInvariant].caps.push_back(CapabilityShader); - DecorationParams[DecorationConstant].caps.push_back(CapabilityKernel); - DecorationParams[DecorationUniform].caps.push_back(CapabilityShader); - DecorationParams[DecorationCPacked].caps.push_back(CapabilityKernel); - DecorationParams[DecorationSaturatedConversion].caps.push_back(CapabilityKernel); - DecorationParams[DecorationStream].caps.push_back(CapabilityGeometryStreams); - DecorationParams[DecorationLocation].caps.push_back(CapabilityShader); - DecorationParams[DecorationComponent].caps.push_back(CapabilityShader); - DecorationParams[DecorationOffset].caps.push_back(CapabilityShader); - DecorationParams[DecorationIndex].caps.push_back(CapabilityShader); - DecorationParams[DecorationBinding].caps.push_back(CapabilityShader); - 
DecorationParams[DecorationDescriptorSet].caps.push_back(CapabilityShader); - DecorationParams[DecorationXfbBuffer].caps.push_back(CapabilityTransformFeedback); - DecorationParams[DecorationXfbStride].caps.push_back(CapabilityTransformFeedback); - DecorationParams[DecorationArrayStride].caps.push_back(CapabilityShader); - DecorationParams[DecorationMatrixStride].caps.push_back(CapabilityMatrix); - DecorationParams[DecorationFuncParamAttr].caps.push_back(CapabilityKernel); - DecorationParams[DecorationFPRoundingMode].caps.push_back(CapabilityKernel); - DecorationParams[DecorationFPFastMathMode].caps.push_back(CapabilityKernel); - DecorationParams[DecorationLinkageAttributes].caps.push_back(CapabilityLinkage); - DecorationParams[DecorationSpecId].caps.push_back(CapabilityShader); - DecorationParams[DecorationNoContraction].caps.push_back(CapabilityShader); - DecorationParams[DecorationInputAttachmentIndex].caps.push_back(CapabilityInputAttachment); - DecorationParams[DecorationAlignment].caps.push_back(CapabilityKernel); - - BuiltInParams[BuiltInPosition].caps.push_back(CapabilityShader); - BuiltInParams[BuiltInPointSize].caps.push_back(CapabilityShader); - BuiltInParams[BuiltInClipDistance].caps.push_back(CapabilityClipDistance); - BuiltInParams[BuiltInCullDistance].caps.push_back(CapabilityCullDistance); - - BuiltInParams[BuiltInVertexId].caps.push_back(CapabilityShader); - BuiltInParams[BuiltInVertexId].desc = "Vertex ID, which takes on values 0, 1, 2, . . . ."; - - BuiltInParams[BuiltInInstanceId].caps.push_back(CapabilityShader); - BuiltInParams[BuiltInInstanceId].desc = "Instance ID, which takes on values 0, 1, 2, . . . ."; - - BuiltInParams[BuiltInVertexIndex].caps.push_back(CapabilityShader); - BuiltInParams[BuiltInVertexIndex].desc = "Vertex index, which takes on values base, base+1, base+2, . . . 
."; - - BuiltInParams[BuiltInInstanceIndex].caps.push_back(CapabilityShader); - BuiltInParams[BuiltInInstanceIndex].desc = "Instance index, which takes on values base, base+1, base+2, . . . ."; - - BuiltInParams[BuiltInPrimitiveId].caps.push_back(CapabilityGeometry); - BuiltInParams[BuiltInPrimitiveId].caps.push_back(CapabilityTessellation); - BuiltInParams[BuiltInInvocationId].caps.push_back(CapabilityGeometry); - BuiltInParams[BuiltInInvocationId].caps.push_back(CapabilityTessellation); - BuiltInParams[BuiltInLayer].caps.push_back(CapabilityGeometry); - BuiltInParams[BuiltInViewportIndex].caps.push_back(CapabilityMultiViewport); - BuiltInParams[BuiltInTessLevelOuter].caps.push_back(CapabilityTessellation); - BuiltInParams[BuiltInTessLevelInner].caps.push_back(CapabilityTessellation); - BuiltInParams[BuiltInTessCoord].caps.push_back(CapabilityTessellation); - BuiltInParams[BuiltInPatchVertices].caps.push_back(CapabilityTessellation); - BuiltInParams[BuiltInFragCoord].caps.push_back(CapabilityShader); - BuiltInParams[BuiltInPointCoord].caps.push_back(CapabilityShader); - BuiltInParams[BuiltInFrontFacing].caps.push_back(CapabilityShader); - BuiltInParams[BuiltInSampleId].caps.push_back(CapabilitySampleRateShading); - BuiltInParams[BuiltInSamplePosition].caps.push_back(CapabilitySampleRateShading); - BuiltInParams[BuiltInSampleMask].caps.push_back(CapabilitySampleRateShading); - BuiltInParams[BuiltInFragDepth].caps.push_back(CapabilityShader); - BuiltInParams[BuiltInHelperInvocation].caps.push_back(CapabilityShader); - BuiltInParams[BuiltInWorkDim].caps.push_back(CapabilityKernel); - BuiltInParams[BuiltInGlobalSize].caps.push_back(CapabilityKernel); - BuiltInParams[BuiltInEnqueuedWorkgroupSize].caps.push_back(CapabilityKernel); - BuiltInParams[BuiltInGlobalOffset].caps.push_back(CapabilityKernel); - BuiltInParams[BuiltInGlobalLinearId].caps.push_back(CapabilityKernel); - - BuiltInParams[BuiltInSubgroupSize].caps.push_back(CapabilityKernel); - 
BuiltInParams[BuiltInSubgroupMaxSize].caps.push_back(CapabilityKernel); - BuiltInParams[BuiltInNumSubgroups].caps.push_back(CapabilityKernel); - BuiltInParams[BuiltInNumEnqueuedSubgroups].caps.push_back(CapabilityKernel); - BuiltInParams[BuiltInSubgroupId].caps.push_back(CapabilityKernel); - BuiltInParams[BuiltInSubgroupLocalInvocationId].caps.push_back(CapabilityKernel); - - DimensionalityParams[Dim1D].caps.push_back(CapabilitySampled1D); - DimensionalityParams[DimCube].caps.push_back(CapabilityShader); - DimensionalityParams[DimRect].caps.push_back(CapabilitySampledRect); - DimensionalityParams[DimBuffer].caps.push_back(CapabilitySampledBuffer); - DimensionalityParams[DimSubpassData].caps.push_back(CapabilityInputAttachment); - - // Group Operations - for (int i = 0; i < GroupOperationCeiling; ++i) { - GroupOperationParams[i].caps.push_back(CapabilityKernel); - } - - // Enqueue flags - for (int i = 0; i < KernelEnqueueFlagsCeiling; ++i) { - KernelEnqueueFlagsParams[i].caps.push_back(CapabilityKernel); - } - - // Profiling info - KernelProfilingInfoParams[0].caps.push_back(CapabilityKernel); - - // set name of operator, an initial set of style operands, and the description - - InstructionDesc[OpSource].operands.push(OperandSource, ""); - InstructionDesc[OpSource].operands.push(OperandLiteralNumber, "'Version'"); - InstructionDesc[OpSource].operands.push(OperandId, "'File'", true); - InstructionDesc[OpSource].operands.push(OperandLiteralString, "'Source'", true); - - InstructionDesc[OpSourceContinued].operands.push(OperandLiteralString, "'Continued Source'"); - - InstructionDesc[OpSourceExtension].operands.push(OperandLiteralString, "'Extension'"); - - InstructionDesc[OpName].operands.push(OperandId, "'Target'"); - InstructionDesc[OpName].operands.push(OperandLiteralString, "'Name'"); - - InstructionDesc[OpMemberName].operands.push(OperandId, "'Type'"); - InstructionDesc[OpMemberName].operands.push(OperandLiteralNumber, "'Member'"); - 
InstructionDesc[OpMemberName].operands.push(OperandLiteralString, "'Name'"); - - InstructionDesc[OpString].operands.push(OperandLiteralString, "'String'"); - - InstructionDesc[OpLine].operands.push(OperandId, "'File'"); - InstructionDesc[OpLine].operands.push(OperandLiteralNumber, "'Line'"); - InstructionDesc[OpLine].operands.push(OperandLiteralNumber, "'Column'"); - - InstructionDesc[OpExtension].operands.push(OperandLiteralString, "'Name'"); - - InstructionDesc[OpExtInstImport].operands.push(OperandLiteralString, "'Name'"); - - InstructionDesc[OpCapability].operands.push(OperandCapability, "'Capability'"); - - InstructionDesc[OpMemoryModel].operands.push(OperandAddressing, ""); - InstructionDesc[OpMemoryModel].operands.push(OperandMemory, ""); - - InstructionDesc[OpEntryPoint].operands.push(OperandExecutionModel, ""); - InstructionDesc[OpEntryPoint].operands.push(OperandId, "'Entry Point'"); - InstructionDesc[OpEntryPoint].operands.push(OperandLiteralString, "'Name'"); - InstructionDesc[OpEntryPoint].operands.push(OperandVariableIds, "'Interface'"); - - InstructionDesc[OpExecutionMode].operands.push(OperandId, "'Entry Point'"); - InstructionDesc[OpExecutionMode].operands.push(OperandExecutionMode, "'Mode'"); - InstructionDesc[OpExecutionMode].operands.push(OperandOptionalLiteral, "See <>"); - - InstructionDesc[OpTypeInt].operands.push(OperandLiteralNumber, "'Width'"); - InstructionDesc[OpTypeInt].operands.push(OperandLiteralNumber, "'Signedness'"); - - InstructionDesc[OpTypeFloat].operands.push(OperandLiteralNumber, "'Width'"); - - InstructionDesc[OpTypeVector].operands.push(OperandId, "'Component Type'"); - InstructionDesc[OpTypeVector].operands.push(OperandLiteralNumber, "'Component Count'"); - - InstructionDesc[OpTypeMatrix].capabilities.push_back(CapabilityMatrix); - InstructionDesc[OpTypeMatrix].operands.push(OperandId, "'Column Type'"); - InstructionDesc[OpTypeMatrix].operands.push(OperandLiteralNumber, "'Column Count'"); - - 
InstructionDesc[OpTypeImage].operands.push(OperandId, "'Sampled Type'"); - InstructionDesc[OpTypeImage].operands.push(OperandDimensionality, ""); - InstructionDesc[OpTypeImage].operands.push(OperandLiteralNumber, "'Depth'"); - InstructionDesc[OpTypeImage].operands.push(OperandLiteralNumber, "'Arrayed'"); - InstructionDesc[OpTypeImage].operands.push(OperandLiteralNumber, "'MS'"); - InstructionDesc[OpTypeImage].operands.push(OperandLiteralNumber, "'Sampled'"); - InstructionDesc[OpTypeImage].operands.push(OperandSamplerImageFormat, ""); - InstructionDesc[OpTypeImage].operands.push(OperandAccessQualifier, "", true); - - InstructionDesc[OpTypeSampledImage].operands.push(OperandId, "'Image Type'"); - - InstructionDesc[OpTypeArray].operands.push(OperandId, "'Element Type'"); - InstructionDesc[OpTypeArray].operands.push(OperandId, "'Length'"); - - InstructionDesc[OpTypeRuntimeArray].capabilities.push_back(CapabilityShader); - InstructionDesc[OpTypeRuntimeArray].operands.push(OperandId, "'Element Type'"); - - InstructionDesc[OpTypeStruct].operands.push(OperandVariableIds, "'Member 0 type', +\n'member 1 type', +\n..."); - - InstructionDesc[OpTypeOpaque].capabilities.push_back(CapabilityKernel); - InstructionDesc[OpTypeOpaque].operands.push(OperandLiteralString, "The name of the opaque type."); - - InstructionDesc[OpTypePointer].operands.push(OperandStorage, ""); - InstructionDesc[OpTypePointer].operands.push(OperandId, "'Type'"); - - InstructionDesc[OpTypeForwardPointer].capabilities.push_back(CapabilityAddresses); - InstructionDesc[OpTypeForwardPointer].operands.push(OperandId, "'Pointer Type'"); - InstructionDesc[OpTypeForwardPointer].operands.push(OperandStorage, ""); - - InstructionDesc[OpTypeEvent].capabilities.push_back(CapabilityKernel); - - InstructionDesc[OpTypeDeviceEvent].capabilities.push_back(CapabilityDeviceEnqueue); - - InstructionDesc[OpTypeReserveId].capabilities.push_back(CapabilityPipes); - - 
InstructionDesc[OpTypeQueue].capabilities.push_back(CapabilityDeviceEnqueue); - - InstructionDesc[OpTypePipe].operands.push(OperandAccessQualifier, "'Qualifier'"); - InstructionDesc[OpTypePipe].capabilities.push_back(CapabilityPipes); - - InstructionDesc[OpTypeFunction].operands.push(OperandId, "'Return Type'"); - InstructionDesc[OpTypeFunction].operands.push(OperandVariableIds, "'Parameter 0 Type', +\n'Parameter 1 Type', +\n..."); - - InstructionDesc[OpConstant].operands.push(OperandVariableLiterals, "'Value'"); - - InstructionDesc[OpConstantComposite].operands.push(OperandVariableIds, "'Constituents'"); - - InstructionDesc[OpConstantSampler].capabilities.push_back(CapabilityLiteralSampler); - InstructionDesc[OpConstantSampler].operands.push(OperandSamplerAddressingMode, ""); - InstructionDesc[OpConstantSampler].operands.push(OperandLiteralNumber, "'Param'"); - InstructionDesc[OpConstantSampler].operands.push(OperandSamplerFilterMode, ""); - - InstructionDesc[OpSpecConstant].operands.push(OperandVariableLiterals, "'Value'"); - - InstructionDesc[OpSpecConstantComposite].operands.push(OperandVariableIds, "'Constituents'"); - - InstructionDesc[OpSpecConstantOp].operands.push(OperandLiteralNumber, "'Opcode'"); - InstructionDesc[OpSpecConstantOp].operands.push(OperandVariableIds, "'Operands'"); - - InstructionDesc[OpVariable].operands.push(OperandStorage, ""); - InstructionDesc[OpVariable].operands.push(OperandId, "'Initializer'", true); - - InstructionDesc[OpFunction].operands.push(OperandFunction, ""); - InstructionDesc[OpFunction].operands.push(OperandId, "'Function Type'"); - - InstructionDesc[OpFunctionCall].operands.push(OperandId, "'Function'"); - InstructionDesc[OpFunctionCall].operands.push(OperandVariableIds, "'Argument 0', +\n'Argument 1', +\n..."); - - InstructionDesc[OpExtInst].operands.push(OperandId, "'Set'"); - InstructionDesc[OpExtInst].operands.push(OperandLiteralNumber, "'Instruction'"); - InstructionDesc[OpExtInst].operands.push(OperandVariableIds, 
"'Operand 1', +\n'Operand 2', +\n..."); - - InstructionDesc[OpLoad].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpLoad].operands.push(OperandMemoryAccess, "", true); - - InstructionDesc[OpStore].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpStore].operands.push(OperandId, "'Object'"); - InstructionDesc[OpStore].operands.push(OperandMemoryAccess, "", true); - - InstructionDesc[OpPhi].operands.push(OperandVariableIds, "'Variable, Parent, ...'"); - - InstructionDesc[OpDecorate].operands.push(OperandId, "'Target'"); - InstructionDesc[OpDecorate].operands.push(OperandDecoration, ""); - InstructionDesc[OpDecorate].operands.push(OperandVariableLiterals, "See <>."); - - InstructionDesc[OpMemberDecorate].operands.push(OperandId, "'Structure Type'"); - InstructionDesc[OpMemberDecorate].operands.push(OperandLiteralNumber, "'Member'"); - InstructionDesc[OpMemberDecorate].operands.push(OperandDecoration, ""); - InstructionDesc[OpMemberDecorate].operands.push(OperandVariableLiterals, "See <>."); - - InstructionDesc[OpGroupDecorate].operands.push(OperandId, "'Decoration Group'"); - InstructionDesc[OpGroupDecorate].operands.push(OperandVariableIds, "'Targets'"); - - InstructionDesc[OpGroupMemberDecorate].operands.push(OperandId, "'Decoration Group'"); - InstructionDesc[OpGroupMemberDecorate].operands.push(OperandVariableIdLiteral, "'Targets'"); - - InstructionDesc[OpVectorExtractDynamic].operands.push(OperandId, "'Vector'"); - InstructionDesc[OpVectorExtractDynamic].operands.push(OperandId, "'Index'"); - - InstructionDesc[OpVectorInsertDynamic].operands.push(OperandId, "'Vector'"); - InstructionDesc[OpVectorInsertDynamic].operands.push(OperandId, "'Component'"); - InstructionDesc[OpVectorInsertDynamic].operands.push(OperandId, "'Index'"); - - InstructionDesc[OpVectorShuffle].operands.push(OperandId, "'Vector 1'"); - InstructionDesc[OpVectorShuffle].operands.push(OperandId, "'Vector 2'"); - 
InstructionDesc[OpVectorShuffle].operands.push(OperandVariableLiterals, "'Components'"); - - InstructionDesc[OpCompositeConstruct].operands.push(OperandVariableIds, "'Constituents'"); - - InstructionDesc[OpCompositeExtract].operands.push(OperandId, "'Composite'"); - InstructionDesc[OpCompositeExtract].operands.push(OperandVariableLiterals, "'Indexes'"); - - InstructionDesc[OpCompositeInsert].operands.push(OperandId, "'Object'"); - InstructionDesc[OpCompositeInsert].operands.push(OperandId, "'Composite'"); - InstructionDesc[OpCompositeInsert].operands.push(OperandVariableLiterals, "'Indexes'"); - - InstructionDesc[OpCopyObject].operands.push(OperandId, "'Operand'"); - - InstructionDesc[OpCopyMemory].operands.push(OperandId, "'Target'"); - InstructionDesc[OpCopyMemory].operands.push(OperandId, "'Source'"); - InstructionDesc[OpCopyMemory].operands.push(OperandMemoryAccess, "", true); - - InstructionDesc[OpCopyMemorySized].operands.push(OperandId, "'Target'"); - InstructionDesc[OpCopyMemorySized].operands.push(OperandId, "'Source'"); - InstructionDesc[OpCopyMemorySized].operands.push(OperandId, "'Size'"); - InstructionDesc[OpCopyMemorySized].operands.push(OperandMemoryAccess, "", true); - - InstructionDesc[OpCopyMemorySized].capabilities.push_back(CapabilityAddresses); - - InstructionDesc[OpSampledImage].operands.push(OperandId, "'Image'"); - InstructionDesc[OpSampledImage].operands.push(OperandId, "'Sampler'"); - - InstructionDesc[OpImage].operands.push(OperandId, "'Sampled Image'"); - - InstructionDesc[OpImageRead].operands.push(OperandId, "'Image'"); - InstructionDesc[OpImageRead].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageRead].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageRead].operands.push(OperandVariableIds, "", true); - - InstructionDesc[OpImageWrite].operands.push(OperandId, "'Image'"); - InstructionDesc[OpImageWrite].operands.push(OperandId, "'Coordinate'"); - 
InstructionDesc[OpImageWrite].operands.push(OperandId, "'Texel'"); - InstructionDesc[OpImageWrite].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageWrite].operands.push(OperandVariableIds, "", true); - - InstructionDesc[OpImageSampleImplicitLod].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSampleImplicitLod].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSampleImplicitLod].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSampleImplicitLod].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageSampleImplicitLod].capabilities.push_back(CapabilityShader); - - InstructionDesc[OpImageSampleExplicitLod].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSampleExplicitLod].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSampleExplicitLod].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSampleExplicitLod].operands.push(OperandVariableIds, "", true); - - InstructionDesc[OpImageSampleDrefImplicitLod].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSampleDrefImplicitLod].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSampleDrefImplicitLod].operands.push(OperandId, "'D~ref~'"); - InstructionDesc[OpImageSampleDrefImplicitLod].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSampleDrefImplicitLod].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageSampleDrefImplicitLod].capabilities.push_back(CapabilityShader); - - InstructionDesc[OpImageSampleDrefExplicitLod].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSampleDrefExplicitLod].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSampleDrefExplicitLod].operands.push(OperandId, "'D~ref~'"); - InstructionDesc[OpImageSampleDrefExplicitLod].operands.push(OperandImageOperands, "", true); - 
InstructionDesc[OpImageSampleDrefExplicitLod].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageSampleDrefExplicitLod].capabilities.push_back(CapabilityShader); - - InstructionDesc[OpImageSampleProjImplicitLod].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSampleProjImplicitLod].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSampleProjImplicitLod].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSampleProjImplicitLod].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageSampleProjImplicitLod].capabilities.push_back(CapabilityShader); - - InstructionDesc[OpImageSampleProjExplicitLod].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSampleProjExplicitLod].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSampleProjExplicitLod].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSampleProjExplicitLod].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageSampleProjExplicitLod].capabilities.push_back(CapabilityShader); - - InstructionDesc[OpImageSampleProjDrefImplicitLod].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSampleProjDrefImplicitLod].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSampleProjDrefImplicitLod].operands.push(OperandId, "'D~ref~'"); - InstructionDesc[OpImageSampleProjDrefImplicitLod].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSampleProjDrefImplicitLod].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageSampleProjDrefImplicitLod].capabilities.push_back(CapabilityShader); - - InstructionDesc[OpImageSampleProjDrefExplicitLod].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSampleProjDrefExplicitLod].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSampleProjDrefExplicitLod].operands.push(OperandId, "'D~ref~'"); - 
InstructionDesc[OpImageSampleProjDrefExplicitLod].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSampleProjDrefExplicitLod].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageSampleProjDrefExplicitLod].capabilities.push_back(CapabilityShader); - - InstructionDesc[OpImageFetch].operands.push(OperandId, "'Image'"); - InstructionDesc[OpImageFetch].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageFetch].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageFetch].operands.push(OperandVariableIds, "", true); - - InstructionDesc[OpImageGather].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageGather].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageGather].operands.push(OperandId, "'Component'"); - InstructionDesc[OpImageGather].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageGather].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageGather].capabilities.push_back(CapabilityShader); - - InstructionDesc[OpImageDrefGather].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageDrefGather].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageDrefGather].operands.push(OperandId, "'D~ref~'"); - InstructionDesc[OpImageDrefGather].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageDrefGather].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageDrefGather].capabilities.push_back(CapabilityShader); - - InstructionDesc[OpImageSparseSampleImplicitLod].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSparseSampleImplicitLod].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSparseSampleImplicitLod].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSparseSampleImplicitLod].operands.push(OperandVariableIds, "", true); - 
InstructionDesc[OpImageSparseSampleImplicitLod].capabilities.push_back(CapabilitySparseResidency); - - InstructionDesc[OpImageSparseSampleExplicitLod].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSparseSampleExplicitLod].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSparseSampleExplicitLod].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSparseSampleExplicitLod].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageSparseSampleExplicitLod].capabilities.push_back(CapabilitySparseResidency); - - InstructionDesc[OpImageSparseSampleDrefImplicitLod].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSparseSampleDrefImplicitLod].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSparseSampleDrefImplicitLod].operands.push(OperandId, "'D~ref~'"); - InstructionDesc[OpImageSparseSampleDrefImplicitLod].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSparseSampleDrefImplicitLod].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageSparseSampleDrefImplicitLod].capabilities.push_back(CapabilitySparseResidency); - - InstructionDesc[OpImageSparseSampleDrefExplicitLod].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSparseSampleDrefExplicitLod].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSparseSampleDrefExplicitLod].operands.push(OperandId, "'D~ref~'"); - InstructionDesc[OpImageSparseSampleDrefExplicitLod].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSparseSampleDrefExplicitLod].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageSparseSampleDrefExplicitLod].capabilities.push_back(CapabilitySparseResidency); - - InstructionDesc[OpImageSparseSampleProjImplicitLod].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSparseSampleProjImplicitLod].operands.push(OperandId, "'Coordinate'"); - 
InstructionDesc[OpImageSparseSampleProjImplicitLod].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSparseSampleProjImplicitLod].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageSparseSampleProjImplicitLod].capabilities.push_back(CapabilitySparseResidency); - - InstructionDesc[OpImageSparseSampleProjExplicitLod].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSparseSampleProjExplicitLod].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSparseSampleProjExplicitLod].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSparseSampleProjExplicitLod].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageSparseSampleProjExplicitLod].capabilities.push_back(CapabilitySparseResidency); - - InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].operands.push(OperandId, "'D~ref~'"); - InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageSparseSampleProjDrefImplicitLod].capabilities.push_back(CapabilitySparseResidency); - - InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].operands.push(OperandId, "'D~ref~'"); - InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].operands.push(OperandVariableIds, "", true); - 
InstructionDesc[OpImageSparseSampleProjDrefExplicitLod].capabilities.push_back(CapabilitySparseResidency); - - InstructionDesc[OpImageSparseFetch].operands.push(OperandId, "'Image'"); - InstructionDesc[OpImageSparseFetch].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSparseFetch].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSparseFetch].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageSparseFetch].capabilities.push_back(CapabilitySparseResidency); - - InstructionDesc[OpImageSparseGather].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSparseGather].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSparseGather].operands.push(OperandId, "'Component'"); - InstructionDesc[OpImageSparseGather].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSparseGather].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageSparseGather].capabilities.push_back(CapabilitySparseResidency); - - InstructionDesc[OpImageSparseDrefGather].operands.push(OperandId, "'Sampled Image'"); - InstructionDesc[OpImageSparseDrefGather].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSparseDrefGather].operands.push(OperandId, "'D~ref~'"); - InstructionDesc[OpImageSparseDrefGather].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSparseDrefGather].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageSparseDrefGather].capabilities.push_back(CapabilitySparseResidency); - - InstructionDesc[OpImageSparseRead].operands.push(OperandId, "'Image'"); - InstructionDesc[OpImageSparseRead].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageSparseRead].operands.push(OperandImageOperands, "", true); - InstructionDesc[OpImageSparseRead].operands.push(OperandVariableIds, "", true); - InstructionDesc[OpImageSparseRead].capabilities.push_back(CapabilitySparseResidency); - - 
InstructionDesc[OpImageSparseTexelsResident].operands.push(OperandId, "'Resident Code'"); - InstructionDesc[OpImageSparseTexelsResident].capabilities.push_back(CapabilitySparseResidency); - - InstructionDesc[OpImageQuerySizeLod].operands.push(OperandId, "'Image'"); - InstructionDesc[OpImageQuerySizeLod].operands.push(OperandId, "'Level of Detail'"); - InstructionDesc[OpImageQuerySizeLod].capabilities.push_back(CapabilityKernel); - InstructionDesc[OpImageQuerySizeLod].capabilities.push_back(CapabilityImageQuery); - - InstructionDesc[OpImageQuerySize].operands.push(OperandId, "'Image'"); - InstructionDesc[OpImageQuerySize].capabilities.push_back(CapabilityKernel); - InstructionDesc[OpImageQuerySize].capabilities.push_back(CapabilityImageQuery); - - InstructionDesc[OpImageQueryLod].operands.push(OperandId, "'Image'"); - InstructionDesc[OpImageQueryLod].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageQueryLod].capabilities.push_back(CapabilityImageQuery); - - InstructionDesc[OpImageQueryLevels].operands.push(OperandId, "'Image'"); - InstructionDesc[OpImageQueryLevels].capabilities.push_back(CapabilityKernel); - InstructionDesc[OpImageQueryLevels].capabilities.push_back(CapabilityImageQuery); - - InstructionDesc[OpImageQuerySamples].operands.push(OperandId, "'Image'"); - InstructionDesc[OpImageQuerySamples].capabilities.push_back(CapabilityKernel); - InstructionDesc[OpImageQuerySamples].capabilities.push_back(CapabilityImageQuery); - - InstructionDesc[OpImageQueryFormat].operands.push(OperandId, "'Image'"); - InstructionDesc[OpImageQueryFormat].capabilities.push_back(CapabilityKernel); - - InstructionDesc[OpImageQueryOrder].operands.push(OperandId, "'Image'"); - InstructionDesc[OpImageQueryOrder].capabilities.push_back(CapabilityKernel); - - InstructionDesc[OpAccessChain].operands.push(OperandId, "'Base'"); - InstructionDesc[OpAccessChain].operands.push(OperandVariableIds, "'Indexes'"); - - 
InstructionDesc[OpInBoundsAccessChain].operands.push(OperandId, "'Base'"); - InstructionDesc[OpInBoundsAccessChain].operands.push(OperandVariableIds, "'Indexes'"); - - InstructionDesc[OpPtrAccessChain].operands.push(OperandId, "'Base'"); - InstructionDesc[OpPtrAccessChain].operands.push(OperandId, "'Element'"); - InstructionDesc[OpPtrAccessChain].operands.push(OperandVariableIds, "'Indexes'"); - InstructionDesc[OpPtrAccessChain].capabilities.push_back(CapabilityAddresses); - - InstructionDesc[OpInBoundsPtrAccessChain].operands.push(OperandId, "'Base'"); - InstructionDesc[OpInBoundsPtrAccessChain].operands.push(OperandId, "'Element'"); - InstructionDesc[OpInBoundsPtrAccessChain].operands.push(OperandVariableIds, "'Indexes'"); - InstructionDesc[OpInBoundsPtrAccessChain].capabilities.push_back(CapabilityAddresses); - - InstructionDesc[OpSNegate].operands.push(OperandId, "'Operand'"); - - InstructionDesc[OpFNegate].operands.push(OperandId, "'Operand'"); - - InstructionDesc[OpNot].operands.push(OperandId, "'Operand'"); - - InstructionDesc[OpAny].operands.push(OperandId, "'Vector'"); - - InstructionDesc[OpAll].operands.push(OperandId, "'Vector'"); - - InstructionDesc[OpConvertFToU].operands.push(OperandId, "'Float Value'"); - - InstructionDesc[OpConvertFToS].operands.push(OperandId, "'Float Value'"); - - InstructionDesc[OpConvertSToF].operands.push(OperandId, "'Signed Value'"); - - InstructionDesc[OpConvertUToF].operands.push(OperandId, "'Unsigned Value'"); - - InstructionDesc[OpUConvert].operands.push(OperandId, "'Unsigned Value'"); - - InstructionDesc[OpSConvert].operands.push(OperandId, "'Signed Value'"); - - InstructionDesc[OpFConvert].operands.push(OperandId, "'Float Value'"); - - InstructionDesc[OpSatConvertSToU].operands.push(OperandId, "'Signed Value'"); - InstructionDesc[OpSatConvertSToU].capabilities.push_back(CapabilityKernel); - - InstructionDesc[OpSatConvertUToS].operands.push(OperandId, "'Unsigned Value'"); - 
InstructionDesc[OpSatConvertUToS].capabilities.push_back(CapabilityKernel); - - InstructionDesc[OpConvertPtrToU].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpConvertPtrToU].capabilities.push_back(CapabilityAddresses); - - InstructionDesc[OpConvertUToPtr].operands.push(OperandId, "'Integer Value'"); - InstructionDesc[OpConvertUToPtr].capabilities.push_back(CapabilityAddresses); - - InstructionDesc[OpPtrCastToGeneric].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpPtrCastToGeneric].capabilities.push_back(CapabilityKernel); - - InstructionDesc[OpGenericCastToPtr].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpGenericCastToPtr].capabilities.push_back(CapabilityKernel); - - InstructionDesc[OpGenericCastToPtrExplicit].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpGenericCastToPtrExplicit].operands.push(OperandStorage, "'Storage'"); - InstructionDesc[OpGenericCastToPtrExplicit].capabilities.push_back(CapabilityKernel); - - InstructionDesc[OpGenericPtrMemSemantics].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpGenericPtrMemSemantics].capabilities.push_back(CapabilityKernel); - - InstructionDesc[OpBitcast].operands.push(OperandId, "'Operand'"); - - InstructionDesc[OpQuantizeToF16].operands.push(OperandId, "'Value'"); - - InstructionDesc[OpTranspose].capabilities.push_back(CapabilityMatrix); - InstructionDesc[OpTranspose].operands.push(OperandId, "'Matrix'"); - - InstructionDesc[OpIsNan].operands.push(OperandId, "'x'"); - - InstructionDesc[OpIsInf].operands.push(OperandId, "'x'"); - - InstructionDesc[OpIsFinite].capabilities.push_back(CapabilityKernel); - InstructionDesc[OpIsFinite].operands.push(OperandId, "'x'"); - - InstructionDesc[OpIsNormal].capabilities.push_back(CapabilityKernel); - InstructionDesc[OpIsNormal].operands.push(OperandId, "'x'"); - - InstructionDesc[OpSignBitSet].capabilities.push_back(CapabilityKernel); - InstructionDesc[OpSignBitSet].operands.push(OperandId, "'x'"); - - 
InstructionDesc[OpLessOrGreater].capabilities.push_back(CapabilityKernel); - InstructionDesc[OpLessOrGreater].operands.push(OperandId, "'x'"); - InstructionDesc[OpLessOrGreater].operands.push(OperandId, "'y'"); - - InstructionDesc[OpOrdered].capabilities.push_back(CapabilityKernel); - InstructionDesc[OpOrdered].operands.push(OperandId, "'x'"); - InstructionDesc[OpOrdered].operands.push(OperandId, "'y'"); - - InstructionDesc[OpUnordered].capabilities.push_back(CapabilityKernel); - InstructionDesc[OpUnordered].operands.push(OperandId, "'x'"); - InstructionDesc[OpUnordered].operands.push(OperandId, "'y'"); - - InstructionDesc[OpArrayLength].operands.push(OperandId, "'Structure'"); - InstructionDesc[OpArrayLength].operands.push(OperandLiteralNumber, "'Array member'"); - InstructionDesc[OpArrayLength].capabilities.push_back(CapabilityShader); - - InstructionDesc[OpIAdd].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpIAdd].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFAdd].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpFAdd].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpISub].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpISub].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFSub].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpFSub].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpIMul].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpIMul].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFMul].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpFMul].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpUDiv].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpUDiv].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpSDiv].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpSDiv].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFDiv].operands.push(OperandId, 
"'Operand 1'"); - InstructionDesc[OpFDiv].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpUMod].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpUMod].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpSRem].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpSRem].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpSMod].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpSMod].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFRem].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpFRem].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFMod].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpFMod].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpVectorTimesScalar].operands.push(OperandId, "'Vector'"); - InstructionDesc[OpVectorTimesScalar].operands.push(OperandId, "'Scalar'"); - - InstructionDesc[OpMatrixTimesScalar].capabilities.push_back(CapabilityMatrix); - InstructionDesc[OpMatrixTimesScalar].operands.push(OperandId, "'Matrix'"); - InstructionDesc[OpMatrixTimesScalar].operands.push(OperandId, "'Scalar'"); - - InstructionDesc[OpVectorTimesMatrix].capabilities.push_back(CapabilityMatrix); - InstructionDesc[OpVectorTimesMatrix].operands.push(OperandId, "'Vector'"); - InstructionDesc[OpVectorTimesMatrix].operands.push(OperandId, "'Matrix'"); - - InstructionDesc[OpMatrixTimesVector].capabilities.push_back(CapabilityMatrix); - InstructionDesc[OpMatrixTimesVector].operands.push(OperandId, "'Matrix'"); - InstructionDesc[OpMatrixTimesVector].operands.push(OperandId, "'Vector'"); - - InstructionDesc[OpMatrixTimesMatrix].capabilities.push_back(CapabilityMatrix); - InstructionDesc[OpMatrixTimesMatrix].operands.push(OperandId, "'LeftMatrix'"); - InstructionDesc[OpMatrixTimesMatrix].operands.push(OperandId, "'RightMatrix'"); - - InstructionDesc[OpOuterProduct].capabilities.push_back(CapabilityMatrix); - 
InstructionDesc[OpOuterProduct].operands.push(OperandId, "'Vector 1'"); - InstructionDesc[OpOuterProduct].operands.push(OperandId, "'Vector 2'"); - - InstructionDesc[OpDot].operands.push(OperandId, "'Vector 1'"); - InstructionDesc[OpDot].operands.push(OperandId, "'Vector 2'"); - - InstructionDesc[OpIAddCarry].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpIAddCarry].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpISubBorrow].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpISubBorrow].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpUMulExtended].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpUMulExtended].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpSMulExtended].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpSMulExtended].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpShiftRightLogical].operands.push(OperandId, "'Base'"); - InstructionDesc[OpShiftRightLogical].operands.push(OperandId, "'Shift'"); - - InstructionDesc[OpShiftRightArithmetic].operands.push(OperandId, "'Base'"); - InstructionDesc[OpShiftRightArithmetic].operands.push(OperandId, "'Shift'"); - - InstructionDesc[OpShiftLeftLogical].operands.push(OperandId, "'Base'"); - InstructionDesc[OpShiftLeftLogical].operands.push(OperandId, "'Shift'"); - - InstructionDesc[OpLogicalOr].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpLogicalOr].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpLogicalAnd].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpLogicalAnd].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpLogicalEqual].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpLogicalEqual].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpLogicalNotEqual].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpLogicalNotEqual].operands.push(OperandId, "'Operand 2'"); - - 
InstructionDesc[OpLogicalNot].operands.push(OperandId, "'Operand'"); - - InstructionDesc[OpBitwiseOr].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpBitwiseOr].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpBitwiseXor].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpBitwiseXor].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpBitwiseAnd].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpBitwiseAnd].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpBitFieldInsert].capabilities.push_back(CapabilityShader); - InstructionDesc[OpBitFieldInsert].operands.push(OperandId, "'Base'"); - InstructionDesc[OpBitFieldInsert].operands.push(OperandId, "'Insert'"); - InstructionDesc[OpBitFieldInsert].operands.push(OperandId, "'Offset'"); - InstructionDesc[OpBitFieldInsert].operands.push(OperandId, "'Count'"); - - InstructionDesc[OpBitFieldSExtract].capabilities.push_back(CapabilityShader); - InstructionDesc[OpBitFieldSExtract].operands.push(OperandId, "'Base'"); - InstructionDesc[OpBitFieldSExtract].operands.push(OperandId, "'Offset'"); - InstructionDesc[OpBitFieldSExtract].operands.push(OperandId, "'Count'"); - - InstructionDesc[OpBitFieldUExtract].capabilities.push_back(CapabilityShader); - InstructionDesc[OpBitFieldUExtract].operands.push(OperandId, "'Base'"); - InstructionDesc[OpBitFieldUExtract].operands.push(OperandId, "'Offset'"); - InstructionDesc[OpBitFieldUExtract].operands.push(OperandId, "'Count'"); - - InstructionDesc[OpBitReverse].capabilities.push_back(CapabilityShader); - InstructionDesc[OpBitReverse].operands.push(OperandId, "'Base'"); - - InstructionDesc[OpBitCount].operands.push(OperandId, "'Base'"); - - InstructionDesc[OpSelect].operands.push(OperandId, "'Condition'"); - InstructionDesc[OpSelect].operands.push(OperandId, "'Object 1'"); - InstructionDesc[OpSelect].operands.push(OperandId, "'Object 2'"); - - InstructionDesc[OpIEqual].operands.push(OperandId, "'Operand 1'"); - 
InstructionDesc[OpIEqual].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFOrdEqual].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpFOrdEqual].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFUnordEqual].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpFUnordEqual].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpINotEqual].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpINotEqual].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFOrdNotEqual].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpFOrdNotEqual].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFUnordNotEqual].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpFUnordNotEqual].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpULessThan].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpULessThan].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpSLessThan].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpSLessThan].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFOrdLessThan].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpFOrdLessThan].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFUnordLessThan].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpFUnordLessThan].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpUGreaterThan].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpUGreaterThan].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpSGreaterThan].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpSGreaterThan].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFOrdGreaterThan].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpFOrdGreaterThan].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFUnordGreaterThan].operands.push(OperandId, "'Operand 1'"); - 
InstructionDesc[OpFUnordGreaterThan].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpULessThanEqual].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpULessThanEqual].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpSLessThanEqual].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpSLessThanEqual].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFOrdLessThanEqual].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpFOrdLessThanEqual].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFUnordLessThanEqual].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpFUnordLessThanEqual].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpUGreaterThanEqual].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpUGreaterThanEqual].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpSGreaterThanEqual].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpSGreaterThanEqual].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFOrdGreaterThanEqual].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpFOrdGreaterThanEqual].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpFUnordGreaterThanEqual].operands.push(OperandId, "'Operand 1'"); - InstructionDesc[OpFUnordGreaterThanEqual].operands.push(OperandId, "'Operand 2'"); - - InstructionDesc[OpDPdx].capabilities.push_back(CapabilityShader); - InstructionDesc[OpDPdx].operands.push(OperandId, "'P'"); - - InstructionDesc[OpDPdy].capabilities.push_back(CapabilityShader); - InstructionDesc[OpDPdy].operands.push(OperandId, "'P'"); - - InstructionDesc[OpFwidth].capabilities.push_back(CapabilityShader); - InstructionDesc[OpFwidth].operands.push(OperandId, "'P'"); - - InstructionDesc[OpDPdxFine].capabilities.push_back(CapabilityDerivativeControl); - InstructionDesc[OpDPdxFine].operands.push(OperandId, "'P'"); - - 
InstructionDesc[OpDPdyFine].capabilities.push_back(CapabilityDerivativeControl); - InstructionDesc[OpDPdyFine].operands.push(OperandId, "'P'"); - - InstructionDesc[OpFwidthFine].capabilities.push_back(CapabilityDerivativeControl); - InstructionDesc[OpFwidthFine].operands.push(OperandId, "'P'"); - - InstructionDesc[OpDPdxCoarse].capabilities.push_back(CapabilityDerivativeControl); - InstructionDesc[OpDPdxCoarse].operands.push(OperandId, "'P'"); - - InstructionDesc[OpDPdyCoarse].capabilities.push_back(CapabilityDerivativeControl); - InstructionDesc[OpDPdyCoarse].operands.push(OperandId, "'P'"); - - InstructionDesc[OpFwidthCoarse].capabilities.push_back(CapabilityDerivativeControl); - InstructionDesc[OpFwidthCoarse].operands.push(OperandId, "'P'"); - - InstructionDesc[OpEmitVertex].capabilities.push_back(CapabilityGeometry); - - InstructionDesc[OpEndPrimitive].capabilities.push_back(CapabilityGeometry); - - InstructionDesc[OpEmitStreamVertex].operands.push(OperandId, "'Stream'"); - InstructionDesc[OpEmitStreamVertex].capabilities.push_back(CapabilityGeometryStreams); - - InstructionDesc[OpEndStreamPrimitive].operands.push(OperandId, "'Stream'"); - InstructionDesc[OpEndStreamPrimitive].capabilities.push_back(CapabilityGeometryStreams); - - InstructionDesc[OpControlBarrier].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpControlBarrier].operands.push(OperandScope, "'Memory'"); - InstructionDesc[OpControlBarrier].operands.push(OperandMemorySemantics, "'Semantics'"); - - InstructionDesc[OpMemoryBarrier].operands.push(OperandScope, "'Memory'"); - InstructionDesc[OpMemoryBarrier].operands.push(OperandMemorySemantics, "'Semantics'"); - - InstructionDesc[OpImageTexelPointer].operands.push(OperandId, "'Image'"); - InstructionDesc[OpImageTexelPointer].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpImageTexelPointer].operands.push(OperandId, "'Sample'"); - - InstructionDesc[OpAtomicLoad].operands.push(OperandId, "'Pointer'"); - 
InstructionDesc[OpAtomicLoad].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicLoad].operands.push(OperandMemorySemantics, "'Semantics'"); - - InstructionDesc[OpAtomicStore].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpAtomicStore].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicStore].operands.push(OperandMemorySemantics, "'Semantics'"); - InstructionDesc[OpAtomicStore].operands.push(OperandId, "'Value'"); - - InstructionDesc[OpAtomicExchange].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpAtomicExchange].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicExchange].operands.push(OperandMemorySemantics, "'Semantics'"); - InstructionDesc[OpAtomicExchange].operands.push(OperandId, "'Value'"); - - InstructionDesc[OpAtomicCompareExchange].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpAtomicCompareExchange].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicCompareExchange].operands.push(OperandMemorySemantics, "'Equal'"); - InstructionDesc[OpAtomicCompareExchange].operands.push(OperandMemorySemantics, "'Unequal'"); - InstructionDesc[OpAtomicCompareExchange].operands.push(OperandId, "'Value'"); - InstructionDesc[OpAtomicCompareExchange].operands.push(OperandId, "'Comparator'"); - - InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandMemorySemantics, "'Equal'"); - InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandMemorySemantics, "'Unequal'"); - InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandId, "'Value'"); - InstructionDesc[OpAtomicCompareExchangeWeak].operands.push(OperandId, "'Comparator'"); - InstructionDesc[OpAtomicCompareExchangeWeak].capabilities.push_back(CapabilityKernel); - - InstructionDesc[OpAtomicIIncrement].operands.push(OperandId, 
"'Pointer'"); - InstructionDesc[OpAtomicIIncrement].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicIIncrement].operands.push(OperandMemorySemantics, "'Semantics'"); - - InstructionDesc[OpAtomicIDecrement].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpAtomicIDecrement].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicIDecrement].operands.push(OperandMemorySemantics, "'Semantics'"); - - InstructionDesc[OpAtomicIAdd].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpAtomicIAdd].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicIAdd].operands.push(OperandMemorySemantics, "'Semantics'"); - InstructionDesc[OpAtomicIAdd].operands.push(OperandId, "'Value'"); - - InstructionDesc[OpAtomicISub].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpAtomicISub].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicISub].operands.push(OperandMemorySemantics, "'Semantics'"); - InstructionDesc[OpAtomicISub].operands.push(OperandId, "'Value'"); - - InstructionDesc[OpAtomicUMin].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpAtomicUMin].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicUMin].operands.push(OperandMemorySemantics, "'Semantics'"); - InstructionDesc[OpAtomicUMin].operands.push(OperandId, "'Value'"); - - InstructionDesc[OpAtomicUMax].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpAtomicUMax].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicUMax].operands.push(OperandMemorySemantics, "'Semantics'"); - InstructionDesc[OpAtomicUMax].operands.push(OperandId, "'Value'"); - - InstructionDesc[OpAtomicSMin].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpAtomicSMin].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicSMin].operands.push(OperandMemorySemantics, "'Semantics'"); - InstructionDesc[OpAtomicSMin].operands.push(OperandId, "'Value'"); - - InstructionDesc[OpAtomicSMax].operands.push(OperandId, 
"'Pointer'"); - InstructionDesc[OpAtomicSMax].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicSMax].operands.push(OperandMemorySemantics, "'Semantics'"); - InstructionDesc[OpAtomicSMax].operands.push(OperandId, "'Value'"); - - InstructionDesc[OpAtomicAnd].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpAtomicAnd].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicAnd].operands.push(OperandMemorySemantics, "'Semantics'"); - InstructionDesc[OpAtomicAnd].operands.push(OperandId, "'Value'"); - - InstructionDesc[OpAtomicOr].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpAtomicOr].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicOr].operands.push(OperandMemorySemantics, "'Semantics'"); - InstructionDesc[OpAtomicOr].operands.push(OperandId, "'Value'"); - - InstructionDesc[OpAtomicXor].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpAtomicXor].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicXor].operands.push(OperandMemorySemantics, "'Semantics'"); - InstructionDesc[OpAtomicXor].operands.push(OperandId, "'Value'"); - - InstructionDesc[OpAtomicFlagTestAndSet].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpAtomicFlagTestAndSet].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicFlagTestAndSet].operands.push(OperandMemorySemantics, "'Semantics'"); - InstructionDesc[OpAtomicFlagTestAndSet].capabilities.push_back(CapabilityKernel); - - InstructionDesc[OpAtomicFlagClear].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpAtomicFlagClear].operands.push(OperandScope, "'Scope'"); - InstructionDesc[OpAtomicFlagClear].operands.push(OperandMemorySemantics, "'Semantics'"); - InstructionDesc[OpAtomicFlagClear].capabilities.push_back(CapabilityKernel); - - InstructionDesc[OpLoopMerge].operands.push(OperandId, "'Merge Block'"); - InstructionDesc[OpLoopMerge].operands.push(OperandId, "'Continue Target'"); - 
InstructionDesc[OpLoopMerge].operands.push(OperandLoop, ""); - InstructionDesc[OpLoopMerge].operands.push(OperandOptionalLiteral, ""); - - InstructionDesc[OpSelectionMerge].operands.push(OperandId, "'Merge Block'"); - InstructionDesc[OpSelectionMerge].operands.push(OperandSelect, ""); - - InstructionDesc[OpBranch].operands.push(OperandId, "'Target Label'"); - - InstructionDesc[OpBranchConditional].operands.push(OperandId, "'Condition'"); - InstructionDesc[OpBranchConditional].operands.push(OperandId, "'True Label'"); - InstructionDesc[OpBranchConditional].operands.push(OperandId, "'False Label'"); - InstructionDesc[OpBranchConditional].operands.push(OperandVariableLiterals, "'Branch weights'"); - - InstructionDesc[OpSwitch].operands.push(OperandId, "'Selector'"); - InstructionDesc[OpSwitch].operands.push(OperandId, "'Default'"); - InstructionDesc[OpSwitch].operands.push(OperandVariableLiteralId, "'Target'"); - - InstructionDesc[OpKill].capabilities.push_back(CapabilityShader); - - InstructionDesc[OpReturnValue].operands.push(OperandId, "'Value'"); - - InstructionDesc[OpLifetimeStart].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpLifetimeStart].operands.push(OperandLiteralNumber, "'Size'"); - InstructionDesc[OpLifetimeStart].capabilities.push_back(CapabilityKernel); - - InstructionDesc[OpLifetimeStop].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpLifetimeStop].operands.push(OperandLiteralNumber, "'Size'"); - InstructionDesc[OpLifetimeStop].capabilities.push_back(CapabilityKernel); - - InstructionDesc[OpGroupAsyncCopy].capabilities.push_back(CapabilityKernel); - InstructionDesc[OpGroupAsyncCopy].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupAsyncCopy].operands.push(OperandId, "'Destination'"); - InstructionDesc[OpGroupAsyncCopy].operands.push(OperandId, "'Source'"); - InstructionDesc[OpGroupAsyncCopy].operands.push(OperandId, "'Num Elements'"); - InstructionDesc[OpGroupAsyncCopy].operands.push(OperandId, "'Stride'"); - 
InstructionDesc[OpGroupAsyncCopy].operands.push(OperandId, "'Event'"); - - InstructionDesc[OpGroupWaitEvents].capabilities.push_back(CapabilityKernel); - InstructionDesc[OpGroupWaitEvents].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupWaitEvents].operands.push(OperandId, "'Num Events'"); - InstructionDesc[OpGroupWaitEvents].operands.push(OperandId, "'Events List'"); - - InstructionDesc[OpGroupAll].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupAll].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupAll].operands.push(OperandId, "'Predicate'"); - - InstructionDesc[OpGroupAny].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupAny].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupAny].operands.push(OperandId, "'Predicate'"); - - InstructionDesc[OpGroupBroadcast].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupBroadcast].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupBroadcast].operands.push(OperandId, "'Value'"); - InstructionDesc[OpGroupBroadcast].operands.push(OperandId, "'LocalId'"); - - InstructionDesc[OpGroupIAdd].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupIAdd].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupIAdd].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupIAdd].operands.push(OperandId, "'X'"); - - InstructionDesc[OpGroupFAdd].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupFAdd].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupFAdd].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupFAdd].operands.push(OperandId, "'X'"); - - InstructionDesc[OpGroupUMin].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupUMin].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupUMin].operands.push(OperandGroupOperation, "'Operation'"); - 
InstructionDesc[OpGroupUMin].operands.push(OperandId, "'X'"); - - InstructionDesc[OpGroupSMin].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupSMin].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupSMin].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupSMin].operands.push(OperandId, "X"); - - InstructionDesc[OpGroupFMin].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupFMin].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupFMin].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupFMin].operands.push(OperandId, "X"); - - InstructionDesc[OpGroupUMax].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupUMax].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupUMax].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupUMax].operands.push(OperandId, "X"); - - InstructionDesc[OpGroupSMax].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupSMax].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupSMax].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupSMax].operands.push(OperandId, "X"); - - InstructionDesc[OpGroupFMax].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupFMax].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupFMax].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupFMax].operands.push(OperandId, "X"); - - InstructionDesc[OpReadPipe].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpReadPipe].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpReadPipe].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpReadPipe].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpReadPipe].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpWritePipe].capabilities.push_back(CapabilityPipes); - 
InstructionDesc[OpWritePipe].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpWritePipe].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpWritePipe].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpWritePipe].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpReservedReadPipe].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Reserve Id'"); - InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Index'"); - InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpReservedReadPipe].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpReservedWritePipe].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Reserve Id'"); - InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Index'"); - InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Pointer'"); - InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpReservedWritePipe].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpReserveReadPipePackets].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpReserveReadPipePackets].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpReserveReadPipePackets].operands.push(OperandId, "'Num Packets'"); - InstructionDesc[OpReserveReadPipePackets].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpReserveReadPipePackets].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpReserveWritePipePackets].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpReserveWritePipePackets].operands.push(OperandId, 
"'Pipe'"); - InstructionDesc[OpReserveWritePipePackets].operands.push(OperandId, "'Num Packets'"); - InstructionDesc[OpReserveWritePipePackets].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpReserveWritePipePackets].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpCommitReadPipe].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpCommitReadPipe].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpCommitReadPipe].operands.push(OperandId, "'Reserve Id'"); - InstructionDesc[OpCommitReadPipe].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpCommitReadPipe].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpCommitWritePipe].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpCommitWritePipe].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpCommitWritePipe].operands.push(OperandId, "'Reserve Id'"); - InstructionDesc[OpCommitWritePipe].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpCommitWritePipe].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpIsValidReserveId].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpIsValidReserveId].operands.push(OperandId, "'Reserve Id'"); - - InstructionDesc[OpGetNumPipePackets].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpGetNumPipePackets].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpGetNumPipePackets].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpGetNumPipePackets].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpGetMaxPipePackets].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpGetMaxPipePackets].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpGetMaxPipePackets].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpGetMaxPipePackets].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpGroupReserveReadPipePackets].capabilities.push_back(CapabilityPipes); - 
InstructionDesc[OpGroupReserveReadPipePackets].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupReserveReadPipePackets].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpGroupReserveReadPipePackets].operands.push(OperandId, "'Num Packets'"); - InstructionDesc[OpGroupReserveReadPipePackets].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpGroupReserveReadPipePackets].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpGroupReserveWritePipePackets].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpGroupReserveWritePipePackets].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupReserveWritePipePackets].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpGroupReserveWritePipePackets].operands.push(OperandId, "'Num Packets'"); - InstructionDesc[OpGroupReserveWritePipePackets].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpGroupReserveWritePipePackets].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpGroupCommitReadPipe].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpGroupCommitReadPipe].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupCommitReadPipe].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpGroupCommitReadPipe].operands.push(OperandId, "'Reserve Id'"); - InstructionDesc[OpGroupCommitReadPipe].operands.push(OperandId, "'Packet Size'"); - InstructionDesc[OpGroupCommitReadPipe].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpGroupCommitWritePipe].capabilities.push_back(CapabilityPipes); - InstructionDesc[OpGroupCommitWritePipe].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupCommitWritePipe].operands.push(OperandId, "'Pipe'"); - InstructionDesc[OpGroupCommitWritePipe].operands.push(OperandId, "'Reserve Id'"); - InstructionDesc[OpGroupCommitWritePipe].operands.push(OperandId, "'Packet Size'"); - 
InstructionDesc[OpGroupCommitWritePipe].operands.push(OperandId, "'Packet Alignment'"); - - InstructionDesc[OpBuildNDRange].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpBuildNDRange].operands.push(OperandId, "'GlobalWorkSize'"); - InstructionDesc[OpBuildNDRange].operands.push(OperandId, "'LocalWorkSize'"); - InstructionDesc[OpBuildNDRange].operands.push(OperandId, "'GlobalWorkOffset'"); - - InstructionDesc[OpGetDefaultQueue].capabilities.push_back(CapabilityDeviceEnqueue); - - InstructionDesc[OpCaptureEventProfilingInfo].capabilities.push_back(CapabilityDeviceEnqueue); - - InstructionDesc[OpCaptureEventProfilingInfo].operands.push(OperandId, "'Event'"); - InstructionDesc[OpCaptureEventProfilingInfo].operands.push(OperandId, "'Profiling Info'"); - InstructionDesc[OpCaptureEventProfilingInfo].operands.push(OperandId, "'Value'"); - - InstructionDesc[OpSetUserEventStatus].capabilities.push_back(CapabilityDeviceEnqueue); - - InstructionDesc[OpSetUserEventStatus].operands.push(OperandId, "'Event'"); - InstructionDesc[OpSetUserEventStatus].operands.push(OperandId, "'Status'"); - - InstructionDesc[OpIsValidEvent].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpIsValidEvent].operands.push(OperandId, "'Event'"); - - InstructionDesc[OpCreateUserEvent].capabilities.push_back(CapabilityDeviceEnqueue); - - InstructionDesc[OpRetainEvent].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpRetainEvent].operands.push(OperandId, "'Event'"); - - InstructionDesc[OpReleaseEvent].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpReleaseEvent].operands.push(OperandId, "'Event'"); - - InstructionDesc[OpGetKernelWorkGroupSize].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpGetKernelWorkGroupSize].operands.push(OperandId, "'Invoke'"); - InstructionDesc[OpGetKernelWorkGroupSize].operands.push(OperandId, "'Param'"); - InstructionDesc[OpGetKernelWorkGroupSize].operands.push(OperandId, "'Param 
Size'"); - InstructionDesc[OpGetKernelWorkGroupSize].operands.push(OperandId, "'Param Align'"); - - InstructionDesc[OpGetKernelPreferredWorkGroupSizeMultiple].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpGetKernelPreferredWorkGroupSizeMultiple].operands.push(OperandId, "'Invoke'"); - InstructionDesc[OpGetKernelPreferredWorkGroupSizeMultiple].operands.push(OperandId, "'Param'"); - InstructionDesc[OpGetKernelPreferredWorkGroupSizeMultiple].operands.push(OperandId, "'Param Size'"); - InstructionDesc[OpGetKernelPreferredWorkGroupSizeMultiple].operands.push(OperandId, "'Param Align'"); - - InstructionDesc[OpGetKernelNDrangeSubGroupCount].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpGetKernelNDrangeSubGroupCount].operands.push(OperandId, "'ND Range'"); - InstructionDesc[OpGetKernelNDrangeSubGroupCount].operands.push(OperandId, "'Invoke'"); - InstructionDesc[OpGetKernelNDrangeSubGroupCount].operands.push(OperandId, "'Param'"); - InstructionDesc[OpGetKernelNDrangeSubGroupCount].operands.push(OperandId, "'Param Size'"); - InstructionDesc[OpGetKernelNDrangeSubGroupCount].operands.push(OperandId, "'Param Align'"); - - InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].operands.push(OperandId, "'ND Range'"); - InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].operands.push(OperandId, "'Invoke'"); - InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].operands.push(OperandId, "'Param'"); - InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].operands.push(OperandId, "'Param Size'"); - InstructionDesc[OpGetKernelNDrangeMaxSubGroupSize].operands.push(OperandId, "'Param Align'"); - - InstructionDesc[OpEnqueueKernel].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Queue'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Flags'"); - 
InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'ND Range'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Num Events'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Wait Events'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Ret Event'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Invoke'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Param'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Param Size'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandId, "'Param Align'"); - InstructionDesc[OpEnqueueKernel].operands.push(OperandVariableIds, "'Local Size'"); - - InstructionDesc[OpEnqueueMarker].capabilities.push_back(CapabilityDeviceEnqueue); - InstructionDesc[OpEnqueueMarker].operands.push(OperandId, "'Queue'"); - InstructionDesc[OpEnqueueMarker].operands.push(OperandId, "'Num Events'"); - InstructionDesc[OpEnqueueMarker].operands.push(OperandId, "'Wait Events'"); - InstructionDesc[OpEnqueueMarker].operands.push(OperandId, "'Ret Event'"); - - InstructionDesc[OpSubgroupBallotKHR].operands.push(OperandId, "'Predicate'"); - - InstructionDesc[OpSubgroupFirstInvocationKHR].operands.push(OperandId, "'Value'"); - - InstructionDesc[OpSubgroupAnyKHR].capabilities.push_back(CapabilitySubgroupVoteKHR); - InstructionDesc[OpSubgroupAnyKHR].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpSubgroupAnyKHR].operands.push(OperandId, "'Predicate'"); - - InstructionDesc[OpSubgroupAllKHR].capabilities.push_back(CapabilitySubgroupVoteKHR); - InstructionDesc[OpSubgroupAllKHR].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpSubgroupAllKHR].operands.push(OperandId, "'Predicate'"); - - InstructionDesc[OpSubgroupAllEqualKHR].capabilities.push_back(CapabilitySubgroupVoteKHR); - InstructionDesc[OpSubgroupAllEqualKHR].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpSubgroupAllEqualKHR].operands.push(OperandId, 
"'Predicate'"); - - InstructionDesc[OpSubgroupReadInvocationKHR].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpSubgroupReadInvocationKHR].operands.push(OperandId, "'Value'"); - InstructionDesc[OpSubgroupReadInvocationKHR].operands.push(OperandId, "'Index'"); - -#ifdef AMD_EXTENSIONS - InstructionDesc[OpGroupIAddNonUniformAMD].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupIAddNonUniformAMD].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupIAddNonUniformAMD].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupIAddNonUniformAMD].operands.push(OperandId, "'X'"); - - InstructionDesc[OpGroupFAddNonUniformAMD].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupFAddNonUniformAMD].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupFAddNonUniformAMD].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupFAddNonUniformAMD].operands.push(OperandId, "'X'"); - - InstructionDesc[OpGroupUMinNonUniformAMD].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupUMinNonUniformAMD].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupUMinNonUniformAMD].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupUMinNonUniformAMD].operands.push(OperandId, "'X'"); - - InstructionDesc[OpGroupSMinNonUniformAMD].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupSMinNonUniformAMD].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupSMinNonUniformAMD].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupSMinNonUniformAMD].operands.push(OperandId, "X"); - - InstructionDesc[OpGroupFMinNonUniformAMD].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupFMinNonUniformAMD].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupFMinNonUniformAMD].operands.push(OperandGroupOperation, "'Operation'"); - 
InstructionDesc[OpGroupFMinNonUniformAMD].operands.push(OperandId, "X"); - - InstructionDesc[OpGroupUMaxNonUniformAMD].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupUMaxNonUniformAMD].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupUMaxNonUniformAMD].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupUMaxNonUniformAMD].operands.push(OperandId, "X"); - - InstructionDesc[OpGroupSMaxNonUniformAMD].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupSMaxNonUniformAMD].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupSMaxNonUniformAMD].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupSMaxNonUniformAMD].operands.push(OperandId, "X"); - - InstructionDesc[OpGroupFMaxNonUniformAMD].capabilities.push_back(CapabilityGroups); - InstructionDesc[OpGroupFMaxNonUniformAMD].operands.push(OperandScope, "'Execution'"); - InstructionDesc[OpGroupFMaxNonUniformAMD].operands.push(OperandGroupOperation, "'Operation'"); - InstructionDesc[OpGroupFMaxNonUniformAMD].operands.push(OperandId, "X"); - - InstructionDesc[OpFragmentMaskFetchAMD].capabilities.push_back(CapabilityFragmentMaskAMD); - InstructionDesc[OpFragmentMaskFetchAMD].operands.push(OperandId, "'Image'"); - InstructionDesc[OpFragmentMaskFetchAMD].operands.push(OperandId, "'Coordinate'"); - - InstructionDesc[OpFragmentFetchAMD].capabilities.push_back(CapabilityFragmentMaskAMD); - InstructionDesc[OpFragmentFetchAMD].operands.push(OperandId, "'Image'"); - InstructionDesc[OpFragmentFetchAMD].operands.push(OperandId, "'Coordinate'"); - InstructionDesc[OpFragmentFetchAMD].operands.push(OperandId, "'Fragment Index'"); -#endif -} - -}; // end spv namespace diff --git a/third_party/glslang-spirv/doc.h b/third_party/glslang-spirv/doc.h deleted file mode 100644 index 710ca1a52..000000000 --- a/third_party/glslang-spirv/doc.h +++ /dev/null @@ -1,262 +0,0 @@ -// -// Copyright (C) 2014-2015 LunarG, Inc. 
-// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -// -// Parameterize the SPIR-V enumerants. -// - -#pragma once - -#include "spirv.hpp" - -#include - -namespace spv { - -// Fill in all the parameters -void Parameterize(); - -// Return the English names of all the enums. 
-const char* SourceString(int); -const char* AddressingString(int); -const char* MemoryString(int); -const char* ExecutionModelString(int); -const char* ExecutionModeString(int); -const char* StorageClassString(int); -const char* DecorationString(int); -const char* BuiltInString(int); -const char* DimensionString(int); -const char* SelectControlString(int); -const char* LoopControlString(int); -const char* FunctionControlString(int); -const char* SamplerAddressingModeString(int); -const char* SamplerFilterModeString(int); -const char* ImageFormatString(int); -const char* ImageChannelOrderString(int); -const char* ImageChannelTypeString(int); -const char* ImageChannelDataTypeString(int type); -const char* ImageOperandsString(int format); -const char* ImageOperands(int); -const char* FPFastMathString(int); -const char* FPRoundingModeString(int); -const char* LinkageTypeString(int); -const char* FuncParamAttrString(int); -const char* AccessQualifierString(int); -const char* MemorySemanticsString(int); -const char* MemoryAccessString(int); -const char* ExecutionScopeString(int); -const char* GroupOperationString(int); -const char* KernelEnqueueFlagsString(int); -const char* KernelProfilingInfoString(int); -const char* CapabilityString(int); -const char* OpcodeString(int); -const char* ScopeString(int mem); - -// For grouping opcodes into subsections -enum OpcodeClass { - OpClassMisc, - OpClassDebug, - OpClassAnnotate, - OpClassExtension, - OpClassMode, - OpClassType, - OpClassConstant, - OpClassMemory, - OpClassFunction, - OpClassImage, - OpClassConvert, - OpClassComposite, - OpClassArithmetic, - OpClassBit, - OpClassRelationalLogical, - OpClassDerivative, - OpClassFlowControl, - OpClassAtomic, - OpClassPrimitive, - OpClassBarrier, - OpClassGroup, - OpClassDeviceSideEnqueue, - OpClassPipe, - - OpClassCount, - OpClassMissing // all instructions start out as missing -}; - -// For parameterizing operands. 
-enum OperandClass { - OperandNone, - OperandId, - OperandVariableIds, - OperandOptionalLiteral, - OperandOptionalLiteralString, - OperandVariableLiterals, - OperandVariableIdLiteral, - OperandVariableLiteralId, - OperandLiteralNumber, - OperandLiteralString, - OperandSource, - OperandExecutionModel, - OperandAddressing, - OperandMemory, - OperandExecutionMode, - OperandStorage, - OperandDimensionality, - OperandSamplerAddressingMode, - OperandSamplerFilterMode, - OperandSamplerImageFormat, - OperandImageChannelOrder, - OperandImageChannelDataType, - OperandImageOperands, - OperandFPFastMath, - OperandFPRoundingMode, - OperandLinkageType, - OperandAccessQualifier, - OperandFuncParamAttr, - OperandDecoration, - OperandBuiltIn, - OperandSelect, - OperandLoop, - OperandFunction, - OperandMemorySemantics, - OperandMemoryAccess, - OperandScope, - OperandGroupOperation, - OperandKernelEnqueueFlags, - OperandKernelProfilingInfo, - OperandCapability, - - OperandOpcode, - - OperandCount -}; - -// Any specific enum can have a set of capabilities that allow it: -typedef std::vector EnumCaps; - -// Parameterize a set of operands with their OperandClass(es) and descriptions. 
-class OperandParameters { -public: - OperandParameters() { } - void push(OperandClass oc, const char* d, bool opt = false) - { - opClass.push_back(oc); - desc.push_back(d); - optional.push_back(opt); - } - void setOptional(); - OperandClass getClass(int op) const { return opClass[op]; } - const char* getDesc(int op) const { return desc[op]; } - bool isOptional(int op) const { return optional[op]; } - int getNum() const { return (int)opClass.size(); } - -protected: - std::vector opClass; - std::vector desc; - std::vector optional; -}; - -// Parameterize an enumerant -class EnumParameters { -public: - EnumParameters() : desc(0) { } - EnumCaps caps; - const char* desc; -}; - -// Parameterize a set of enumerants that form an enum -class EnumDefinition : public EnumParameters { -public: - EnumDefinition() : - ceiling(0), bitmask(false), getName(0), enumParams(0), operandParams(0) { } - void set(int ceil, const char* (*name)(int), EnumParameters* ep, bool mask = false) - { - ceiling = ceil; - getName = name; - bitmask = mask; - enumParams = ep; - } - void setOperands(OperandParameters* op) { operandParams = op; } - int ceiling; // ceiling of enumerants - bool bitmask; // true if these enumerants combine into a bitmask - const char* (*getName)(int); // a function that returns the name for each enumerant value (or shift) - EnumParameters* enumParams; // parameters for each individual enumerant - OperandParameters* operandParams; // sets of operands -}; - -// Parameterize an instruction's logical format, including its known set of operands, -// per OperandParameters above. 
-class InstructionParameters { -public: - InstructionParameters() : - opDesc("TBD"), - opClass(OpClassMissing), - typePresent(true), // most normal, only exceptions have to be spelled out - resultPresent(true) // most normal, only exceptions have to be spelled out - { } - - void setResultAndType(bool r, bool t) - { - resultPresent = r; - typePresent = t; - } - - bool hasResult() const { return resultPresent != 0; } - bool hasType() const { return typePresent != 0; } - - const char* opDesc; - EnumCaps capabilities; - OpcodeClass opClass; - OperandParameters operands; - -protected: - int typePresent : 1; - int resultPresent : 1; -}; - -const int OpcodeCeiling = 321; - -// The set of objects that hold all the instruction/operand -// parameterization information. -extern InstructionParameters InstructionDesc[]; - -// These hold definitions of the enumerants used for operands -extern EnumDefinition OperandClassParams[]; - -const char* GetOperandDesc(OperandClass operand); -void PrintImmediateRow(int imm, const char* name, const EnumParameters* enumParams, bool caps, bool hex = false); -const char* AccessQualifierString(int attr); - -void PrintOperands(const OperandParameters& operands, int reservedOperands); - -}; // end namespace spv diff --git a/third_party/glslang-spirv/hex_float.h b/third_party/glslang-spirv/hex_float.h deleted file mode 100644 index 905b21a45..000000000 --- a/third_party/glslang-spirv/hex_float.h +++ /dev/null @@ -1,1078 +0,0 @@ -// Copyright (c) 2015-2016 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef LIBSPIRV_UTIL_HEX_FLOAT_H_ -#define LIBSPIRV_UTIL_HEX_FLOAT_H_ - -#include -#include -#include -#include -#include -#include -#include - -#if defined(_MSC_VER) && _MSC_VER < 1800 -namespace std { -bool isnan(double f) -{ - return ::_isnan(f) != 0; -} -bool isinf(double f) -{ - return ::_finite(f) == 0; -} -} -#endif - -#include "bitutils.h" - -namespace spvutils { - -class Float16 { - public: - Float16(uint16_t v) : val(v) {} - Float16() {} - static bool isNan(const Float16& val) { - return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) != 0); - } - // Returns true if the given value is any kind of infinity. - static bool isInfinity(const Float16& val) { - return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) == 0); - } - Float16(const Float16& other) { val = other.val; } - uint16_t get_value() const { return val; } - - // Returns the maximum normal value. - static Float16 max() { return Float16(0x7bff); } - // Returns the lowest normal value. - static Float16 lowest() { return Float16(0xfbff); } - - private: - uint16_t val; -}; - -// To specialize this type, you must override uint_type to define -// an unsigned integer that can fit your floating point type. -// You must also add a isNan function that returns true if -// a value is Nan. -template -struct FloatProxyTraits { - typedef void uint_type; -}; - -template <> -struct FloatProxyTraits { - typedef uint32_t uint_type; - static bool isNan(float f) { return std::isnan(f); } - // Returns true if the given value is any kind of infinity. - static bool isInfinity(float f) { return std::isinf(f); } - // Returns the maximum normal value. - static float max() { return std::numeric_limits::max(); } - // Returns the lowest normal value. 
- static float lowest() { return std::numeric_limits::lowest(); } -}; - -template <> -struct FloatProxyTraits { - typedef uint64_t uint_type; - static bool isNan(double f) { return std::isnan(f); } - // Returns true if the given value is any kind of infinity. - static bool isInfinity(double f) { return std::isinf(f); } - // Returns the maximum normal value. - static double max() { return std::numeric_limits::max(); } - // Returns the lowest normal value. - static double lowest() { return std::numeric_limits::lowest(); } -}; - -template <> -struct FloatProxyTraits { - typedef uint16_t uint_type; - static bool isNan(Float16 f) { return Float16::isNan(f); } - // Returns true if the given value is any kind of infinity. - static bool isInfinity(Float16 f) { return Float16::isInfinity(f); } - // Returns the maximum normal value. - static Float16 max() { return Float16::max(); } - // Returns the lowest normal value. - static Float16 lowest() { return Float16::lowest(); } -}; - -// Since copying a floating point number (especially if it is NaN) -// does not guarantee that bits are preserved, this class lets us -// store the type and use it as a float when necessary. -template -class FloatProxy { - public: - typedef typename FloatProxyTraits::uint_type uint_type; - - // Since this is to act similar to the normal floats, - // do not initialize the data by default. - FloatProxy() {} - - // Intentionally non-explicit. This is a proxy type so - // implicit conversions allow us to use it more transparently. - FloatProxy(T val) { data_ = BitwiseCast(val); } - - // Intentionally non-explicit. This is a proxy type so - // implicit conversions allow us to use it more transparently. - FloatProxy(uint_type val) { data_ = val; } - - // This is helpful to have and is guaranteed not to stomp bits. - FloatProxy operator-() const { - return static_cast(data_ ^ - (uint_type(0x1) << (sizeof(T) * 8 - 1))); - } - - // Returns the data as a floating point value. 
- T getAsFloat() const { return BitwiseCast(data_); } - - // Returns the raw data. - uint_type data() const { return data_; } - - // Returns true if the value represents any type of NaN. - bool isNan() { return FloatProxyTraits::isNan(getAsFloat()); } - // Returns true if the value represents any type of infinity. - bool isInfinity() { return FloatProxyTraits::isInfinity(getAsFloat()); } - - // Returns the maximum normal value. - static FloatProxy max() { - return FloatProxy(FloatProxyTraits::max()); - } - // Returns the lowest normal value. - static FloatProxy lowest() { - return FloatProxy(FloatProxyTraits::lowest()); - } - - private: - uint_type data_; -}; - -template -bool operator==(const FloatProxy& first, const FloatProxy& second) { - return first.data() == second.data(); -} - -// Reads a FloatProxy value as a normal float from a stream. -template -std::istream& operator>>(std::istream& is, FloatProxy& value) { - T float_val; - is >> float_val; - value = FloatProxy(float_val); - return is; -} - -// This is an example traits. It is not meant to be used in practice, but will -// be the default for any non-specialized type. -template -struct HexFloatTraits { - // Integer type that can store this hex-float. - typedef void uint_type; - // Signed integer type that can store this hex-float. - typedef void int_type; - // The numerical type that this HexFloat represents. - typedef void underlying_type; - // The type needed to construct the underlying type. - typedef void native_type; - // The number of bits that are actually relevant in the uint_type. - // This allows us to deal with, for example, 24-bit values in a 32-bit - // integer. - static const uint32_t num_used_bits = 0; - // Number of bits that represent the exponent. - static const uint32_t num_exponent_bits = 0; - // Number of bits that represent the fractional part. - static const uint32_t num_fraction_bits = 0; - // The bias of the exponent. 
(How much we need to subtract from the stored - // value to get the correct value.) - static const uint32_t exponent_bias = 0; -}; - -// Traits for IEEE float. -// 1 sign bit, 8 exponent bits, 23 fractional bits. -template <> -struct HexFloatTraits> { - typedef uint32_t uint_type; - typedef int32_t int_type; - typedef FloatProxy underlying_type; - typedef float native_type; - static const uint_type num_used_bits = 32; - static const uint_type num_exponent_bits = 8; - static const uint_type num_fraction_bits = 23; - static const uint_type exponent_bias = 127; -}; - -// Traits for IEEE double. -// 1 sign bit, 11 exponent bits, 52 fractional bits. -template <> -struct HexFloatTraits> { - typedef uint64_t uint_type; - typedef int64_t int_type; - typedef FloatProxy underlying_type; - typedef double native_type; - static const uint_type num_used_bits = 64; - static const uint_type num_exponent_bits = 11; - static const uint_type num_fraction_bits = 52; - static const uint_type exponent_bias = 1023; -}; - -// Traits for IEEE half. -// 1 sign bit, 5 exponent bits, 10 fractional bits. -template <> -struct HexFloatTraits> { - typedef uint16_t uint_type; - typedef int16_t int_type; - typedef uint16_t underlying_type; - typedef uint16_t native_type; - static const uint_type num_used_bits = 16; - static const uint_type num_exponent_bits = 5; - static const uint_type num_fraction_bits = 10; - static const uint_type exponent_bias = 15; -}; - -enum round_direction { - kRoundToZero, - kRoundToNearestEven, - kRoundToPositiveInfinity, - kRoundToNegativeInfinity -}; - -// Template class that houses a floating pointer number. -// It exposes a number of constants based on the provided traits to -// assist in interpreting the bits of the value. 
-template > -class HexFloat { - public: - typedef typename Traits::uint_type uint_type; - typedef typename Traits::int_type int_type; - typedef typename Traits::underlying_type underlying_type; - typedef typename Traits::native_type native_type; - - explicit HexFloat(T f) : value_(f) {} - - T value() const { return value_; } - void set_value(T f) { value_ = f; } - - // These are all written like this because it is convenient to have - // compile-time constants for all of these values. - - // Pass-through values to save typing. - static const uint32_t num_used_bits = Traits::num_used_bits; - static const uint32_t exponent_bias = Traits::exponent_bias; - static const uint32_t num_exponent_bits = Traits::num_exponent_bits; - static const uint32_t num_fraction_bits = Traits::num_fraction_bits; - - // Number of bits to shift left to set the highest relevant bit. - static const uint32_t top_bit_left_shift = num_used_bits - 1; - // How many nibbles (hex characters) the fractional part takes up. - static const uint32_t fraction_nibbles = (num_fraction_bits + 3) / 4; - // If the fractional part does not fit evenly into a hex character (4-bits) - // then we have to left-shift to get rid of leading 0s. This is the amount - // we have to shift (might be 0). - static const uint32_t num_overflow_bits = - fraction_nibbles * 4 - num_fraction_bits; - - // The representation of the fraction, not the actual bits. This - // includes the leading bit that is usually implicit. - static const uint_type fraction_represent_mask = - spvutils::SetBits::get; - - // The topmost bit in the nibble-aligned fraction. - static const uint_type fraction_top_bit = - uint_type(1) << (num_fraction_bits + num_overflow_bits - 1); - - // The least significant bit in the exponent, which is also the bit - // immediately to the left of the significand. - static const uint_type first_exponent_bit = uint_type(1) - << (num_fraction_bits); - - // The mask for the encoded fraction. 
It does not include the - // implicit bit. - static const uint_type fraction_encode_mask = - spvutils::SetBits::get; - - // The bit that is used as a sign. - static const uint_type sign_mask = uint_type(1) << top_bit_left_shift; - - // The bits that represent the exponent. - static const uint_type exponent_mask = - spvutils::SetBits::get; - - // How far left the exponent is shifted. - static const uint32_t exponent_left_shift = num_fraction_bits; - - // How far from the right edge the fraction is shifted. - static const uint32_t fraction_right_shift = - static_cast(sizeof(uint_type) * 8) - num_fraction_bits; - - // The maximum representable unbiased exponent. - static const int_type max_exponent = - (exponent_mask >> num_fraction_bits) - exponent_bias; - // The minimum representable exponent for normalized numbers. - static const int_type min_exponent = -static_cast(exponent_bias); - - // Returns the bits associated with the value. - uint_type getBits() const { return spvutils::BitwiseCast(value_); } - - // Returns the bits associated with the value, without the leading sign bit. - uint_type getUnsignedBits() const { - return static_cast(spvutils::BitwiseCast(value_) & - ~sign_mask); - } - - // Returns the bits associated with the exponent, shifted to start at the - // lsb of the type. - const uint_type getExponentBits() const { - return static_cast((getBits() & exponent_mask) >> - num_fraction_bits); - } - - // Returns the exponent in unbiased form. This is the exponent in the - // human-friendly form. - const int_type getUnbiasedExponent() const { - return static_cast(getExponentBits() - exponent_bias); - } - - // Returns just the significand bits from the value. - const uint_type getSignificandBits() const { - return getBits() & fraction_encode_mask; - } - - // If the number was normalized, returns the unbiased exponent. - // If the number was denormal, normalize the exponent first. 
- const int_type getUnbiasedNormalizedExponent() const { - if ((getBits() & ~sign_mask) == 0) { // special case if everything is 0 - return 0; - } - int_type exp = getUnbiasedExponent(); - if (exp == min_exponent) { // We are in denorm land. - uint_type significand_bits = getSignificandBits(); - while ((significand_bits & (first_exponent_bit >> 1)) == 0) { - significand_bits = static_cast(significand_bits << 1); - exp = static_cast(exp - 1); - } - significand_bits &= fraction_encode_mask; - } - return exp; - } - - // Returns the signficand after it has been normalized. - const uint_type getNormalizedSignificand() const { - int_type unbiased_exponent = getUnbiasedNormalizedExponent(); - uint_type significand = getSignificandBits(); - for (int_type i = unbiased_exponent; i <= min_exponent; ++i) { - significand = static_cast(significand << 1); - } - significand &= fraction_encode_mask; - return significand; - } - - // Returns true if this number represents a negative value. - bool isNegative() const { return (getBits() & sign_mask) != 0; } - - // Sets this HexFloat from the individual components. - // Note this assumes EVERY significand is normalized, and has an implicit - // leading one. This means that the only way that this method will set 0, - // is if you set a number so denormalized that it underflows. - // Do not use this method with raw bits extracted from a subnormal number, - // since subnormals do not have an implicit leading 1 in the significand. - // The significand is also expected to be in the - // lowest-most num_fraction_bits of the uint_type. - // The exponent is expected to be unbiased, meaning an exponent of - // 0 actually means 0. - // If underflow_round_up is set, then on underflow, if a number is non-0 - // and would underflow, we round up to the smallest denorm. 
- void setFromSignUnbiasedExponentAndNormalizedSignificand( - bool negative, int_type exponent, uint_type significand, - bool round_denorm_up) { - bool significand_is_zero = significand == 0; - - if (exponent <= min_exponent) { - // If this was denormalized, then we have to shift the bit on, meaning - // the significand is not zero. - significand_is_zero = false; - significand |= first_exponent_bit; - significand = static_cast(significand >> 1); - } - - while (exponent < min_exponent) { - significand = static_cast(significand >> 1); - ++exponent; - } - - if (exponent == min_exponent) { - if (significand == 0 && !significand_is_zero && round_denorm_up) { - significand = static_cast(0x1); - } - } - - uint_type new_value = 0; - if (negative) { - new_value = static_cast(new_value | sign_mask); - } - exponent = static_cast(exponent + exponent_bias); - assert(exponent >= 0); - - // put it all together - exponent = static_cast((exponent << exponent_left_shift) & - exponent_mask); - significand = static_cast(significand & fraction_encode_mask); - new_value = static_cast(new_value | (exponent | significand)); - value_ = BitwiseCast(new_value); - } - - // Increments the significand of this number by the given amount. - // If this would spill the significand into the implicit bit, - // carry is set to true and the significand is shifted to fit into - // the correct location, otherwise carry is set to false. - // All significands and to_increment are assumed to be within the bounds - // for a valid significand. - static uint_type incrementSignificand(uint_type significand, - uint_type to_increment, bool* carry) { - significand = static_cast(significand + to_increment); - *carry = false; - if (significand & first_exponent_bit) { - *carry = true; - // The implicit 1-bit will have carried, so we should zero-out the - // top bit and shift back. 
- significand = static_cast(significand & ~first_exponent_bit); - significand = static_cast(significand >> 1); - } - return significand; - } - - // These exist because MSVC throws warnings on negative right-shifts - // even if they are not going to be executed. Eg: - // constant_number < 0? 0: constant_number - // These convert the negative left-shifts into right shifts. - - template - uint_type negatable_left_shift(int_type N, uint_type val) - { - if(N >= 0) - return val << N; - - return val >> -N; - } - - template - uint_type negatable_right_shift(int_type N, uint_type val) - { - if(N >= 0) - return val >> N; - - return val << -N; - } - - // Returns the significand, rounded to fit in a significand in - // other_T. This is shifted so that the most significant - // bit of the rounded number lines up with the most significant bit - // of the returned significand. - template - typename other_T::uint_type getRoundedNormalizedSignificand( - round_direction dir, bool* carry_bit) { - typedef typename other_T::uint_type other_uint_type; - static const int_type num_throwaway_bits = - static_cast(num_fraction_bits) - - static_cast(other_T::num_fraction_bits); - - static const uint_type last_significant_bit = - (num_throwaway_bits < 0) - ? 0 - : negatable_left_shift(num_throwaway_bits, 1u); - static const uint_type first_rounded_bit = - (num_throwaway_bits < 1) - ? 0 - : negatable_left_shift(num_throwaway_bits - 1, 1u); - - static const uint_type throwaway_mask_bits = - num_throwaway_bits > 0 ? num_throwaway_bits : 0; - static const uint_type throwaway_mask = - spvutils::SetBits::get; - - *carry_bit = false; - other_uint_type out_val = 0; - uint_type significand = getNormalizedSignificand(); - // If we are up-casting, then we just have to shift to the right location. 
- if (num_throwaway_bits <= 0) { - out_val = static_cast(significand); - uint_type shift_amount = static_cast(-num_throwaway_bits); - out_val = static_cast(out_val << shift_amount); - return out_val; - } - - // If every non-representable bit is 0, then we don't have any casting to - // do. - if ((significand & throwaway_mask) == 0) { - return static_cast( - negatable_right_shift(num_throwaway_bits, significand)); - } - - bool round_away_from_zero = false; - // We actually have to narrow the significand here, so we have to follow the - // rounding rules. - switch (dir) { - case kRoundToZero: - break; - case kRoundToPositiveInfinity: - round_away_from_zero = !isNegative(); - break; - case kRoundToNegativeInfinity: - round_away_from_zero = isNegative(); - break; - case kRoundToNearestEven: - // Have to round down, round bit is 0 - if ((first_rounded_bit & significand) == 0) { - break; - } - if (((significand & throwaway_mask) & ~first_rounded_bit) != 0) { - // If any subsequent bit of the rounded portion is non-0 then we round - // up. - round_away_from_zero = true; - break; - } - // We are exactly half-way between 2 numbers, pick even. - if ((significand & last_significant_bit) != 0) { - // 1 for our last bit, round up. - round_away_from_zero = true; - break; - } - break; - } - - if (round_away_from_zero) { - return static_cast( - negatable_right_shift(num_throwaway_bits, incrementSignificand( - significand, last_significant_bit, carry_bit))); - } else { - return static_cast( - negatable_right_shift(num_throwaway_bits, significand)); - } - } - - // Casts this value to another HexFloat. If the cast is widening, - // then round_dir is ignored. If the cast is narrowing, then - // the result is rounded in the direction specified. - // This number will retain Nan and Inf values. - // It will also saturate to Inf if the number overflows, and - // underflow to (0 or min depending on rounding) if the number underflows. 
- template - void castTo(other_T& other, round_direction round_dir) { - other = other_T(static_cast(0)); - bool negate = isNegative(); - if (getUnsignedBits() == 0) { - if (negate) { - other.set_value(-other.value()); - } - return; - } - uint_type significand = getSignificandBits(); - bool carried = false; - typename other_T::uint_type rounded_significand = - getRoundedNormalizedSignificand(round_dir, &carried); - - int_type exponent = getUnbiasedExponent(); - if (exponent == min_exponent) { - // If we are denormal, normalize the exponent, so that we can encode - // easily. - exponent = static_cast(exponent + 1); - for (uint_type check_bit = first_exponent_bit >> 1; check_bit != 0; - check_bit = static_cast(check_bit >> 1)) { - exponent = static_cast(exponent - 1); - if (check_bit & significand) break; - } - } - - bool is_nan = - (getBits() & exponent_mask) == exponent_mask && significand != 0; - bool is_inf = - !is_nan && - ((exponent + carried) > static_cast(other_T::exponent_bias) || - (significand == 0 && (getBits() & exponent_mask) == exponent_mask)); - - // If we are Nan or Inf we should pass that through. - if (is_inf) { - other.set_value(BitwiseCast( - static_cast( - (negate ? other_T::sign_mask : 0) | other_T::exponent_mask))); - return; - } - if (is_nan) { - typename other_T::uint_type shifted_significand; - shifted_significand = static_cast( - negatable_left_shift( - static_cast(other_T::num_fraction_bits) - - static_cast(num_fraction_bits), significand)); - - // We are some sort of Nan. We try to keep the bit-pattern of the Nan - // as close as possible. If we had to shift off bits so we are 0, then we - // just set the last bit. - other.set_value(BitwiseCast( - static_cast( - (negate ? other_T::sign_mask : 0) | other_T::exponent_mask | - (shifted_significand == 0 ? 0x1 : shifted_significand)))); - return; - } - - bool round_underflow_up = - isNegative() ? 
round_dir == kRoundToNegativeInfinity - : round_dir == kRoundToPositiveInfinity; - typedef typename other_T::int_type other_int_type; - // setFromSignUnbiasedExponentAndNormalizedSignificand will - // zero out any underflowing value (but retain the sign). - other.setFromSignUnbiasedExponentAndNormalizedSignificand( - negate, static_cast(exponent), rounded_significand, - round_underflow_up); - return; - } - - private: - T value_; - - static_assert(num_used_bits == - Traits::num_exponent_bits + Traits::num_fraction_bits + 1, - "The number of bits do not fit"); - static_assert(sizeof(T) == sizeof(uint_type), "The type sizes do not match"); -}; - -// Returns 4 bits represented by the hex character. -inline uint8_t get_nibble_from_character(int character) { - const char* dec = "0123456789"; - const char* lower = "abcdef"; - const char* upper = "ABCDEF"; - const char* p = nullptr; - if ((p = strchr(dec, character))) { - return static_cast(p - dec); - } else if ((p = strchr(lower, character))) { - return static_cast(p - lower + 0xa); - } else if ((p = strchr(upper, character))) { - return static_cast(p - upper + 0xa); - } - - assert(false && "This was called with a non-hex character"); - return 0; -} - -// Outputs the given HexFloat to the stream. -template -std::ostream& operator<<(std::ostream& os, const HexFloat& value) { - typedef HexFloat HF; - typedef typename HF::uint_type uint_type; - typedef typename HF::int_type int_type; - - static_assert(HF::num_used_bits != 0, - "num_used_bits must be non-zero for a valid float"); - static_assert(HF::num_exponent_bits != 0, - "num_exponent_bits must be non-zero for a valid float"); - static_assert(HF::num_fraction_bits != 0, - "num_fractin_bits must be non-zero for a valid float"); - - const uint_type bits = spvutils::BitwiseCast(value.value()); - const char* const sign = (bits & HF::sign_mask) ? 
"-" : ""; - const uint_type exponent = static_cast( - (bits & HF::exponent_mask) >> HF::num_fraction_bits); - - uint_type fraction = static_cast((bits & HF::fraction_encode_mask) - << HF::num_overflow_bits); - - const bool is_zero = exponent == 0 && fraction == 0; - const bool is_denorm = exponent == 0 && !is_zero; - - // exponent contains the biased exponent we have to convert it back into - // the normal range. - int_type int_exponent = static_cast(exponent - HF::exponent_bias); - // If the number is all zeros, then we actually have to NOT shift the - // exponent. - int_exponent = is_zero ? 0 : int_exponent; - - // If we are denorm, then start shifting, and decreasing the exponent until - // our leading bit is 1. - - if (is_denorm) { - while ((fraction & HF::fraction_top_bit) == 0) { - fraction = static_cast(fraction << 1); - int_exponent = static_cast(int_exponent - 1); - } - // Since this is denormalized, we have to consume the leading 1 since it - // will end up being implicit. - fraction = static_cast(fraction << 1); // eat the leading 1 - fraction &= HF::fraction_represent_mask; - } - - uint_type fraction_nibbles = HF::fraction_nibbles; - // We do not have to display any trailing 0s, since this represents the - // fractional part. - while (fraction_nibbles > 0 && (fraction & 0xF) == 0) { - // Shift off any trailing values; - fraction = static_cast(fraction >> 4); - --fraction_nibbles; - } - - const auto saved_flags = os.flags(); - const auto saved_fill = os.fill(); - - os << sign << "0x" << (is_zero ? '0' : '1'); - if (fraction_nibbles) { - // Make sure to keep the leading 0s in place, since this is the fractional - // part. - os << "." << std::setw(static_cast(fraction_nibbles)) - << std::setfill('0') << std::hex << fraction; - } - os << "p" << std::dec << (int_exponent >= 0 ? 
"+" : "") << int_exponent; - - os.flags(saved_flags); - os.fill(saved_fill); - - return os; -} - -// Returns true if negate_value is true and the next character on the -// input stream is a plus or minus sign. In that case we also set the fail bit -// on the stream and set the value to the zero value for its type. -template -inline bool RejectParseDueToLeadingSign(std::istream& is, bool negate_value, - HexFloat& value) { - if (negate_value) { - auto next_char = is.peek(); - if (next_char == '-' || next_char == '+') { - // Fail the parse. Emulate standard behaviour by setting the value to - // the zero value, and set the fail bit on the stream. - value = HexFloat(typename HexFloat::uint_type(0)); - is.setstate(std::ios_base::failbit); - return true; - } - } - return false; -} - -// Parses a floating point number from the given stream and stores it into the -// value parameter. -// If negate_value is true then the number may not have a leading minus or -// plus, and if it successfully parses, then the number is negated before -// being stored into the value parameter. -// If the value cannot be correctly parsed or overflows the target floating -// point type, then set the fail bit on the stream. -// TODO(dneto): Promise C++11 standard behavior in how the value is set in -// the error case, but only after all target platforms implement it correctly. -// In particular, the Microsoft C++ runtime appears to be out of spec. -template -inline std::istream& ParseNormalFloat(std::istream& is, bool negate_value, - HexFloat& value) { - if (RejectParseDueToLeadingSign(is, negate_value, value)) { - return is; - } - T val; - is >> val; - if (negate_value) { - val = -val; - } - value.set_value(val); - // In the failure case, map -0.0 to 0.0. - if (is.fail() && value.getUnsignedBits() == 0u) { - value = HexFloat(typename HexFloat::uint_type(0)); - } - if (val.isInfinity()) { - // Fail the parse. 
Emulate standard behaviour by setting the value to - // the closest normal value, and set the fail bit on the stream. - value.set_value((value.isNegative() | negate_value) ? T::lowest() - : T::max()); - is.setstate(std::ios_base::failbit); - } - return is; -} - -// Specialization of ParseNormalFloat for FloatProxy values. -// This will parse the float as it were a 32-bit floating point number, -// and then round it down to fit into a Float16 value. -// The number is rounded towards zero. -// If negate_value is true then the number may not have a leading minus or -// plus, and if it successfully parses, then the number is negated before -// being stored into the value parameter. -// If the value cannot be correctly parsed or overflows the target floating -// point type, then set the fail bit on the stream. -// TODO(dneto): Promise C++11 standard behavior in how the value is set in -// the error case, but only after all target platforms implement it correctly. -// In particular, the Microsoft C++ runtime appears to be out of spec. -template <> -inline std::istream& -ParseNormalFloat, HexFloatTraits>>( - std::istream& is, bool negate_value, - HexFloat, HexFloatTraits>>& value) { - // First parse as a 32-bit float. - HexFloat> float_val(0.0f); - ParseNormalFloat(is, negate_value, float_val); - - // Then convert to 16-bit float, saturating at infinities, and - // rounding toward zero. - float_val.castTo(value, kRoundToZero); - - // Overflow on 16-bit behaves the same as for 32- and 64-bit: set the - // fail bit and set the lowest or highest value. - if (Float16::isInfinity(value.value().getAsFloat())) { - value.set_value(value.isNegative() ? Float16::lowest() : Float16::max()); - is.setstate(std::ios_base::failbit); - } - return is; -} - -// Reads a HexFloat from the given stream. -// If the float is not encoded as a hex-float then it will be parsed -// as a regular float. -// This may fail if your stream does not support at least one unget. 
-// Nan values can be encoded with "0x1.p+exponent_bias". -// This would normally overflow a float and round to -// infinity but this special pattern is the exact representation for a NaN, -// and therefore is actually encoded as the correct NaN. To encode inf, -// either 0x0p+exponent_bias can be specified or any exponent greater than -// exponent_bias. -// Examples using IEEE 32-bit float encoding. -// 0x1.0p+128 (+inf) -// -0x1.0p-128 (-inf) -// -// 0x1.1p+128 (+Nan) -// -0x1.1p+128 (-Nan) -// -// 0x1p+129 (+inf) -// -0x1p+129 (-inf) -template -std::istream& operator>>(std::istream& is, HexFloat& value) { - using HF = HexFloat; - using uint_type = typename HF::uint_type; - using int_type = typename HF::int_type; - - value.set_value(static_cast(0.f)); - - if (is.flags() & std::ios::skipws) { - // If the user wants to skip whitespace , then we should obey that. - while (std::isspace(is.peek())) { - is.get(); - } - } - - auto next_char = is.peek(); - bool negate_value = false; - - if (next_char != '-' && next_char != '0') { - return ParseNormalFloat(is, negate_value, value); - } - - if (next_char == '-') { - negate_value = true; - is.get(); - next_char = is.peek(); - } - - if (next_char == '0') { - is.get(); // We may have to unget this. - auto maybe_hex_start = is.peek(); - if (maybe_hex_start != 'x' && maybe_hex_start != 'X') { - is.unget(); - return ParseNormalFloat(is, negate_value, value); - } else { - is.get(); // Throw away the 'x'; - } - } else { - return ParseNormalFloat(is, negate_value, value); - } - - // This "looks" like a hex-float so treat it as one. - bool seen_p = false; - bool seen_dot = false; - uint_type fraction_index = 0; - - uint_type fraction = 0; - int_type exponent = HF::exponent_bias; - - // Strip off leading zeros so we don't have to special-case them later. - while ((next_char = is.peek()) == '0') { - is.get(); - } - - bool is_denorm = - true; // Assume denorm "representation" until we hear otherwise. 
- // NB: This does not mean the value is actually denorm, - // it just means that it was written 0. - bool bits_written = false; // Stays false until we write a bit. - while (!seen_p && !seen_dot) { - // Handle characters that are left of the fractional part. - if (next_char == '.') { - seen_dot = true; - } else if (next_char == 'p') { - seen_p = true; - } else if (::isxdigit(next_char)) { - // We know this is not denormalized since we have stripped all leading - // zeroes and we are not a ".". - is_denorm = false; - int number = get_nibble_from_character(next_char); - for (int i = 0; i < 4; ++i, number <<= 1) { - uint_type write_bit = (number & 0x8) ? 0x1 : 0x0; - if (bits_written) { - // If we are here the bits represented belong in the fractional - // part of the float, and we have to adjust the exponent accordingly. - fraction = static_cast( - fraction | - static_cast( - write_bit << (HF::top_bit_left_shift - fraction_index++))); - exponent = static_cast(exponent + 1); - } - bits_written |= write_bit != 0; - } - } else { - // We have not found our exponent yet, so we have to fail. - is.setstate(std::ios::failbit); - return is; - } - is.get(); - next_char = is.peek(); - } - bits_written = false; - while (seen_dot && !seen_p) { - // Handle only fractional parts now. - if (next_char == 'p') { - seen_p = true; - } else if (::isxdigit(next_char)) { - int number = get_nibble_from_character(next_char); - for (int i = 0; i < 4; ++i, number <<= 1) { - uint_type write_bit = (number & 0x8) ? 0x01 : 0x00; - bits_written |= write_bit != 0; - if (is_denorm && !bits_written) { - // Handle modifying the exponent here this way we can handle - // an arbitrary number of hex values without overflowing our - // integer. 
- exponent = static_cast(exponent - 1); - } else { - fraction = static_cast( - fraction | - static_cast( - write_bit << (HF::top_bit_left_shift - fraction_index++))); - } - } - } else { - // We still have not found our 'p' exponent yet, so this is not a valid - // hex-float. - is.setstate(std::ios::failbit); - return is; - } - is.get(); - next_char = is.peek(); - } - - bool seen_sign = false; - int8_t exponent_sign = 1; - int_type written_exponent = 0; - while (true) { - if ((next_char == '-' || next_char == '+')) { - if (seen_sign) { - is.setstate(std::ios::failbit); - return is; - } - seen_sign = true; - exponent_sign = (next_char == '-') ? -1 : 1; - } else if (::isdigit(next_char)) { - // Hex-floats express their exponent as decimal. - written_exponent = static_cast(written_exponent * 10); - written_exponent = - static_cast(written_exponent + (next_char - '0')); - } else { - break; - } - is.get(); - next_char = is.peek(); - } - - written_exponent = static_cast(written_exponent * exponent_sign); - exponent = static_cast(exponent + written_exponent); - - bool is_zero = is_denorm && (fraction == 0); - if (is_denorm && !is_zero) { - fraction = static_cast(fraction << 1); - exponent = static_cast(exponent - 1); - } else if (is_zero) { - exponent = 0; - } - - if (exponent <= 0 && !is_zero) { - fraction = static_cast(fraction >> 1); - fraction |= static_cast(1) << HF::top_bit_left_shift; - } - - fraction = (fraction >> HF::fraction_right_shift) & HF::fraction_encode_mask; - - const int_type max_exponent = - SetBits::get; - - // Handle actual denorm numbers - while (exponent < 0 && !is_zero) { - fraction = static_cast(fraction >> 1); - exponent = static_cast(exponent + 1); - - fraction &= HF::fraction_encode_mask; - if (fraction == 0) { - // We have underflowed our fraction. We should clamp to zero. - is_zero = true; - exponent = 0; - } - } - - // We have overflowed so we should be inf/-inf. 
- if (exponent > max_exponent) { - exponent = max_exponent; - fraction = 0; - } - - uint_type output_bits = static_cast( - static_cast(negate_value ? 1 : 0) << HF::top_bit_left_shift); - output_bits |= fraction; - - uint_type shifted_exponent = static_cast( - static_cast(exponent << HF::exponent_left_shift) & - HF::exponent_mask); - output_bits |= shifted_exponent; - - T output_float = spvutils::BitwiseCast(output_bits); - value.set_value(output_float); - - return is; -} - -// Writes a FloatProxy value to a stream. -// Zero and normal numbers are printed in the usual notation, but with -// enough digits to fully reproduce the value. Other values (subnormal, -// NaN, and infinity) are printed as a hex float. -template -std::ostream& operator<<(std::ostream& os, const FloatProxy& value) { - auto float_val = value.getAsFloat(); - switch (std::fpclassify(float_val)) { - case FP_ZERO: - case FP_NORMAL: { - auto saved_precision = os.precision(); - os.precision(std::numeric_limits::digits10); - os << float_val; - os.precision(saved_precision); - } break; - default: - os << HexFloat>(value); - break; - } - return os; -} - -template <> -inline std::ostream& operator<<(std::ostream& os, - const FloatProxy& value) { - os << HexFloat>(value); - return os; -} -} - -#endif // LIBSPIRV_UTIL_HEX_FLOAT_H_ diff --git a/third_party/glslang-spirv/spirv.hpp b/third_party/glslang-spirv/spirv.hpp deleted file mode 100644 index c6776638e..000000000 --- a/third_party/glslang-spirv/spirv.hpp +++ /dev/null @@ -1,1028 +0,0 @@ -// Copyright (c) 2014-2018 The Khronos Group Inc. 
-// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and/or associated documentation files (the "Materials"), -// to deal in the Materials without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Materials, and to permit persons to whom the -// Materials are furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Materials. -// -// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -// -// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -// IN THE MATERIALS. - -// This header is automatically generated by the same tool that creates -// the Binary Section of the SPIR-V specification. 
- -// Enumeration tokens for SPIR-V, in various styles: -// C, C++, C++11, JSON, Lua, Python -// -// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL -// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL -// - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL -// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL -// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] -// -// Some tokens act like mask values, which can be OR'd together, -// while others are mutually exclusive. The mask-like ones have -// "Mask" in their name, and a parallel enum that has the shift -// amount (1 << x) for each corresponding enumerant. - -#ifndef spirv_HPP -#define spirv_HPP - -namespace spv { - -typedef unsigned int Id; - -#define SPV_VERSION 0x10200 -#define SPV_REVISION 3 - -static const unsigned int MagicNumber = 0x07230203; -static const unsigned int Version = 0x00010200; -static const unsigned int Revision = 3; -static const unsigned int OpCodeMask = 0xffff; -static const unsigned int WordCountShift = 16; - -enum SourceLanguage { - SourceLanguageUnknown = 0, - SourceLanguageESSL = 1, - SourceLanguageGLSL = 2, - SourceLanguageOpenCL_C = 3, - SourceLanguageOpenCL_CPP = 4, - SourceLanguageHLSL = 5, - SourceLanguageMax = 0x7fffffff, -}; - -enum ExecutionModel { - ExecutionModelVertex = 0, - ExecutionModelTessellationControl = 1, - ExecutionModelTessellationEvaluation = 2, - ExecutionModelGeometry = 3, - ExecutionModelFragment = 4, - ExecutionModelGLCompute = 5, - ExecutionModelKernel = 6, - ExecutionModelMax = 0x7fffffff, -}; - -enum AddressingModel { - AddressingModelLogical = 0, - AddressingModelPhysical32 = 1, - AddressingModelPhysical64 = 2, - AddressingModelMax = 0x7fffffff, -}; - -enum MemoryModel { - MemoryModelSimple = 0, - MemoryModelGLSL450 = 1, - MemoryModelOpenCL = 2, - MemoryModelMax = 0x7fffffff, -}; - -enum ExecutionMode { - ExecutionModeInvocations = 0, - 
ExecutionModeSpacingEqual = 1, - ExecutionModeSpacingFractionalEven = 2, - ExecutionModeSpacingFractionalOdd = 3, - ExecutionModeVertexOrderCw = 4, - ExecutionModeVertexOrderCcw = 5, - ExecutionModePixelCenterInteger = 6, - ExecutionModeOriginUpperLeft = 7, - ExecutionModeOriginLowerLeft = 8, - ExecutionModeEarlyFragmentTests = 9, - ExecutionModePointMode = 10, - ExecutionModeXfb = 11, - ExecutionModeDepthReplacing = 12, - ExecutionModeDepthGreater = 14, - ExecutionModeDepthLess = 15, - ExecutionModeDepthUnchanged = 16, - ExecutionModeLocalSize = 17, - ExecutionModeLocalSizeHint = 18, - ExecutionModeInputPoints = 19, - ExecutionModeInputLines = 20, - ExecutionModeInputLinesAdjacency = 21, - ExecutionModeTriangles = 22, - ExecutionModeInputTrianglesAdjacency = 23, - ExecutionModeQuads = 24, - ExecutionModeIsolines = 25, - ExecutionModeOutputVertices = 26, - ExecutionModeOutputPoints = 27, - ExecutionModeOutputLineStrip = 28, - ExecutionModeOutputTriangleStrip = 29, - ExecutionModeVecTypeHint = 30, - ExecutionModeContractionOff = 31, - ExecutionModeInitializer = 33, - ExecutionModeFinalizer = 34, - ExecutionModeSubgroupSize = 35, - ExecutionModeSubgroupsPerWorkgroup = 36, - ExecutionModeSubgroupsPerWorkgroupId = 37, - ExecutionModeLocalSizeId = 38, - ExecutionModeLocalSizeHintId = 39, - ExecutionModePostDepthCoverage = 4446, - ExecutionModeStencilRefReplacingEXT = 5027, - ExecutionModeMax = 0x7fffffff, -}; - -enum StorageClass { - StorageClassUniformConstant = 0, - StorageClassInput = 1, - StorageClassUniform = 2, - StorageClassOutput = 3, - StorageClassWorkgroup = 4, - StorageClassCrossWorkgroup = 5, - StorageClassPrivate = 6, - StorageClassFunction = 7, - StorageClassGeneric = 8, - StorageClassPushConstant = 9, - StorageClassAtomicCounter = 10, - StorageClassImage = 11, - StorageClassStorageBuffer = 12, - StorageClassMax = 0x7fffffff, -}; - -enum Dim { - Dim1D = 0, - Dim2D = 1, - Dim3D = 2, - DimCube = 3, - DimRect = 4, - DimBuffer = 5, - DimSubpassData = 6, - 
DimMax = 0x7fffffff, -}; - -enum SamplerAddressingMode { - SamplerAddressingModeNone = 0, - SamplerAddressingModeClampToEdge = 1, - SamplerAddressingModeClamp = 2, - SamplerAddressingModeRepeat = 3, - SamplerAddressingModeRepeatMirrored = 4, - SamplerAddressingModeMax = 0x7fffffff, -}; - -enum SamplerFilterMode { - SamplerFilterModeNearest = 0, - SamplerFilterModeLinear = 1, - SamplerFilterModeMax = 0x7fffffff, -}; - -enum ImageFormat { - ImageFormatUnknown = 0, - ImageFormatRgba32f = 1, - ImageFormatRgba16f = 2, - ImageFormatR32f = 3, - ImageFormatRgba8 = 4, - ImageFormatRgba8Snorm = 5, - ImageFormatRg32f = 6, - ImageFormatRg16f = 7, - ImageFormatR11fG11fB10f = 8, - ImageFormatR16f = 9, - ImageFormatRgba16 = 10, - ImageFormatRgb10A2 = 11, - ImageFormatRg16 = 12, - ImageFormatRg8 = 13, - ImageFormatR16 = 14, - ImageFormatR8 = 15, - ImageFormatRgba16Snorm = 16, - ImageFormatRg16Snorm = 17, - ImageFormatRg8Snorm = 18, - ImageFormatR16Snorm = 19, - ImageFormatR8Snorm = 20, - ImageFormatRgba32i = 21, - ImageFormatRgba16i = 22, - ImageFormatRgba8i = 23, - ImageFormatR32i = 24, - ImageFormatRg32i = 25, - ImageFormatRg16i = 26, - ImageFormatRg8i = 27, - ImageFormatR16i = 28, - ImageFormatR8i = 29, - ImageFormatRgba32ui = 30, - ImageFormatRgba16ui = 31, - ImageFormatRgba8ui = 32, - ImageFormatR32ui = 33, - ImageFormatRgb10a2ui = 34, - ImageFormatRg32ui = 35, - ImageFormatRg16ui = 36, - ImageFormatRg8ui = 37, - ImageFormatR16ui = 38, - ImageFormatR8ui = 39, - ImageFormatMax = 0x7fffffff, -}; - -enum ImageChannelOrder { - ImageChannelOrderR = 0, - ImageChannelOrderA = 1, - ImageChannelOrderRG = 2, - ImageChannelOrderRA = 3, - ImageChannelOrderRGB = 4, - ImageChannelOrderRGBA = 5, - ImageChannelOrderBGRA = 6, - ImageChannelOrderARGB = 7, - ImageChannelOrderIntensity = 8, - ImageChannelOrderLuminance = 9, - ImageChannelOrderRx = 10, - ImageChannelOrderRGx = 11, - ImageChannelOrderRGBx = 12, - ImageChannelOrderDepth = 13, - ImageChannelOrderDepthStencil = 14, - 
ImageChannelOrdersRGB = 15, - ImageChannelOrdersRGBx = 16, - ImageChannelOrdersRGBA = 17, - ImageChannelOrdersBGRA = 18, - ImageChannelOrderABGR = 19, - ImageChannelOrderMax = 0x7fffffff, -}; - -enum ImageChannelDataType { - ImageChannelDataTypeSnormInt8 = 0, - ImageChannelDataTypeSnormInt16 = 1, - ImageChannelDataTypeUnormInt8 = 2, - ImageChannelDataTypeUnormInt16 = 3, - ImageChannelDataTypeUnormShort565 = 4, - ImageChannelDataTypeUnormShort555 = 5, - ImageChannelDataTypeUnormInt101010 = 6, - ImageChannelDataTypeSignedInt8 = 7, - ImageChannelDataTypeSignedInt16 = 8, - ImageChannelDataTypeSignedInt32 = 9, - ImageChannelDataTypeUnsignedInt8 = 10, - ImageChannelDataTypeUnsignedInt16 = 11, - ImageChannelDataTypeUnsignedInt32 = 12, - ImageChannelDataTypeHalfFloat = 13, - ImageChannelDataTypeFloat = 14, - ImageChannelDataTypeUnormInt24 = 15, - ImageChannelDataTypeUnormInt101010_2 = 16, - ImageChannelDataTypeMax = 0x7fffffff, -}; - -enum ImageOperandsShift { - ImageOperandsBiasShift = 0, - ImageOperandsLodShift = 1, - ImageOperandsGradShift = 2, - ImageOperandsConstOffsetShift = 3, - ImageOperandsOffsetShift = 4, - ImageOperandsConstOffsetsShift = 5, - ImageOperandsSampleShift = 6, - ImageOperandsMinLodShift = 7, - ImageOperandsMax = 0x7fffffff, -}; - -enum ImageOperandsMask { - ImageOperandsMaskNone = 0, - ImageOperandsBiasMask = 0x00000001, - ImageOperandsLodMask = 0x00000002, - ImageOperandsGradMask = 0x00000004, - ImageOperandsConstOffsetMask = 0x00000008, - ImageOperandsOffsetMask = 0x00000010, - ImageOperandsConstOffsetsMask = 0x00000020, - ImageOperandsSampleMask = 0x00000040, - ImageOperandsMinLodMask = 0x00000080, -}; - -enum FPFastMathModeShift { - FPFastMathModeNotNaNShift = 0, - FPFastMathModeNotInfShift = 1, - FPFastMathModeNSZShift = 2, - FPFastMathModeAllowRecipShift = 3, - FPFastMathModeFastShift = 4, - FPFastMathModeMax = 0x7fffffff, -}; - -enum FPFastMathModeMask { - FPFastMathModeMaskNone = 0, - FPFastMathModeNotNaNMask = 0x00000001, - 
FPFastMathModeNotInfMask = 0x00000002, - FPFastMathModeNSZMask = 0x00000004, - FPFastMathModeAllowRecipMask = 0x00000008, - FPFastMathModeFastMask = 0x00000010, -}; - -enum FPRoundingMode { - FPRoundingModeRTE = 0, - FPRoundingModeRTZ = 1, - FPRoundingModeRTP = 2, - FPRoundingModeRTN = 3, - FPRoundingModeMax = 0x7fffffff, -}; - -enum LinkageType { - LinkageTypeExport = 0, - LinkageTypeImport = 1, - LinkageTypeMax = 0x7fffffff, -}; - -enum AccessQualifier { - AccessQualifierReadOnly = 0, - AccessQualifierWriteOnly = 1, - AccessQualifierReadWrite = 2, - AccessQualifierMax = 0x7fffffff, -}; - -enum FunctionParameterAttribute { - FunctionParameterAttributeZext = 0, - FunctionParameterAttributeSext = 1, - FunctionParameterAttributeByVal = 2, - FunctionParameterAttributeSret = 3, - FunctionParameterAttributeNoAlias = 4, - FunctionParameterAttributeNoCapture = 5, - FunctionParameterAttributeNoWrite = 6, - FunctionParameterAttributeNoReadWrite = 7, - FunctionParameterAttributeMax = 0x7fffffff, -}; - -enum Decoration { - DecorationRelaxedPrecision = 0, - DecorationSpecId = 1, - DecorationBlock = 2, - DecorationBufferBlock = 3, - DecorationRowMajor = 4, - DecorationColMajor = 5, - DecorationArrayStride = 6, - DecorationMatrixStride = 7, - DecorationGLSLShared = 8, - DecorationGLSLPacked = 9, - DecorationCPacked = 10, - DecorationBuiltIn = 11, - DecorationNoPerspective = 13, - DecorationFlat = 14, - DecorationPatch = 15, - DecorationCentroid = 16, - DecorationSample = 17, - DecorationInvariant = 18, - DecorationRestrict = 19, - DecorationAliased = 20, - DecorationVolatile = 21, - DecorationConstant = 22, - DecorationCoherent = 23, - DecorationNonWritable = 24, - DecorationNonReadable = 25, - DecorationUniform = 26, - DecorationSaturatedConversion = 28, - DecorationStream = 29, - DecorationLocation = 30, - DecorationComponent = 31, - DecorationIndex = 32, - DecorationBinding = 33, - DecorationDescriptorSet = 34, - DecorationOffset = 35, - DecorationXfbBuffer = 36, - 
DecorationXfbStride = 37, - DecorationFuncParamAttr = 38, - DecorationFPRoundingMode = 39, - DecorationFPFastMathMode = 40, - DecorationLinkageAttributes = 41, - DecorationNoContraction = 42, - DecorationInputAttachmentIndex = 43, - DecorationAlignment = 44, - DecorationMaxByteOffset = 45, - DecorationAlignmentId = 46, - DecorationMaxByteOffsetId = 47, - DecorationExplicitInterpAMD = 4999, - DecorationOverrideCoverageNV = 5248, - DecorationPassthroughNV = 5250, - DecorationViewportRelativeNV = 5252, - DecorationSecondaryViewportRelativeNV = 5256, - DecorationMax = 0x7fffffff, -}; - -enum BuiltIn { - BuiltInPosition = 0, - BuiltInPointSize = 1, - BuiltInClipDistance = 3, - BuiltInCullDistance = 4, - BuiltInVertexId = 5, - BuiltInInstanceId = 6, - BuiltInPrimitiveId = 7, - BuiltInInvocationId = 8, - BuiltInLayer = 9, - BuiltInViewportIndex = 10, - BuiltInTessLevelOuter = 11, - BuiltInTessLevelInner = 12, - BuiltInTessCoord = 13, - BuiltInPatchVertices = 14, - BuiltInFragCoord = 15, - BuiltInPointCoord = 16, - BuiltInFrontFacing = 17, - BuiltInSampleId = 18, - BuiltInSamplePosition = 19, - BuiltInSampleMask = 20, - BuiltInFragDepth = 22, - BuiltInHelperInvocation = 23, - BuiltInNumWorkgroups = 24, - BuiltInWorkgroupSize = 25, - BuiltInWorkgroupId = 26, - BuiltInLocalInvocationId = 27, - BuiltInGlobalInvocationId = 28, - BuiltInLocalInvocationIndex = 29, - BuiltInWorkDim = 30, - BuiltInGlobalSize = 31, - BuiltInEnqueuedWorkgroupSize = 32, - BuiltInGlobalOffset = 33, - BuiltInGlobalLinearId = 34, - BuiltInSubgroupSize = 36, - BuiltInSubgroupMaxSize = 37, - BuiltInNumSubgroups = 38, - BuiltInNumEnqueuedSubgroups = 39, - BuiltInSubgroupId = 40, - BuiltInSubgroupLocalInvocationId = 41, - BuiltInVertexIndex = 42, - BuiltInInstanceIndex = 43, - BuiltInSubgroupEqMaskKHR = 4416, - BuiltInSubgroupGeMaskKHR = 4417, - BuiltInSubgroupGtMaskKHR = 4418, - BuiltInSubgroupLeMaskKHR = 4419, - BuiltInSubgroupLtMaskKHR = 4420, - BuiltInBaseVertex = 4424, - BuiltInBaseInstance = 4425, - 
BuiltInDrawIndex = 4426, - BuiltInDeviceIndex = 4438, - BuiltInViewIndex = 4440, - BuiltInBaryCoordNoPerspAMD = 4992, - BuiltInBaryCoordNoPerspCentroidAMD = 4993, - BuiltInBaryCoordNoPerspSampleAMD = 4994, - BuiltInBaryCoordSmoothAMD = 4995, - BuiltInBaryCoordSmoothCentroidAMD = 4996, - BuiltInBaryCoordSmoothSampleAMD = 4997, - BuiltInBaryCoordPullModelAMD = 4998, - BuiltInFragStencilRefEXT = 5014, - BuiltInViewportMaskNV = 5253, - BuiltInSecondaryPositionNV = 5257, - BuiltInSecondaryViewportMaskNV = 5258, - BuiltInPositionPerViewNV = 5261, - BuiltInViewportMaskPerViewNV = 5262, - BuiltInFullyCoveredEXT = 5264, - BuiltInMax = 0x7fffffff, -}; - -enum SelectionControlShift { - SelectionControlFlattenShift = 0, - SelectionControlDontFlattenShift = 1, - SelectionControlMax = 0x7fffffff, -}; - -enum SelectionControlMask { - SelectionControlMaskNone = 0, - SelectionControlFlattenMask = 0x00000001, - SelectionControlDontFlattenMask = 0x00000002, -}; - -enum LoopControlShift { - LoopControlUnrollShift = 0, - LoopControlDontUnrollShift = 1, - LoopControlDependencyInfiniteShift = 2, - LoopControlDependencyLengthShift = 3, - LoopControlMax = 0x7fffffff, -}; - -enum LoopControlMask { - LoopControlMaskNone = 0, - LoopControlUnrollMask = 0x00000001, - LoopControlDontUnrollMask = 0x00000002, - LoopControlDependencyInfiniteMask = 0x00000004, - LoopControlDependencyLengthMask = 0x00000008, -}; - -enum FunctionControlShift { - FunctionControlInlineShift = 0, - FunctionControlDontInlineShift = 1, - FunctionControlPureShift = 2, - FunctionControlConstShift = 3, - FunctionControlMax = 0x7fffffff, -}; - -enum FunctionControlMask { - FunctionControlMaskNone = 0, - FunctionControlInlineMask = 0x00000001, - FunctionControlDontInlineMask = 0x00000002, - FunctionControlPureMask = 0x00000004, - FunctionControlConstMask = 0x00000008, -}; - -enum MemorySemanticsShift { - MemorySemanticsAcquireShift = 1, - MemorySemanticsReleaseShift = 2, - MemorySemanticsAcquireReleaseShift = 3, - 
MemorySemanticsSequentiallyConsistentShift = 4, - MemorySemanticsUniformMemoryShift = 6, - MemorySemanticsSubgroupMemoryShift = 7, - MemorySemanticsWorkgroupMemoryShift = 8, - MemorySemanticsCrossWorkgroupMemoryShift = 9, - MemorySemanticsAtomicCounterMemoryShift = 10, - MemorySemanticsImageMemoryShift = 11, - MemorySemanticsMax = 0x7fffffff, -}; - -enum MemorySemanticsMask { - MemorySemanticsMaskNone = 0, - MemorySemanticsAcquireMask = 0x00000002, - MemorySemanticsReleaseMask = 0x00000004, - MemorySemanticsAcquireReleaseMask = 0x00000008, - MemorySemanticsSequentiallyConsistentMask = 0x00000010, - MemorySemanticsUniformMemoryMask = 0x00000040, - MemorySemanticsSubgroupMemoryMask = 0x00000080, - MemorySemanticsWorkgroupMemoryMask = 0x00000100, - MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, - MemorySemanticsAtomicCounterMemoryMask = 0x00000400, - MemorySemanticsImageMemoryMask = 0x00000800, -}; - -enum MemoryAccessShift { - MemoryAccessVolatileShift = 0, - MemoryAccessAlignedShift = 1, - MemoryAccessNontemporalShift = 2, - MemoryAccessMax = 0x7fffffff, -}; - -enum MemoryAccessMask { - MemoryAccessMaskNone = 0, - MemoryAccessVolatileMask = 0x00000001, - MemoryAccessAlignedMask = 0x00000002, - MemoryAccessNontemporalMask = 0x00000004, -}; - -enum Scope { - ScopeCrossDevice = 0, - ScopeDevice = 1, - ScopeWorkgroup = 2, - ScopeSubgroup = 3, - ScopeInvocation = 4, - ScopeMax = 0x7fffffff, -}; - -enum GroupOperation { - GroupOperationReduce = 0, - GroupOperationInclusiveScan = 1, - GroupOperationExclusiveScan = 2, - GroupOperationMax = 0x7fffffff, -}; - -enum KernelEnqueueFlags { - KernelEnqueueFlagsNoWait = 0, - KernelEnqueueFlagsWaitKernel = 1, - KernelEnqueueFlagsWaitWorkGroup = 2, - KernelEnqueueFlagsMax = 0x7fffffff, -}; - -enum KernelProfilingInfoShift { - KernelProfilingInfoCmdExecTimeShift = 0, - KernelProfilingInfoMax = 0x7fffffff, -}; - -enum KernelProfilingInfoMask { - KernelProfilingInfoMaskNone = 0, - KernelProfilingInfoCmdExecTimeMask = 0x00000001, 
-}; - -enum Capability { - CapabilityMatrix = 0, - CapabilityShader = 1, - CapabilityGeometry = 2, - CapabilityTessellation = 3, - CapabilityAddresses = 4, - CapabilityLinkage = 5, - CapabilityKernel = 6, - CapabilityVector16 = 7, - CapabilityFloat16Buffer = 8, - CapabilityFloat16 = 9, - CapabilityFloat64 = 10, - CapabilityInt64 = 11, - CapabilityInt64Atomics = 12, - CapabilityImageBasic = 13, - CapabilityImageReadWrite = 14, - CapabilityImageMipmap = 15, - CapabilityPipes = 17, - CapabilityGroups = 18, - CapabilityDeviceEnqueue = 19, - CapabilityLiteralSampler = 20, - CapabilityAtomicStorage = 21, - CapabilityInt16 = 22, - CapabilityTessellationPointSize = 23, - CapabilityGeometryPointSize = 24, - CapabilityImageGatherExtended = 25, - CapabilityStorageImageMultisample = 27, - CapabilityUniformBufferArrayDynamicIndexing = 28, - CapabilitySampledImageArrayDynamicIndexing = 29, - CapabilityStorageBufferArrayDynamicIndexing = 30, - CapabilityStorageImageArrayDynamicIndexing = 31, - CapabilityClipDistance = 32, - CapabilityCullDistance = 33, - CapabilityImageCubeArray = 34, - CapabilitySampleRateShading = 35, - CapabilityImageRect = 36, - CapabilitySampledRect = 37, - CapabilityGenericPointer = 38, - CapabilityInt8 = 39, - CapabilityInputAttachment = 40, - CapabilitySparseResidency = 41, - CapabilityMinLod = 42, - CapabilitySampled1D = 43, - CapabilityImage1D = 44, - CapabilitySampledCubeArray = 45, - CapabilitySampledBuffer = 46, - CapabilityImageBuffer = 47, - CapabilityImageMSArray = 48, - CapabilityStorageImageExtendedFormats = 49, - CapabilityImageQuery = 50, - CapabilityDerivativeControl = 51, - CapabilityInterpolationFunction = 52, - CapabilityTransformFeedback = 53, - CapabilityGeometryStreams = 54, - CapabilityStorageImageReadWithoutFormat = 55, - CapabilityStorageImageWriteWithoutFormat = 56, - CapabilityMultiViewport = 57, - CapabilitySubgroupDispatch = 58, - CapabilityNamedBarrier = 59, - CapabilityPipeStorage = 60, - CapabilitySubgroupBallotKHR = 4423, - 
CapabilityDrawParameters = 4427, - CapabilitySubgroupVoteKHR = 4431, - CapabilityStorageBuffer16BitAccess = 4433, - CapabilityStorageUniformBufferBlock16 = 4433, - CapabilityStorageUniform16 = 4434, - CapabilityUniformAndStorageBuffer16BitAccess = 4434, - CapabilityStoragePushConstant16 = 4435, - CapabilityStorageInputOutput16 = 4436, - CapabilityDeviceGroup = 4437, - CapabilityMultiView = 4439, - CapabilityVariablePointersStorageBuffer = 4441, - CapabilityVariablePointers = 4442, - CapabilityAtomicStorageOps = 4445, - CapabilitySampleMaskPostDepthCoverage = 4447, - CapabilityImageGatherBiasLodAMD = 5009, - CapabilityFragmentMaskAMD = 5010, - CapabilityStencilExportEXT = 5013, - CapabilityImageReadWriteLodAMD = 5015, - CapabilitySampleMaskOverrideCoverageNV = 5249, - CapabilityGeometryShaderPassthroughNV = 5251, - CapabilityShaderViewportIndexLayerEXT = 5254, - CapabilityShaderViewportIndexLayerNV = 5254, - CapabilityShaderViewportMaskNV = 5255, - CapabilityShaderStereoViewNV = 5259, - CapabilityPerViewAttributesNV = 5260, - CapabilityFragmentFullyCoveredEXT = 5265, - CapabilitySubgroupShuffleINTEL = 5568, - CapabilitySubgroupBufferBlockIOINTEL = 5569, - CapabilitySubgroupImageBlockIOINTEL = 5570, - CapabilityMax = 0x7fffffff, -}; - -enum Op { - OpNop = 0, - OpUndef = 1, - OpSourceContinued = 2, - OpSource = 3, - OpSourceExtension = 4, - OpName = 5, - OpMemberName = 6, - OpString = 7, - OpLine = 8, - OpExtension = 10, - OpExtInstImport = 11, - OpExtInst = 12, - OpMemoryModel = 14, - OpEntryPoint = 15, - OpExecutionMode = 16, - OpCapability = 17, - OpTypeVoid = 19, - OpTypeBool = 20, - OpTypeInt = 21, - OpTypeFloat = 22, - OpTypeVector = 23, - OpTypeMatrix = 24, - OpTypeImage = 25, - OpTypeSampler = 26, - OpTypeSampledImage = 27, - OpTypeArray = 28, - OpTypeRuntimeArray = 29, - OpTypeStruct = 30, - OpTypeOpaque = 31, - OpTypePointer = 32, - OpTypeFunction = 33, - OpTypeEvent = 34, - OpTypeDeviceEvent = 35, - OpTypeReserveId = 36, - OpTypeQueue = 37, - OpTypePipe = 
38, - OpTypeForwardPointer = 39, - OpConstantTrue = 41, - OpConstantFalse = 42, - OpConstant = 43, - OpConstantComposite = 44, - OpConstantSampler = 45, - OpConstantNull = 46, - OpSpecConstantTrue = 48, - OpSpecConstantFalse = 49, - OpSpecConstant = 50, - OpSpecConstantComposite = 51, - OpSpecConstantOp = 52, - OpFunction = 54, - OpFunctionParameter = 55, - OpFunctionEnd = 56, - OpFunctionCall = 57, - OpVariable = 59, - OpImageTexelPointer = 60, - OpLoad = 61, - OpStore = 62, - OpCopyMemory = 63, - OpCopyMemorySized = 64, - OpAccessChain = 65, - OpInBoundsAccessChain = 66, - OpPtrAccessChain = 67, - OpArrayLength = 68, - OpGenericPtrMemSemantics = 69, - OpInBoundsPtrAccessChain = 70, - OpDecorate = 71, - OpMemberDecorate = 72, - OpDecorationGroup = 73, - OpGroupDecorate = 74, - OpGroupMemberDecorate = 75, - OpVectorExtractDynamic = 77, - OpVectorInsertDynamic = 78, - OpVectorShuffle = 79, - OpCompositeConstruct = 80, - OpCompositeExtract = 81, - OpCompositeInsert = 82, - OpCopyObject = 83, - OpTranspose = 84, - OpSampledImage = 86, - OpImageSampleImplicitLod = 87, - OpImageSampleExplicitLod = 88, - OpImageSampleDrefImplicitLod = 89, - OpImageSampleDrefExplicitLod = 90, - OpImageSampleProjImplicitLod = 91, - OpImageSampleProjExplicitLod = 92, - OpImageSampleProjDrefImplicitLod = 93, - OpImageSampleProjDrefExplicitLod = 94, - OpImageFetch = 95, - OpImageGather = 96, - OpImageDrefGather = 97, - OpImageRead = 98, - OpImageWrite = 99, - OpImage = 100, - OpImageQueryFormat = 101, - OpImageQueryOrder = 102, - OpImageQuerySizeLod = 103, - OpImageQuerySize = 104, - OpImageQueryLod = 105, - OpImageQueryLevels = 106, - OpImageQuerySamples = 107, - OpConvertFToU = 109, - OpConvertFToS = 110, - OpConvertSToF = 111, - OpConvertUToF = 112, - OpUConvert = 113, - OpSConvert = 114, - OpFConvert = 115, - OpQuantizeToF16 = 116, - OpConvertPtrToU = 117, - OpSatConvertSToU = 118, - OpSatConvertUToS = 119, - OpConvertUToPtr = 120, - OpPtrCastToGeneric = 121, - OpGenericCastToPtr = 122, - 
OpGenericCastToPtrExplicit = 123, - OpBitcast = 124, - OpSNegate = 126, - OpFNegate = 127, - OpIAdd = 128, - OpFAdd = 129, - OpISub = 130, - OpFSub = 131, - OpIMul = 132, - OpFMul = 133, - OpUDiv = 134, - OpSDiv = 135, - OpFDiv = 136, - OpUMod = 137, - OpSRem = 138, - OpSMod = 139, - OpFRem = 140, - OpFMod = 141, - OpVectorTimesScalar = 142, - OpMatrixTimesScalar = 143, - OpVectorTimesMatrix = 144, - OpMatrixTimesVector = 145, - OpMatrixTimesMatrix = 146, - OpOuterProduct = 147, - OpDot = 148, - OpIAddCarry = 149, - OpISubBorrow = 150, - OpUMulExtended = 151, - OpSMulExtended = 152, - OpAny = 154, - OpAll = 155, - OpIsNan = 156, - OpIsInf = 157, - OpIsFinite = 158, - OpIsNormal = 159, - OpSignBitSet = 160, - OpLessOrGreater = 161, - OpOrdered = 162, - OpUnordered = 163, - OpLogicalEqual = 164, - OpLogicalNotEqual = 165, - OpLogicalOr = 166, - OpLogicalAnd = 167, - OpLogicalNot = 168, - OpSelect = 169, - OpIEqual = 170, - OpINotEqual = 171, - OpUGreaterThan = 172, - OpSGreaterThan = 173, - OpUGreaterThanEqual = 174, - OpSGreaterThanEqual = 175, - OpULessThan = 176, - OpSLessThan = 177, - OpULessThanEqual = 178, - OpSLessThanEqual = 179, - OpFOrdEqual = 180, - OpFUnordEqual = 181, - OpFOrdNotEqual = 182, - OpFUnordNotEqual = 183, - OpFOrdLessThan = 184, - OpFUnordLessThan = 185, - OpFOrdGreaterThan = 186, - OpFUnordGreaterThan = 187, - OpFOrdLessThanEqual = 188, - OpFUnordLessThanEqual = 189, - OpFOrdGreaterThanEqual = 190, - OpFUnordGreaterThanEqual = 191, - OpShiftRightLogical = 194, - OpShiftRightArithmetic = 195, - OpShiftLeftLogical = 196, - OpBitwiseOr = 197, - OpBitwiseXor = 198, - OpBitwiseAnd = 199, - OpNot = 200, - OpBitFieldInsert = 201, - OpBitFieldSExtract = 202, - OpBitFieldUExtract = 203, - OpBitReverse = 204, - OpBitCount = 205, - OpDPdx = 207, - OpDPdy = 208, - OpFwidth = 209, - OpDPdxFine = 210, - OpDPdyFine = 211, - OpFwidthFine = 212, - OpDPdxCoarse = 213, - OpDPdyCoarse = 214, - OpFwidthCoarse = 215, - OpEmitVertex = 218, - OpEndPrimitive = 219, 
- OpEmitStreamVertex = 220, - OpEndStreamPrimitive = 221, - OpControlBarrier = 224, - OpMemoryBarrier = 225, - OpAtomicLoad = 227, - OpAtomicStore = 228, - OpAtomicExchange = 229, - OpAtomicCompareExchange = 230, - OpAtomicCompareExchangeWeak = 231, - OpAtomicIIncrement = 232, - OpAtomicIDecrement = 233, - OpAtomicIAdd = 234, - OpAtomicISub = 235, - OpAtomicSMin = 236, - OpAtomicUMin = 237, - OpAtomicSMax = 238, - OpAtomicUMax = 239, - OpAtomicAnd = 240, - OpAtomicOr = 241, - OpAtomicXor = 242, - OpPhi = 245, - OpLoopMerge = 246, - OpSelectionMerge = 247, - OpLabel = 248, - OpBranch = 249, - OpBranchConditional = 250, - OpSwitch = 251, - OpKill = 252, - OpReturn = 253, - OpReturnValue = 254, - OpUnreachable = 255, - OpLifetimeStart = 256, - OpLifetimeStop = 257, - OpGroupAsyncCopy = 259, - OpGroupWaitEvents = 260, - OpGroupAll = 261, - OpGroupAny = 262, - OpGroupBroadcast = 263, - OpGroupIAdd = 264, - OpGroupFAdd = 265, - OpGroupFMin = 266, - OpGroupUMin = 267, - OpGroupSMin = 268, - OpGroupFMax = 269, - OpGroupUMax = 270, - OpGroupSMax = 271, - OpReadPipe = 274, - OpWritePipe = 275, - OpReservedReadPipe = 276, - OpReservedWritePipe = 277, - OpReserveReadPipePackets = 278, - OpReserveWritePipePackets = 279, - OpCommitReadPipe = 280, - OpCommitWritePipe = 281, - OpIsValidReserveId = 282, - OpGetNumPipePackets = 283, - OpGetMaxPipePackets = 284, - OpGroupReserveReadPipePackets = 285, - OpGroupReserveWritePipePackets = 286, - OpGroupCommitReadPipe = 287, - OpGroupCommitWritePipe = 288, - OpEnqueueMarker = 291, - OpEnqueueKernel = 292, - OpGetKernelNDrangeSubGroupCount = 293, - OpGetKernelNDrangeMaxSubGroupSize = 294, - OpGetKernelWorkGroupSize = 295, - OpGetKernelPreferredWorkGroupSizeMultiple = 296, - OpRetainEvent = 297, - OpReleaseEvent = 298, - OpCreateUserEvent = 299, - OpIsValidEvent = 300, - OpSetUserEventStatus = 301, - OpCaptureEventProfilingInfo = 302, - OpGetDefaultQueue = 303, - OpBuildNDRange = 304, - OpImageSparseSampleImplicitLod = 305, - 
OpImageSparseSampleExplicitLod = 306, - OpImageSparseSampleDrefImplicitLod = 307, - OpImageSparseSampleDrefExplicitLod = 308, - OpImageSparseSampleProjImplicitLod = 309, - OpImageSparseSampleProjExplicitLod = 310, - OpImageSparseSampleProjDrefImplicitLod = 311, - OpImageSparseSampleProjDrefExplicitLod = 312, - OpImageSparseFetch = 313, - OpImageSparseGather = 314, - OpImageSparseDrefGather = 315, - OpImageSparseTexelsResident = 316, - OpNoLine = 317, - OpAtomicFlagTestAndSet = 318, - OpAtomicFlagClear = 319, - OpImageSparseRead = 320, - OpSizeOf = 321, - OpTypePipeStorage = 322, - OpConstantPipeStorage = 323, - OpCreatePipeFromPipeStorage = 324, - OpGetKernelLocalSizeForSubgroupCount = 325, - OpGetKernelMaxNumSubgroups = 326, - OpTypeNamedBarrier = 327, - OpNamedBarrierInitialize = 328, - OpMemoryNamedBarrier = 329, - OpModuleProcessed = 330, - OpExecutionModeId = 331, - OpDecorateId = 332, - OpSubgroupBallotKHR = 4421, - OpSubgroupFirstInvocationKHR = 4422, - OpSubgroupAllKHR = 4428, - OpSubgroupAnyKHR = 4429, - OpSubgroupAllEqualKHR = 4430, - OpSubgroupReadInvocationKHR = 4432, - OpGroupIAddNonUniformAMD = 5000, - OpGroupFAddNonUniformAMD = 5001, - OpGroupFMinNonUniformAMD = 5002, - OpGroupUMinNonUniformAMD = 5003, - OpGroupSMinNonUniformAMD = 5004, - OpGroupFMaxNonUniformAMD = 5005, - OpGroupUMaxNonUniformAMD = 5006, - OpGroupSMaxNonUniformAMD = 5007, - OpFragmentMaskFetchAMD = 5011, - OpFragmentFetchAMD = 5012, - OpSubgroupShuffleINTEL = 5571, - OpSubgroupShuffleDownINTEL = 5572, - OpSubgroupShuffleUpINTEL = 5573, - OpSubgroupShuffleXorINTEL = 5574, - OpSubgroupBlockReadINTEL = 5575, - OpSubgroupBlockWriteINTEL = 5576, - OpSubgroupImageBlockReadINTEL = 5577, - OpSubgroupImageBlockWriteINTEL = 5578, - OpMax = 0x7fffffff, -}; - -// Overload operator| for mask bit combining - -inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); } -inline FPFastMathModeMask operator|(FPFastMathModeMask 
a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); } -inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); } -inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); } -inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); } -inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); } -inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); } -inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); } - -} // end namespace spv - -#endif // #ifndef spirv_HPP - diff --git a/third_party/glslang-spirv/spvIR.h b/third_party/glslang-spirv/spvIR.h deleted file mode 100644 index faa2701ff..000000000 --- a/third_party/glslang-spirv/spvIR.h +++ /dev/null @@ -1,407 +0,0 @@ -// -// Copyright (C) 2014 LunarG, Inc. -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// -// Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// -// Neither the name of 3Dlabs Inc. Ltd. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -// SPIRV-IR -// -// Simple in-memory representation (IR) of SPIRV. Just for holding -// Each function's CFG of blocks. Has this hierarchy: -// - Module, which is a list of -// - Function, which is a list of -// - Block, which is a list of -// - Instruction -// - -#pragma once -#ifndef spvIR_H -#define spvIR_H - -#include "spirv.hpp" - -#include -#include -#include -#include -#include -#include - -namespace spv { - -class Block; -class Function; -class Module; - -const Id NoResult = 0; -const Id NoType = 0; - -const Decoration NoPrecision = DecorationMax; - -#ifdef __GNUC__ -# define POTENTIALLY_UNUSED __attribute__((unused)) -#else -# define POTENTIALLY_UNUSED -#endif - -POTENTIALLY_UNUSED -const MemorySemanticsMask MemorySemanticsAllMemory = - (MemorySemanticsMask)(MemorySemanticsUniformMemoryMask | - MemorySemanticsWorkgroupMemoryMask | - MemorySemanticsAtomicCounterMemoryMask | - MemorySemanticsImageMemoryMask); - -// -// SPIR-V IR instruction. 
-// - -class Instruction { -public: - Instruction(Id resultId, Id typeId, Op opCode) : resultId(resultId), typeId(typeId), opCode(opCode), block(nullptr) { } - explicit Instruction(Op opCode) : resultId(NoResult), typeId(NoType), opCode(opCode), block(nullptr) { } - virtual ~Instruction() {} - void addIdOperand(Id id) { operands.push_back(id); } - void addImmediateOperand(unsigned int immediate) { operands.push_back(immediate); } - void addStringOperand(const char* str) - { - unsigned int word; - char* wordString = (char*)&word; - char* wordPtr = wordString; - int charCount = 0; - char c; - do { - c = *(str++); - *(wordPtr++) = c; - ++charCount; - if (charCount == 4) { - addImmediateOperand(word); - wordPtr = wordString; - charCount = 0; - } - } while (c != 0); - - // deal with partial last word - if (charCount > 0) { - // pad with 0s - for (; charCount < 4; ++charCount) - *(wordPtr++) = 0; - addImmediateOperand(word); - } - } - void setBlock(Block* b) { block = b; } - Block* getBlock() const { return block; } - Op getOpCode() const { return opCode; } - int getNumOperands() const { return (int)operands.size(); } - Id getResultId() const { return resultId; } - Id getTypeId() const { return typeId; } - Id getIdOperand(int op) const { return operands[op]; } - unsigned int getImmediateOperand(int op) const { return operands[op]; } - - // Write out the binary form. 
- void dump(std::vector& out) const - { - // Compute the wordCount - unsigned int wordCount = 1; - if (typeId) - ++wordCount; - if (resultId) - ++wordCount; - wordCount += (unsigned int)operands.size(); - - // Write out the beginning of the instruction - out.push_back(((wordCount) << WordCountShift) | opCode); - if (typeId) - out.push_back(typeId); - if (resultId) - out.push_back(resultId); - - // Write out the operands - for (int op = 0; op < (int)operands.size(); ++op) - out.push_back(operands[op]); - } - -protected: - Instruction(const Instruction&); - Id resultId; - Id typeId; - Op opCode; - std::vector operands; - Block* block; -}; - -// -// SPIR-V IR block. -// - -class Block { -public: - Block(Id id, Function& parent); - virtual ~Block() - { - } - - Id getId() { return instructions.front()->getResultId(); } - - Function& getParent() const { return parent; } - void addInstruction(std::unique_ptr inst); - void addPredecessor(Block* pred) { predecessors.push_back(pred); pred->successors.push_back(this);} - void addLocalVariable(std::unique_ptr inst) { localVariables.push_back(std::move(inst)); } - const std::vector& getPredecessors() const { return predecessors; } - const std::vector& getSuccessors() const { return successors; } - const std::vector >& getInstructions() const { - return instructions; - } - void setUnreachable() { unreachable = true; } - bool isUnreachable() const { return unreachable; } - // Returns the block's merge instruction, if one exists (otherwise null). 
- const Instruction* getMergeInstruction() const { - if (instructions.size() < 2) return nullptr; - const Instruction* nextToLast = (instructions.cend() - 2)->get(); - switch (nextToLast->getOpCode()) { - case OpSelectionMerge: - case OpLoopMerge: - return nextToLast; - default: - return nullptr; - } - return nullptr; - } - - bool isTerminated() const - { - switch (instructions.back()->getOpCode()) { - case OpBranch: - case OpBranchConditional: - case OpSwitch: - case OpKill: - case OpReturn: - case OpReturnValue: - return true; - default: - return false; - } - } - - void dump(std::vector& out) const - { - instructions[0]->dump(out); - for (int i = 0; i < (int)localVariables.size(); ++i) - localVariables[i]->dump(out); - for (int i = 1; i < (int)instructions.size(); ++i) - instructions[i]->dump(out); - } - -protected: - Block(const Block&); - Block& operator=(Block&); - - // To enforce keeping parent and ownership in sync: - friend Function; - - std::vector > instructions; - std::vector predecessors, successors; - std::vector > localVariables; - Function& parent; - - // track whether this block is known to be uncreachable (not necessarily - // true for all unreachable blocks, but should be set at least - // for the extraneous ones introduced by the builder). - bool unreachable; -}; - -// Traverses the control-flow graph rooted at root in an order suited for -// readable code generation. Invokes callback at every node in the traversal -// order. -void inReadableOrder(Block* root, std::function callback); - -// -// SPIR-V IR Function. 
-// - -class Function { -public: - Function(Id id, Id resultType, Id functionType, Id firstParam, Module& parent); - virtual ~Function() - { - for (int i = 0; i < (int)parameterInstructions.size(); ++i) - delete parameterInstructions[i]; - - for (int i = 0; i < (int)blocks.size(); ++i) - delete blocks[i]; - } - Id getId() const { return functionInstruction.getResultId(); } - Id getParamId(int p) { return parameterInstructions[p]->getResultId(); } - - void addBlock(Block* block) { blocks.push_back(block); } - void removeBlock(Block* block) - { - auto found = find(blocks.begin(), blocks.end(), block); - assert(found != blocks.end()); - blocks.erase(found); - delete block; - } - - Module& getParent() const { return parent; } - Block* getEntryBlock() const { return blocks.front(); } - Block* getLastBlock() const { return blocks.back(); } - const std::vector& getBlocks() const { return blocks; } - void addLocalVariable(std::unique_ptr inst); - Id getReturnType() const { return functionInstruction.getTypeId(); } - - void setImplicitThis() { implicitThis = true; } - bool hasImplicitThis() const { return implicitThis; } - - void dump(std::vector& out) const - { - // OpFunction - functionInstruction.dump(out); - - // OpFunctionParameter - for (int p = 0; p < (int)parameterInstructions.size(); ++p) - parameterInstructions[p]->dump(out); - - // Blocks - inReadableOrder(blocks[0], [&out](const Block* b) { b->dump(out); }); - Instruction end(0, 0, OpFunctionEnd); - end.dump(out); - } - -protected: - Function(const Function&); - Function& operator=(Function&); - - Module& parent; - Instruction functionInstruction; - std::vector parameterInstructions; - std::vector blocks; - bool implicitThis; // true if this is a member function expecting to be passed a 'this' as the first argument -}; - -// -// SPIR-V IR Module. 
-// - -class Module { -public: - Module() {} - virtual ~Module() - { - // TODO delete things - } - - void addFunction(Function *fun) { functions.push_back(fun); } - - void mapInstruction(Instruction *instruction) - { - spv::Id resultId = instruction->getResultId(); - // map the instruction's result id - if (resultId >= idToInstruction.size()) - idToInstruction.resize(resultId + 16); - idToInstruction[resultId] = instruction; - } - - Instruction* getInstruction(Id id) const { return idToInstruction[id]; } - const std::vector& getFunctions() const { return functions; } - spv::Id getTypeId(Id resultId) const { return idToInstruction[resultId]->getTypeId(); } - StorageClass getStorageClass(Id typeId) const - { - assert(idToInstruction[typeId]->getOpCode() == spv::OpTypePointer); - return (StorageClass)idToInstruction[typeId]->getImmediateOperand(0); - } - - void dump(std::vector& out) const - { - for (int f = 0; f < (int)functions.size(); ++f) - functions[f]->dump(out); - } - -protected: - Module(const Module&); - std::vector functions; - - // map from result id to instruction having that result id - std::vector idToInstruction; - - // map from a result id to its type id -}; - -// -// Implementation (it's here due to circular type definitions). 
-// - -// Add both -// - the OpFunction instruction -// - all the OpFunctionParameter instructions -__inline Function::Function(Id id, Id resultType, Id functionType, Id firstParamId, Module& parent) - : parent(parent), functionInstruction(id, resultType, OpFunction), implicitThis(false) -{ - // OpFunction - functionInstruction.addImmediateOperand(FunctionControlMaskNone); - functionInstruction.addIdOperand(functionType); - parent.mapInstruction(&functionInstruction); - parent.addFunction(this); - - // OpFunctionParameter - Instruction* typeInst = parent.getInstruction(functionType); - int numParams = typeInst->getNumOperands() - 1; - for (int p = 0; p < numParams; ++p) { - Instruction* param = new Instruction(firstParamId + p, typeInst->getIdOperand(p + 1), OpFunctionParameter); - parent.mapInstruction(param); - parameterInstructions.push_back(param); - } -} - -__inline void Function::addLocalVariable(std::unique_ptr inst) -{ - Instruction* raw_instruction = inst.get(); - blocks[0]->addLocalVariable(std::move(inst)); - parent.mapInstruction(raw_instruction); -} - -__inline Block::Block(Id id, Function& parent) : parent(parent), unreachable(false) -{ - instructions.push_back(std::unique_ptr(new Instruction(id, NoType, OpLabel))); - instructions.back()->setBlock(this); - parent.getParent().mapInstruction(instructions.back().get()); -} - -__inline void Block::addInstruction(std::unique_ptr inst) -{ - Instruction* raw_instruction = inst.get(); - instructions.push_back(std::move(inst)); - raw_instruction->setBlock(this); - if (raw_instruction->getResultId()) - parent.getParent().mapInstruction(raw_instruction); -} - -}; // end spv namespace - -#endif // spvIR_H diff --git a/third_party/spirv-headers b/third_party/spirv-headers deleted file mode 160000 index 2bf91d32b..000000000 --- a/third_party/spirv-headers +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 2bf91d32b2ce17df9ca6c1e62cf478b24e7d2644 diff --git a/third_party/spirv-tools b/third_party/spirv-tools deleted 
file mode 160000 index 27a2bbb86..000000000 --- a/third_party/spirv-tools +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 27a2bbb865ef638afe4260bf214110425a2b904b diff --git a/third_party/spirv-tools.lua b/third_party/spirv-tools.lua deleted file mode 100644 index bf900a6e9..000000000 --- a/third_party/spirv-tools.lua +++ /dev/null @@ -1,76 +0,0 @@ -group("third_party") -project("spirv-tools") - uuid("621512da-bb50-40f2-85ba-ae615ff13e68") - kind("StaticLib") - language("C++") - links({ - }) - defines({ - "_LIB", - }) - includedirs({ - "spirv-headers/include", - "spirv-tools/include", - "spirv-tools/source", - }) - files({ - "spirv-tools/include/spirv-tools/libspirv.h", - "spirv-tools/source/val/basic_block.cpp", - "spirv-tools/source/val/basic_block.h", - "spirv-tools/source/val/construct.cpp", - "spirv-tools/source/val/construct.h", - "spirv-tools/source/val/function.cpp", - "spirv-tools/source/val/function.h", - "spirv-tools/source/val/validation_state.cpp", - "spirv-tools/source/val/validation_state.h", - "spirv-tools/source/util/*.cpp", - "spirv-tools/source/util/*.h", - "spirv-tools/source/val/*.cpp", - "spirv-tools/source/val/*.h", - "spirv-tools/source/*.inc", - "spirv-tools/source/*.cpp", - "spirv-tools/source/*.h", - "spirv-tools/source/assembly_grammar.cpp", - "spirv-tools/source/assembly_grammar.h", - "spirv-tools/source/binary.cpp", - "spirv-tools/source/binary.h", - "spirv-tools/source/diagnostic.cpp", - "spirv-tools/source/diagnostic.h", - "spirv-tools/source/disassemble.cpp", - "spirv-tools/source/ext_inst.cpp", - "spirv-tools/source/ext_inst.h", - "spirv-tools/source/instruction.h", - "spirv-tools/source/macro.h", - "spirv-tools/source/opcode.cpp", - "spirv-tools/source/opcode.h", - "spirv-tools/source/operand.cpp", - "spirv-tools/source/operand.h", - "spirv-tools/source/print.cpp", - "spirv-tools/source/print.h", - -- "spirv-tools/source/software_version.cpp", - "spirv-tools/source/spirv_constant.h", - "spirv-tools/source/spirv_definition.h", - 
"spirv-tools/source/spirv_endian.cpp", - "spirv-tools/source/spirv_endian.h", - "spirv-tools/source/spirv_target_env.cpp", - "spirv-tools/source/spirv_target_env.h", - "spirv-tools/source/table.cpp", - "spirv-tools/source/table.h", - "spirv-tools/source/text.cpp", - "spirv-tools/source/text.h", - "spirv-tools/source/text_handler.cpp", - "spirv-tools/source/text_handler.h", - "spirv-tools/source/validate.cpp", - "spirv-tools/source/validate.h", - "spirv-tools/source/validate_cfg.cpp", - "spirv-tools/source/validate_id.cpp", - "spirv-tools/source/validate_instruction.cpp", - "spirv-tools/source/validate_layout.cpp", - "spirv-tools/source/util/bitutils.h", - "spirv-tools/source/util/hex_float.h", - }) - filter("platforms:Windows") - buildoptions({ - "/wd4800", -- Forcing value to bool 'true' or 'false' - "/wd4996", -- Call to 'std::equal' with parameters that may be unsafe - }) \ No newline at end of file diff --git a/third_party/spirv/GLSL.std.450.h b/third_party/spirv/GLSL.std.450.h deleted file mode 100644 index df31092be..000000000 --- a/third_party/spirv/GLSL.std.450.h +++ /dev/null @@ -1,131 +0,0 @@ -/* -** Copyright (c) 2014-2016 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. -*/ - -#ifndef GLSLstd450_H -#define GLSLstd450_H - -static const int GLSLstd450Version = 100; -static const int GLSLstd450Revision = 1; - -enum GLSLstd450 { - GLSLstd450Bad = 0, // Don't use - - GLSLstd450Round = 1, - GLSLstd450RoundEven = 2, - GLSLstd450Trunc = 3, - GLSLstd450FAbs = 4, - GLSLstd450SAbs = 5, - GLSLstd450FSign = 6, - GLSLstd450SSign = 7, - GLSLstd450Floor = 8, - GLSLstd450Ceil = 9, - GLSLstd450Fract = 10, - - GLSLstd450Radians = 11, - GLSLstd450Degrees = 12, - GLSLstd450Sin = 13, - GLSLstd450Cos = 14, - GLSLstd450Tan = 15, - GLSLstd450Asin = 16, - GLSLstd450Acos = 17, - GLSLstd450Atan = 18, - GLSLstd450Sinh = 19, - GLSLstd450Cosh = 20, - GLSLstd450Tanh = 21, - GLSLstd450Asinh = 22, - GLSLstd450Acosh = 23, - GLSLstd450Atanh = 24, - GLSLstd450Atan2 = 25, - - GLSLstd450Pow = 26, - GLSLstd450Exp = 27, - GLSLstd450Log = 28, - GLSLstd450Exp2 = 29, - GLSLstd450Log2 = 30, - GLSLstd450Sqrt = 31, - GLSLstd450InverseSqrt = 32, - - GLSLstd450Determinant = 33, - GLSLstd450MatrixInverse = 34, - - GLSLstd450Modf = 35, // second operand needs an OpVariable to write to - GLSLstd450ModfStruct = 36, // no OpVariable operand - GLSLstd450FMin = 37, - GLSLstd450UMin = 38, - GLSLstd450SMin = 39, - GLSLstd450FMax = 40, - GLSLstd450UMax = 41, - GLSLstd450SMax = 42, - GLSLstd450FClamp = 43, - GLSLstd450UClamp = 44, - GLSLstd450SClamp = 45, - 
GLSLstd450FMix = 46, - GLSLstd450IMix = 47, // Reserved - GLSLstd450Step = 48, - GLSLstd450SmoothStep = 49, - - GLSLstd450Fma = 50, - GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to - GLSLstd450FrexpStruct = 52, // no OpVariable operand - GLSLstd450Ldexp = 53, - - GLSLstd450PackSnorm4x8 = 54, - GLSLstd450PackUnorm4x8 = 55, - GLSLstd450PackSnorm2x16 = 56, - GLSLstd450PackUnorm2x16 = 57, - GLSLstd450PackHalf2x16 = 58, - GLSLstd450PackDouble2x32 = 59, - GLSLstd450UnpackSnorm2x16 = 60, - GLSLstd450UnpackUnorm2x16 = 61, - GLSLstd450UnpackHalf2x16 = 62, - GLSLstd450UnpackSnorm4x8 = 63, - GLSLstd450UnpackUnorm4x8 = 64, - GLSLstd450UnpackDouble2x32 = 65, - - GLSLstd450Length = 66, - GLSLstd450Distance = 67, - GLSLstd450Cross = 68, - GLSLstd450Normalize = 69, - GLSLstd450FaceForward = 70, - GLSLstd450Reflect = 71, - GLSLstd450Refract = 72, - - GLSLstd450FindILsb = 73, - GLSLstd450FindSMsb = 74, - GLSLstd450FindUMsb = 75, - - GLSLstd450InterpolateAtCentroid = 76, - GLSLstd450InterpolateAtSample = 77, - GLSLstd450InterpolateAtOffset = 78, - - GLSLstd450NMin = 79, - GLSLstd450NMax = 80, - GLSLstd450NClamp = 81, - - GLSLstd450Count -}; - -#endif // #ifndef GLSLstd450_H diff --git a/third_party/spirv/GLSL.std.450.hpp11 b/third_party/spirv/GLSL.std.450.hpp11 deleted file mode 100644 index 526912006..000000000 --- a/third_party/spirv/GLSL.std.450.hpp11 +++ /dev/null @@ -1,135 +0,0 @@ -/* -** Copyright (c) 2014-2016 The Khronos Group Inc. 
-** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. 
-*/ - -#ifndef GLSLstd450_HPP -#define GLSLstd450_HPP - -namespace spv { - -constexpr int GLSLstd450Version = 100; -constexpr int GLSLstd450Revision = 1; - -enum class GLSLstd450 { - kBad = 0, // Don't use - - kRound = 1, - kRoundEven = 2, - kTrunc = 3, - kFAbs = 4, - kSAbs = 5, - kFSign = 6, - kSSign = 7, - kFloor = 8, - kCeil = 9, - kFract = 10, - - kRadians = 11, - kDegrees = 12, - kSin = 13, - kCos = 14, - kTan = 15, - kAsin = 16, - kAcos = 17, - kAtan = 18, - kSinh = 19, - kCosh = 20, - kTanh = 21, - kAsinh = 22, - kAcosh = 23, - kAtanh = 24, - kAtan2 = 25, - - kPow = 26, - kExp = 27, - kLog = 28, - kExp2 = 29, - kLog2 = 30, - kSqrt = 31, - kInverseSqrt = 32, - - kDeterminant = 33, - kMatrixInverse = 34, - - kModf = 35, // second operand needs an OpVariable to write to - kModfStruct = 36, // no OpVariable operand - kFMin = 37, - kUMin = 38, - kSMin = 39, - kFMax = 40, - kUMax = 41, - kSMax = 42, - kFClamp = 43, - kUClamp = 44, - kSClamp = 45, - kFMix = 46, - kIMix = 47, // Reserved - kStep = 48, - kSmoothStep = 49, - - kFma = 50, - kFrexp = 51, // second operand needs an OpVariable to write to - kFrexpStruct = 52, // no OpVariable operand - kLdexp = 53, - - kPackSnorm4x8 = 54, - kPackUnorm4x8 = 55, - kPackSnorm2x16 = 56, - kPackUnorm2x16 = 57, - kPackHalf2x16 = 58, - kPackDouble2x32 = 59, - kUnpackSnorm2x16 = 60, - kUnpackUnorm2x16 = 61, - kUnpackHalf2x16 = 62, - kUnpackSnorm4x8 = 63, - kUnpackUnorm4x8 = 64, - kUnpackDouble2x32 = 65, - - kLength = 66, - kDistance = 67, - kCross = 68, - kNormalize = 69, - kFaceForward = 70, - kReflect = 71, - kRefract = 72, - - kFindILsb = 73, - kFindSMsb = 74, - kFindUMsb = 75, - - kInterpolateAtCentroid = 76, - kInterpolateAtSample = 77, - kInterpolateAtOffset = 78, - - kNMin = 79, - kNMax = 80, - kNClamp = 81, - - kCount -}; - -} // namespace spv - -#endif // #ifndef GLSLstd450_HPP diff --git a/third_party/spirv/OpenCL.std.h b/third_party/spirv/OpenCL.std.h deleted file mode 100644 index af29c527e..000000000 --- 
a/third_party/spirv/OpenCL.std.h +++ /dev/null @@ -1,272 +0,0 @@ -/* -** Copyright (c) 2015-2016 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. 
-*/ - -// -// Author: Boaz Ouriel, Intel -// - -namespace OpenCLLIB { - -enum Entrypoints { - - // math functions - Acos = 0, - Acosh = 1, - Acospi = 2, - Asin = 3, - Asinh = 4, - Asinpi = 5, - Atan = 6, - Atan2 = 7, - Atanh = 8, - Atanpi = 9, - Atan2pi = 10, - Cbrt = 11, - Ceil = 12, - Copysign = 13, - Cos = 14, - Cosh = 15, - Cospi = 16, - Erfc = 17, - Erf = 18, - Exp = 19, - Exp2 = 20, - Exp10 = 21, - Expm1 = 22, - Fabs = 23, - Fdim = 24, - Floor = 25, - Fma = 26, - Fmax = 27, - Fmin = 28, - Fmod = 29, - Fract = 30, - Frexp = 31, - Hypot = 32, - Ilogb = 33, - Ldexp = 34, - Lgamma = 35, - Lgamma_r = 36, - Log = 37, - Log2 = 38, - Log10 = 39, - Log1p = 40, - Logb = 41, - Mad = 42, - Maxmag = 43, - Minmag = 44, - Modf = 45, - Nan = 46, - Nextafter = 47, - Pow = 48, - Pown = 49, - Powr = 50, - Remainder = 51, - Remquo = 52, - Rint = 53, - Rootn = 54, - Round = 55, - Rsqrt = 56, - Sin = 57, - Sincos = 58, - Sinh = 59, - Sinpi = 60, - Sqrt = 61, - Tan = 62, - Tanh = 63, - Tanpi = 64, - Tgamma = 65, - Trunc = 66, - Half_cos = 67, - Half_divide = 68, - Half_exp = 69, - Half_exp2 = 70, - Half_exp10 = 71, - Half_log = 72, - Half_log2 = 73, - Half_log10 = 74, - Half_powr = 75, - Half_recip = 76, - Half_rsqrt = 77, - Half_sin = 78, - Half_sqrt = 79, - Half_tan = 80, - Native_cos = 81, - Native_divide = 82, - Native_exp = 83, - Native_exp2 = 84, - Native_exp10 = 85, - Native_log = 86, - Native_log2 = 87, - Native_log10 = 88, - Native_powr = 89, - Native_recip = 90, - Native_rsqrt = 91, - Native_sin = 92, - Native_sqrt = 93, - Native_tan = 94, - - // Common - FClamp = 95, - Degrees = 96, - FMax_common = 97, - FMin_common = 98, - Mix = 99, - Radians = 100, - Step = 101, - Smoothstep = 102, - Sign = 103, - - // Geometrics - Cross = 104, - Distance = 105, - Length = 106, - Normalize = 107, - Fast_distance = 108, - Fast_length = 109, - Fast_normalize = 110, - - // Images - Deprecated - Read_imagef = 111, - Read_imagei = 112, - Read_imageui = 113, - Read_imageh = 114, - - 
Read_imagef_samplerless = 115, - Read_imagei_samplerless = 116, - Read_imageui_samplerless = 117, - Read_imageh_samplerless = 118, - - Write_imagef = 119, - Write_imagei = 120, - Write_imageui = 121, - Write_imageh = 122, - Read_imagef_mipmap_lod = 123, - Read_imagei_mipmap_lod = 124, - Read_imageui_mipmap_lod = 125, - Read_imagef_mipmap_grad = 126, - Read_imagei_mipmap_grad = 127, - Read_imageui_mipmap_grad = 128, - - // Image write with LOD - Write_imagef_mipmap_lod = 129, - Write_imagei_mipmap_lod = 130, - Write_imageui_mipmap_lod = 131, - - // Images - Deprecated - Get_image_width = 132, - Get_image_height = 133, - Get_image_depth = 134, - Get_image_channel_data_type = 135, - Get_image_channel_order = 136, - Get_image_dim = 137, - Get_image_array_size = 138, - Get_image_num_samples = 139, - Get_image_num_mip_levels = 140, - - // Integers - SAbs = 141, - SAbs_diff = 142, - SAdd_sat = 143, - UAdd_sat = 144, - SHadd = 145, - UHadd = 146, - SRhadd = 147, - URhadd = 148, - SClamp = 149, - UClamp = 150, - Clz = 151, - Ctz = 152, - SMad_hi = 153, - UMad_sat = 154, - SMad_sat = 155, - SMax = 156, - UMax = 157, - SMin = 158, - UMin = 159, - SMul_hi = 160, - Rotate = 161, - SSub_sat = 162, - USub_sat = 163, - U_Upsample = 164, - S_Upsample = 165, - Popcount = 166, - SMad24 = 167, - UMad24 = 168, - SMul24 = 169, - UMul24 = 170, - - // Vector Loads/Stores - Vloadn = 171, - Vstoren = 172, - Vload_half = 173, - Vload_halfn = 174, - Vstore_half = 175, - Vstore_half_r = 176, - Vstore_halfn = 177, - Vstore_halfn_r = 178, - Vloada_halfn = 179, - Vstorea_halfn = 180, - Vstorea_halfn_r = 181, - - // Vector Misc - Shuffle = 182, - Shuffle2 = 183, - - // - Printf = 184, - Prefetch = 185, - - // Relationals - Bitselect = 186, - Select = 187, - - // pipes - Read_pipe = 188, - Write_pipe = 189, - Reserve_read_pipe = 190, - Reserve_write_pipe = 191, - Commit_read_pipe = 192, - Commit_write_pipe = 193, - Is_valid_reserve_id = 194, - Work_group_reserve_read_pipe = 195, - 
Work_group_reserve_write_pipe = 196, - Work_group_commit_read_pipe = 197, - Work_group_commit_write_pipe = 198, - Get_pipe_num_packets = 199, - Get_pipe_max_packets = 200, - - // more integers - UAbs = 201, - UAbs_diff = 202, - UMul_hi = 203, - UMad_hi = 204, -}; - - - -}; // end namespace OpenCL20 - diff --git a/third_party/spirv/spirv.h b/third_party/spirv/spirv.h deleted file mode 100644 index d48488e94..000000000 --- a/third_party/spirv/spirv.h +++ /dev/null @@ -1,871 +0,0 @@ -/* -** Copyright (c) 2014-2016 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. 
-*/ - -/* -** This header is automatically generated by the same tool that creates -** the Binary Section of the SPIR-V specification. -*/ - -/* -** Enumeration tokens for SPIR-V, in various styles: -** C, C++, C++11, JSON, Lua, Python -** -** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL -** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL -** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL -** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL -** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] -** -** Some tokens act like mask values, which can be OR'd together, -** while others are mutually exclusive. The mask-like ones have -** "Mask" in their name, and a parallel enum that has the shift -** amount (1 << x) for each corresponding enumerant. -*/ - -#ifndef spirv_H -#define spirv_H - -typedef unsigned int SpvId; - -#define SPV_VERSION 0x10000 -#define SPV_REVISION 3 - -static const unsigned int SpvMagicNumber = 0x07230203; -static const unsigned int SpvVersion = 0x00010000; -static const unsigned int SpvRevision = 3; -static const unsigned int SpvOpCodeMask = 0xffff; -static const unsigned int SpvWordCountShift = 16; - -typedef enum SpvSourceLanguage_ { - SpvSourceLanguageUnknown = 0, - SpvSourceLanguageESSL = 1, - SpvSourceLanguageGLSL = 2, - SpvSourceLanguageOpenCL_C = 3, - SpvSourceLanguageOpenCL_CPP = 4, -} SpvSourceLanguage; - -typedef enum SpvExecutionModel_ { - SpvExecutionModelVertex = 0, - SpvExecutionModelTessellationControl = 1, - SpvExecutionModelTessellationEvaluation = 2, - SpvExecutionModelGeometry = 3, - SpvExecutionModelFragment = 4, - SpvExecutionModelGLCompute = 5, - SpvExecutionModelKernel = 6, -} SpvExecutionModel; - -typedef enum SpvAddressingModel_ { - SpvAddressingModelLogical = 0, - SpvAddressingModelPhysical32 = 1, - SpvAddressingModelPhysical64 = 2, -} SpvAddressingModel; - -typedef enum SpvMemoryModel_ { - 
SpvMemoryModelSimple = 0, - SpvMemoryModelGLSL450 = 1, - SpvMemoryModelOpenCL = 2, -} SpvMemoryModel; - -typedef enum SpvExecutionMode_ { - SpvExecutionModeInvocations = 0, - SpvExecutionModeSpacingEqual = 1, - SpvExecutionModeSpacingFractionalEven = 2, - SpvExecutionModeSpacingFractionalOdd = 3, - SpvExecutionModeVertexOrderCw = 4, - SpvExecutionModeVertexOrderCcw = 5, - SpvExecutionModePixelCenterInteger = 6, - SpvExecutionModeOriginUpperLeft = 7, - SpvExecutionModeOriginLowerLeft = 8, - SpvExecutionModeEarlyFragmentTests = 9, - SpvExecutionModePointMode = 10, - SpvExecutionModeXfb = 11, - SpvExecutionModeDepthReplacing = 12, - SpvExecutionModeDepthGreater = 14, - SpvExecutionModeDepthLess = 15, - SpvExecutionModeDepthUnchanged = 16, - SpvExecutionModeLocalSize = 17, - SpvExecutionModeLocalSizeHint = 18, - SpvExecutionModeInputPoints = 19, - SpvExecutionModeInputLines = 20, - SpvExecutionModeInputLinesAdjacency = 21, - SpvExecutionModeTriangles = 22, - SpvExecutionModeInputTrianglesAdjacency = 23, - SpvExecutionModeQuads = 24, - SpvExecutionModeIsolines = 25, - SpvExecutionModeOutputVertices = 26, - SpvExecutionModeOutputPoints = 27, - SpvExecutionModeOutputLineStrip = 28, - SpvExecutionModeOutputTriangleStrip = 29, - SpvExecutionModeVecTypeHint = 30, - SpvExecutionModeContractionOff = 31, -} SpvExecutionMode; - -typedef enum SpvStorageClass_ { - SpvStorageClassUniformConstant = 0, - SpvStorageClassInput = 1, - SpvStorageClassUniform = 2, - SpvStorageClassOutput = 3, - SpvStorageClassWorkgroup = 4, - SpvStorageClassCrossWorkgroup = 5, - SpvStorageClassPrivate = 6, - SpvStorageClassFunction = 7, - SpvStorageClassGeneric = 8, - SpvStorageClassPushConstant = 9, - SpvStorageClassAtomicCounter = 10, - SpvStorageClassImage = 11, -} SpvStorageClass; - -typedef enum SpvDim_ { - SpvDim1D = 0, - SpvDim2D = 1, - SpvDim3D = 2, - SpvDimCube = 3, - SpvDimRect = 4, - SpvDimBuffer = 5, - SpvDimSubpassData = 6, -} SpvDim; - -typedef enum SpvSamplerAddressingMode_ { - 
SpvSamplerAddressingModeNone = 0, - SpvSamplerAddressingModeClampToEdge = 1, - SpvSamplerAddressingModeClamp = 2, - SpvSamplerAddressingModeRepeat = 3, - SpvSamplerAddressingModeRepeatMirrored = 4, -} SpvSamplerAddressingMode; - -typedef enum SpvSamplerFilterMode_ { - SpvSamplerFilterModeNearest = 0, - SpvSamplerFilterModeLinear = 1, -} SpvSamplerFilterMode; - -typedef enum SpvImageFormat_ { - SpvImageFormatUnknown = 0, - SpvImageFormatRgba32f = 1, - SpvImageFormatRgba16f = 2, - SpvImageFormatR32f = 3, - SpvImageFormatRgba8 = 4, - SpvImageFormatRgba8Snorm = 5, - SpvImageFormatRg32f = 6, - SpvImageFormatRg16f = 7, - SpvImageFormatR11fG11fB10f = 8, - SpvImageFormatR16f = 9, - SpvImageFormatRgba16 = 10, - SpvImageFormatRgb10A2 = 11, - SpvImageFormatRg16 = 12, - SpvImageFormatRg8 = 13, - SpvImageFormatR16 = 14, - SpvImageFormatR8 = 15, - SpvImageFormatRgba16Snorm = 16, - SpvImageFormatRg16Snorm = 17, - SpvImageFormatRg8Snorm = 18, - SpvImageFormatR16Snorm = 19, - SpvImageFormatR8Snorm = 20, - SpvImageFormatRgba32i = 21, - SpvImageFormatRgba16i = 22, - SpvImageFormatRgba8i = 23, - SpvImageFormatR32i = 24, - SpvImageFormatRg32i = 25, - SpvImageFormatRg16i = 26, - SpvImageFormatRg8i = 27, - SpvImageFormatR16i = 28, - SpvImageFormatR8i = 29, - SpvImageFormatRgba32ui = 30, - SpvImageFormatRgba16ui = 31, - SpvImageFormatRgba8ui = 32, - SpvImageFormatR32ui = 33, - SpvImageFormatRgb10a2ui = 34, - SpvImageFormatRg32ui = 35, - SpvImageFormatRg16ui = 36, - SpvImageFormatRg8ui = 37, - SpvImageFormatR16ui = 38, - SpvImageFormatR8ui = 39, -} SpvImageFormat; - -typedef enum SpvImageChannelOrder_ { - SpvImageChannelOrderR = 0, - SpvImageChannelOrderA = 1, - SpvImageChannelOrderRG = 2, - SpvImageChannelOrderRA = 3, - SpvImageChannelOrderRGB = 4, - SpvImageChannelOrderRGBA = 5, - SpvImageChannelOrderBGRA = 6, - SpvImageChannelOrderARGB = 7, - SpvImageChannelOrderIntensity = 8, - SpvImageChannelOrderLuminance = 9, - SpvImageChannelOrderRx = 10, - SpvImageChannelOrderRGx = 11, - 
SpvImageChannelOrderRGBx = 12, - SpvImageChannelOrderDepth = 13, - SpvImageChannelOrderDepthStencil = 14, - SpvImageChannelOrdersRGB = 15, - SpvImageChannelOrdersRGBx = 16, - SpvImageChannelOrdersRGBA = 17, - SpvImageChannelOrdersBGRA = 18, -} SpvImageChannelOrder; - -typedef enum SpvImageChannelDataType_ { - SpvImageChannelDataTypeSnormInt8 = 0, - SpvImageChannelDataTypeSnormInt16 = 1, - SpvImageChannelDataTypeUnormInt8 = 2, - SpvImageChannelDataTypeUnormInt16 = 3, - SpvImageChannelDataTypeUnormShort565 = 4, - SpvImageChannelDataTypeUnormShort555 = 5, - SpvImageChannelDataTypeUnormInt101010 = 6, - SpvImageChannelDataTypeSignedInt8 = 7, - SpvImageChannelDataTypeSignedInt16 = 8, - SpvImageChannelDataTypeSignedInt32 = 9, - SpvImageChannelDataTypeUnsignedInt8 = 10, - SpvImageChannelDataTypeUnsignedInt16 = 11, - SpvImageChannelDataTypeUnsignedInt32 = 12, - SpvImageChannelDataTypeHalfFloat = 13, - SpvImageChannelDataTypeFloat = 14, - SpvImageChannelDataTypeUnormInt24 = 15, - SpvImageChannelDataTypeUnormInt101010_2 = 16, -} SpvImageChannelDataType; - -typedef enum SpvImageOperandsShift_ { - SpvImageOperandsBiasShift = 0, - SpvImageOperandsLodShift = 1, - SpvImageOperandsGradShift = 2, - SpvImageOperandsConstOffsetShift = 3, - SpvImageOperandsOffsetShift = 4, - SpvImageOperandsConstOffsetsShift = 5, - SpvImageOperandsSampleShift = 6, - SpvImageOperandsMinLodShift = 7, -} SpvImageOperandsShift; - -typedef enum SpvImageOperandsMask_ { - SpvImageOperandsMaskNone = 0, - SpvImageOperandsBiasMask = 0x00000001, - SpvImageOperandsLodMask = 0x00000002, - SpvImageOperandsGradMask = 0x00000004, - SpvImageOperandsConstOffsetMask = 0x00000008, - SpvImageOperandsOffsetMask = 0x00000010, - SpvImageOperandsConstOffsetsMask = 0x00000020, - SpvImageOperandsSampleMask = 0x00000040, - SpvImageOperandsMinLodMask = 0x00000080, -} SpvImageOperandsMask; - -typedef enum SpvFPFastMathModeShift_ { - SpvFPFastMathModeNotNaNShift = 0, - SpvFPFastMathModeNotInfShift = 1, - SpvFPFastMathModeNSZShift = 
2, - SpvFPFastMathModeAllowRecipShift = 3, - SpvFPFastMathModeFastShift = 4, -} SpvFPFastMathModeShift; - -typedef enum SpvFPFastMathModeMask_ { - SpvFPFastMathModeMaskNone = 0, - SpvFPFastMathModeNotNaNMask = 0x00000001, - SpvFPFastMathModeNotInfMask = 0x00000002, - SpvFPFastMathModeNSZMask = 0x00000004, - SpvFPFastMathModeAllowRecipMask = 0x00000008, - SpvFPFastMathModeFastMask = 0x00000010, -} SpvFPFastMathModeMask; - -typedef enum SpvFPRoundingMode_ { - SpvFPRoundingModeRTE = 0, - SpvFPRoundingModeRTZ = 1, - SpvFPRoundingModeRTP = 2, - SpvFPRoundingModeRTN = 3, -} SpvFPRoundingMode; - -typedef enum SpvLinkageType_ { - SpvLinkageTypeExport = 0, - SpvLinkageTypeImport = 1, -} SpvLinkageType; - -typedef enum SpvAccessQualifier_ { - SpvAccessQualifierReadOnly = 0, - SpvAccessQualifierWriteOnly = 1, - SpvAccessQualifierReadWrite = 2, -} SpvAccessQualifier; - -typedef enum SpvFunctionParameterAttribute_ { - SpvFunctionParameterAttributeZext = 0, - SpvFunctionParameterAttributeSext = 1, - SpvFunctionParameterAttributeByVal = 2, - SpvFunctionParameterAttributeSret = 3, - SpvFunctionParameterAttributeNoAlias = 4, - SpvFunctionParameterAttributeNoCapture = 5, - SpvFunctionParameterAttributeNoWrite = 6, - SpvFunctionParameterAttributeNoReadWrite = 7, -} SpvFunctionParameterAttribute; - -typedef enum SpvDecoration_ { - SpvDecorationRelaxedPrecision = 0, - SpvDecorationSpecId = 1, - SpvDecorationBlock = 2, - SpvDecorationBufferBlock = 3, - SpvDecorationRowMajor = 4, - SpvDecorationColMajor = 5, - SpvDecorationArrayStride = 6, - SpvDecorationMatrixStride = 7, - SpvDecorationGLSLShared = 8, - SpvDecorationGLSLPacked = 9, - SpvDecorationCPacked = 10, - SpvDecorationBuiltIn = 11, - SpvDecorationNoPerspective = 13, - SpvDecorationFlat = 14, - SpvDecorationPatch = 15, - SpvDecorationCentroid = 16, - SpvDecorationSample = 17, - SpvDecorationInvariant = 18, - SpvDecorationRestrict = 19, - SpvDecorationAliased = 20, - SpvDecorationVolatile = 21, - SpvDecorationConstant = 22, - 
SpvDecorationCoherent = 23, - SpvDecorationNonWritable = 24, - SpvDecorationNonReadable = 25, - SpvDecorationUniform = 26, - SpvDecorationSaturatedConversion = 28, - SpvDecorationStream = 29, - SpvDecorationLocation = 30, - SpvDecorationComponent = 31, - SpvDecorationIndex = 32, - SpvDecorationBinding = 33, - SpvDecorationDescriptorSet = 34, - SpvDecorationOffset = 35, - SpvDecorationXfbBuffer = 36, - SpvDecorationXfbStride = 37, - SpvDecorationFuncParamAttr = 38, - SpvDecorationFPRoundingMode = 39, - SpvDecorationFPFastMathMode = 40, - SpvDecorationLinkageAttributes = 41, - SpvDecorationNoContraction = 42, - SpvDecorationInputAttachmentIndex = 43, - SpvDecorationAlignment = 44, -} SpvDecoration; - -typedef enum SpvBuiltIn_ { - SpvBuiltInPosition = 0, - SpvBuiltInPointSize = 1, - SpvBuiltInClipDistance = 3, - SpvBuiltInCullDistance = 4, - SpvBuiltInVertexId = 5, - SpvBuiltInInstanceId = 6, - SpvBuiltInPrimitiveId = 7, - SpvBuiltInInvocationId = 8, - SpvBuiltInLayer = 9, - SpvBuiltInViewportIndex = 10, - SpvBuiltInTessLevelOuter = 11, - SpvBuiltInTessLevelInner = 12, - SpvBuiltInTessCoord = 13, - SpvBuiltInPatchVertices = 14, - SpvBuiltInFragCoord = 15, - SpvBuiltInPointCoord = 16, - SpvBuiltInFrontFacing = 17, - SpvBuiltInSampleId = 18, - SpvBuiltInSamplePosition = 19, - SpvBuiltInSampleMask = 20, - SpvBuiltInFragDepth = 22, - SpvBuiltInHelperInvocation = 23, - SpvBuiltInNumWorkgroups = 24, - SpvBuiltInWorkgroupSize = 25, - SpvBuiltInWorkgroupId = 26, - SpvBuiltInLocalInvocationId = 27, - SpvBuiltInGlobalInvocationId = 28, - SpvBuiltInLocalInvocationIndex = 29, - SpvBuiltInWorkDim = 30, - SpvBuiltInGlobalSize = 31, - SpvBuiltInEnqueuedWorkgroupSize = 32, - SpvBuiltInGlobalOffset = 33, - SpvBuiltInGlobalLinearId = 34, - SpvBuiltInSubgroupSize = 36, - SpvBuiltInSubgroupMaxSize = 37, - SpvBuiltInNumSubgroups = 38, - SpvBuiltInNumEnqueuedSubgroups = 39, - SpvBuiltInSubgroupId = 40, - SpvBuiltInSubgroupLocalInvocationId = 41, - SpvBuiltInVertexIndex = 42, - 
SpvBuiltInInstanceIndex = 43, -} SpvBuiltIn; - -typedef enum SpvSelectionControlShift_ { - SpvSelectionControlFlattenShift = 0, - SpvSelectionControlDontFlattenShift = 1, -} SpvSelectionControlShift; - -typedef enum SpvSelectionControlMask_ { - SpvSelectionControlMaskNone = 0, - SpvSelectionControlFlattenMask = 0x00000001, - SpvSelectionControlDontFlattenMask = 0x00000002, -} SpvSelectionControlMask; - -typedef enum SpvLoopControlShift_ { - SpvLoopControlUnrollShift = 0, - SpvLoopControlDontUnrollShift = 1, -} SpvLoopControlShift; - -typedef enum SpvLoopControlMask_ { - SpvLoopControlMaskNone = 0, - SpvLoopControlUnrollMask = 0x00000001, - SpvLoopControlDontUnrollMask = 0x00000002, -} SpvLoopControlMask; - -typedef enum SpvFunctionControlShift_ { - SpvFunctionControlInlineShift = 0, - SpvFunctionControlDontInlineShift = 1, - SpvFunctionControlPureShift = 2, - SpvFunctionControlConstShift = 3, -} SpvFunctionControlShift; - -typedef enum SpvFunctionControlMask_ { - SpvFunctionControlMaskNone = 0, - SpvFunctionControlInlineMask = 0x00000001, - SpvFunctionControlDontInlineMask = 0x00000002, - SpvFunctionControlPureMask = 0x00000004, - SpvFunctionControlConstMask = 0x00000008, -} SpvFunctionControlMask; - -typedef enum SpvMemorySemanticsShift_ { - SpvMemorySemanticsAcquireShift = 1, - SpvMemorySemanticsReleaseShift = 2, - SpvMemorySemanticsAcquireReleaseShift = 3, - SpvMemorySemanticsSequentiallyConsistentShift = 4, - SpvMemorySemanticsUniformMemoryShift = 6, - SpvMemorySemanticsSubgroupMemoryShift = 7, - SpvMemorySemanticsWorkgroupMemoryShift = 8, - SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, - SpvMemorySemanticsAtomicCounterMemoryShift = 10, - SpvMemorySemanticsImageMemoryShift = 11, -} SpvMemorySemanticsShift; - -typedef enum SpvMemorySemanticsMask_ { - SpvMemorySemanticsMaskNone = 0, - SpvMemorySemanticsAcquireMask = 0x00000002, - SpvMemorySemanticsReleaseMask = 0x00000004, - SpvMemorySemanticsAcquireReleaseMask = 0x00000008, - 
SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, - SpvMemorySemanticsUniformMemoryMask = 0x00000040, - SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, - SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100, - SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, - SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, - SpvMemorySemanticsImageMemoryMask = 0x00000800, -} SpvMemorySemanticsMask; - -typedef enum SpvMemoryAccessShift_ { - SpvMemoryAccessVolatileShift = 0, - SpvMemoryAccessAlignedShift = 1, - SpvMemoryAccessNontemporalShift = 2, -} SpvMemoryAccessShift; - -typedef enum SpvMemoryAccessMask_ { - SpvMemoryAccessMaskNone = 0, - SpvMemoryAccessVolatileMask = 0x00000001, - SpvMemoryAccessAlignedMask = 0x00000002, - SpvMemoryAccessNontemporalMask = 0x00000004, -} SpvMemoryAccessMask; - -typedef enum SpvScope_ { - SpvScopeCrossDevice = 0, - SpvScopeDevice = 1, - SpvScopeWorkgroup = 2, - SpvScopeSubgroup = 3, - SpvScopeInvocation = 4, -} SpvScope; - -typedef enum SpvGroupOperation_ { - SpvGroupOperationReduce = 0, - SpvGroupOperationInclusiveScan = 1, - SpvGroupOperationExclusiveScan = 2, -} SpvGroupOperation; - -typedef enum SpvKernelEnqueueFlags_ { - SpvKernelEnqueueFlagsNoWait = 0, - SpvKernelEnqueueFlagsWaitKernel = 1, - SpvKernelEnqueueFlagsWaitWorkGroup = 2, -} SpvKernelEnqueueFlags; - -typedef enum SpvKernelProfilingInfoShift_ { - SpvKernelProfilingInfoCmdExecTimeShift = 0, -} SpvKernelProfilingInfoShift; - -typedef enum SpvKernelProfilingInfoMask_ { - SpvKernelProfilingInfoMaskNone = 0, - SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, -} SpvKernelProfilingInfoMask; - -typedef enum SpvCapability_ { - SpvCapabilityMatrix = 0, - SpvCapabilityShader = 1, - SpvCapabilityGeometry = 2, - SpvCapabilityTessellation = 3, - SpvCapabilityAddresses = 4, - SpvCapabilityLinkage = 5, - SpvCapabilityKernel = 6, - SpvCapabilityVector16 = 7, - SpvCapabilityFloat16Buffer = 8, - SpvCapabilityFloat16 = 9, - SpvCapabilityFloat64 = 10, - SpvCapabilityInt64 = 
11, - SpvCapabilityInt64Atomics = 12, - SpvCapabilityImageBasic = 13, - SpvCapabilityImageReadWrite = 14, - SpvCapabilityImageMipmap = 15, - SpvCapabilityPipes = 17, - SpvCapabilityGroups = 18, - SpvCapabilityDeviceEnqueue = 19, - SpvCapabilityLiteralSampler = 20, - SpvCapabilityAtomicStorage = 21, - SpvCapabilityInt16 = 22, - SpvCapabilityTessellationPointSize = 23, - SpvCapabilityGeometryPointSize = 24, - SpvCapabilityImageGatherExtended = 25, - SpvCapabilityStorageImageMultisample = 27, - SpvCapabilityUniformBufferArrayDynamicIndexing = 28, - SpvCapabilitySampledImageArrayDynamicIndexing = 29, - SpvCapabilityStorageBufferArrayDynamicIndexing = 30, - SpvCapabilityStorageImageArrayDynamicIndexing = 31, - SpvCapabilityClipDistance = 32, - SpvCapabilityCullDistance = 33, - SpvCapabilityImageCubeArray = 34, - SpvCapabilitySampleRateShading = 35, - SpvCapabilityImageRect = 36, - SpvCapabilitySampledRect = 37, - SpvCapabilityGenericPointer = 38, - SpvCapabilityInt8 = 39, - SpvCapabilityInputAttachment = 40, - SpvCapabilitySparseResidency = 41, - SpvCapabilityMinLod = 42, - SpvCapabilitySampled1D = 43, - SpvCapabilityImage1D = 44, - SpvCapabilitySampledCubeArray = 45, - SpvCapabilitySampledBuffer = 46, - SpvCapabilityImageBuffer = 47, - SpvCapabilityImageMSArray = 48, - SpvCapabilityStorageImageExtendedFormats = 49, - SpvCapabilityImageQuery = 50, - SpvCapabilityDerivativeControl = 51, - SpvCapabilityInterpolationFunction = 52, - SpvCapabilityTransformFeedback = 53, - SpvCapabilityGeometryStreams = 54, - SpvCapabilityStorageImageReadWithoutFormat = 55, - SpvCapabilityStorageImageWriteWithoutFormat = 56, - SpvCapabilityMultiViewport = 57, -} SpvCapability; - -typedef enum SpvOp_ { - SpvOpNop = 0, - SpvOpUndef = 1, - SpvOpSourceContinued = 2, - SpvOpSource = 3, - SpvOpSourceExtension = 4, - SpvOpName = 5, - SpvOpMemberName = 6, - SpvOpString = 7, - SpvOpLine = 8, - SpvOpExtension = 10, - SpvOpExtInstImport = 11, - SpvOpExtInst = 12, - SpvOpMemoryModel = 14, - 
SpvOpEntryPoint = 15, - SpvOpExecutionMode = 16, - SpvOpCapability = 17, - SpvOpTypeVoid = 19, - SpvOpTypeBool = 20, - SpvOpTypeInt = 21, - SpvOpTypeFloat = 22, - SpvOpTypeVector = 23, - SpvOpTypeMatrix = 24, - SpvOpTypeImage = 25, - SpvOpTypeSampler = 26, - SpvOpTypeSampledImage = 27, - SpvOpTypeArray = 28, - SpvOpTypeRuntimeArray = 29, - SpvOpTypeStruct = 30, - SpvOpTypeOpaque = 31, - SpvOpTypePointer = 32, - SpvOpTypeFunction = 33, - SpvOpTypeEvent = 34, - SpvOpTypeDeviceEvent = 35, - SpvOpTypeReserveId = 36, - SpvOpTypeQueue = 37, - SpvOpTypePipe = 38, - SpvOpTypeForwardPointer = 39, - SpvOpConstantTrue = 41, - SpvOpConstantFalse = 42, - SpvOpConstant = 43, - SpvOpConstantComposite = 44, - SpvOpConstantSampler = 45, - SpvOpConstantNull = 46, - SpvOpSpecConstantTrue = 48, - SpvOpSpecConstantFalse = 49, - SpvOpSpecConstant = 50, - SpvOpSpecConstantComposite = 51, - SpvOpSpecConstantOp = 52, - SpvOpFunction = 54, - SpvOpFunctionParameter = 55, - SpvOpFunctionEnd = 56, - SpvOpFunctionCall = 57, - SpvOpVariable = 59, - SpvOpImageTexelPointer = 60, - SpvOpLoad = 61, - SpvOpStore = 62, - SpvOpCopyMemory = 63, - SpvOpCopyMemorySized = 64, - SpvOpAccessChain = 65, - SpvOpInBoundsAccessChain = 66, - SpvOpPtrAccessChain = 67, - SpvOpArrayLength = 68, - SpvOpGenericPtrMemSemantics = 69, - SpvOpInBoundsPtrAccessChain = 70, - SpvOpDecorate = 71, - SpvOpMemberDecorate = 72, - SpvOpDecorationGroup = 73, - SpvOpGroupDecorate = 74, - SpvOpGroupMemberDecorate = 75, - SpvOpVectorExtractDynamic = 77, - SpvOpVectorInsertDynamic = 78, - SpvOpVectorShuffle = 79, - SpvOpCompositeConstruct = 80, - SpvOpCompositeExtract = 81, - SpvOpCompositeInsert = 82, - SpvOpCopyObject = 83, - SpvOpTranspose = 84, - SpvOpSampledImage = 86, - SpvOpImageSampleImplicitLod = 87, - SpvOpImageSampleExplicitLod = 88, - SpvOpImageSampleDrefImplicitLod = 89, - SpvOpImageSampleDrefExplicitLod = 90, - SpvOpImageSampleProjImplicitLod = 91, - SpvOpImageSampleProjExplicitLod = 92, - 
SpvOpImageSampleProjDrefImplicitLod = 93, - SpvOpImageSampleProjDrefExplicitLod = 94, - SpvOpImageFetch = 95, - SpvOpImageGather = 96, - SpvOpImageDrefGather = 97, - SpvOpImageRead = 98, - SpvOpImageWrite = 99, - SpvOpImage = 100, - SpvOpImageQueryFormat = 101, - SpvOpImageQueryOrder = 102, - SpvOpImageQuerySizeLod = 103, - SpvOpImageQuerySize = 104, - SpvOpImageQueryLod = 105, - SpvOpImageQueryLevels = 106, - SpvOpImageQuerySamples = 107, - SpvOpConvertFToU = 109, - SpvOpConvertFToS = 110, - SpvOpConvertSToF = 111, - SpvOpConvertUToF = 112, - SpvOpUConvert = 113, - SpvOpSConvert = 114, - SpvOpFConvert = 115, - SpvOpQuantizeToF16 = 116, - SpvOpConvertPtrToU = 117, - SpvOpSatConvertSToU = 118, - SpvOpSatConvertUToS = 119, - SpvOpConvertUToPtr = 120, - SpvOpPtrCastToGeneric = 121, - SpvOpGenericCastToPtr = 122, - SpvOpGenericCastToPtrExplicit = 123, - SpvOpBitcast = 124, - SpvOpSNegate = 126, - SpvOpFNegate = 127, - SpvOpIAdd = 128, - SpvOpFAdd = 129, - SpvOpISub = 130, - SpvOpFSub = 131, - SpvOpIMul = 132, - SpvOpFMul = 133, - SpvOpUDiv = 134, - SpvOpSDiv = 135, - SpvOpFDiv = 136, - SpvOpUMod = 137, - SpvOpSRem = 138, - SpvOpSMod = 139, - SpvOpFRem = 140, - SpvOpFMod = 141, - SpvOpVectorTimesScalar = 142, - SpvOpMatrixTimesScalar = 143, - SpvOpVectorTimesMatrix = 144, - SpvOpMatrixTimesVector = 145, - SpvOpMatrixTimesMatrix = 146, - SpvOpOuterProduct = 147, - SpvOpDot = 148, - SpvOpIAddCarry = 149, - SpvOpISubBorrow = 150, - SpvOpUMulExtended = 151, - SpvOpSMulExtended = 152, - SpvOpAny = 154, - SpvOpAll = 155, - SpvOpIsNan = 156, - SpvOpIsInf = 157, - SpvOpIsFinite = 158, - SpvOpIsNormal = 159, - SpvOpSignBitSet = 160, - SpvOpLessOrGreater = 161, - SpvOpOrdered = 162, - SpvOpUnordered = 163, - SpvOpLogicalEqual = 164, - SpvOpLogicalNotEqual = 165, - SpvOpLogicalOr = 166, - SpvOpLogicalAnd = 167, - SpvOpLogicalNot = 168, - SpvOpSelect = 169, - SpvOpIEqual = 170, - SpvOpINotEqual = 171, - SpvOpUGreaterThan = 172, - SpvOpSGreaterThan = 173, - SpvOpUGreaterThanEqual = 
174, - SpvOpSGreaterThanEqual = 175, - SpvOpULessThan = 176, - SpvOpSLessThan = 177, - SpvOpULessThanEqual = 178, - SpvOpSLessThanEqual = 179, - SpvOpFOrdEqual = 180, - SpvOpFUnordEqual = 181, - SpvOpFOrdNotEqual = 182, - SpvOpFUnordNotEqual = 183, - SpvOpFOrdLessThan = 184, - SpvOpFUnordLessThan = 185, - SpvOpFOrdGreaterThan = 186, - SpvOpFUnordGreaterThan = 187, - SpvOpFOrdLessThanEqual = 188, - SpvOpFUnordLessThanEqual = 189, - SpvOpFOrdGreaterThanEqual = 190, - SpvOpFUnordGreaterThanEqual = 191, - SpvOpShiftRightLogical = 194, - SpvOpShiftRightArithmetic = 195, - SpvOpShiftLeftLogical = 196, - SpvOpBitwiseOr = 197, - SpvOpBitwiseXor = 198, - SpvOpBitwiseAnd = 199, - SpvOpNot = 200, - SpvOpBitFieldInsert = 201, - SpvOpBitFieldSExtract = 202, - SpvOpBitFieldUExtract = 203, - SpvOpBitReverse = 204, - SpvOpBitCount = 205, - SpvOpDPdx = 207, - SpvOpDPdy = 208, - SpvOpFwidth = 209, - SpvOpDPdxFine = 210, - SpvOpDPdyFine = 211, - SpvOpFwidthFine = 212, - SpvOpDPdxCoarse = 213, - SpvOpDPdyCoarse = 214, - SpvOpFwidthCoarse = 215, - SpvOpEmitVertex = 218, - SpvOpEndPrimitive = 219, - SpvOpEmitStreamVertex = 220, - SpvOpEndStreamPrimitive = 221, - SpvOpControlBarrier = 224, - SpvOpMemoryBarrier = 225, - SpvOpAtomicLoad = 227, - SpvOpAtomicStore = 228, - SpvOpAtomicExchange = 229, - SpvOpAtomicCompareExchange = 230, - SpvOpAtomicCompareExchangeWeak = 231, - SpvOpAtomicIIncrement = 232, - SpvOpAtomicIDecrement = 233, - SpvOpAtomicIAdd = 234, - SpvOpAtomicISub = 235, - SpvOpAtomicSMin = 236, - SpvOpAtomicUMin = 237, - SpvOpAtomicSMax = 238, - SpvOpAtomicUMax = 239, - SpvOpAtomicAnd = 240, - SpvOpAtomicOr = 241, - SpvOpAtomicXor = 242, - SpvOpPhi = 245, - SpvOpLoopMerge = 246, - SpvOpSelectionMerge = 247, - SpvOpLabel = 248, - SpvOpBranch = 249, - SpvOpBranchConditional = 250, - SpvOpSwitch = 251, - SpvOpKill = 252, - SpvOpReturn = 253, - SpvOpReturnValue = 254, - SpvOpUnreachable = 255, - SpvOpLifetimeStart = 256, - SpvOpLifetimeStop = 257, - SpvOpGroupAsyncCopy = 259, - 
SpvOpGroupWaitEvents = 260, - SpvOpGroupAll = 261, - SpvOpGroupAny = 262, - SpvOpGroupBroadcast = 263, - SpvOpGroupIAdd = 264, - SpvOpGroupFAdd = 265, - SpvOpGroupFMin = 266, - SpvOpGroupUMin = 267, - SpvOpGroupSMin = 268, - SpvOpGroupFMax = 269, - SpvOpGroupUMax = 270, - SpvOpGroupSMax = 271, - SpvOpReadPipe = 274, - SpvOpWritePipe = 275, - SpvOpReservedReadPipe = 276, - SpvOpReservedWritePipe = 277, - SpvOpReserveReadPipePackets = 278, - SpvOpReserveWritePipePackets = 279, - SpvOpCommitReadPipe = 280, - SpvOpCommitWritePipe = 281, - SpvOpIsValidReserveId = 282, - SpvOpGetNumPipePackets = 283, - SpvOpGetMaxPipePackets = 284, - SpvOpGroupReserveReadPipePackets = 285, - SpvOpGroupReserveWritePipePackets = 286, - SpvOpGroupCommitReadPipe = 287, - SpvOpGroupCommitWritePipe = 288, - SpvOpEnqueueMarker = 291, - SpvOpEnqueueKernel = 292, - SpvOpGetKernelNDrangeSubGroupCount = 293, - SpvOpGetKernelNDrangeMaxSubGroupSize = 294, - SpvOpGetKernelWorkGroupSize = 295, - SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, - SpvOpRetainEvent = 297, - SpvOpReleaseEvent = 298, - SpvOpCreateUserEvent = 299, - SpvOpIsValidEvent = 300, - SpvOpSetUserEventStatus = 301, - SpvOpCaptureEventProfilingInfo = 302, - SpvOpGetDefaultQueue = 303, - SpvOpBuildNDRange = 304, - SpvOpImageSparseSampleImplicitLod = 305, - SpvOpImageSparseSampleExplicitLod = 306, - SpvOpImageSparseSampleDrefImplicitLod = 307, - SpvOpImageSparseSampleDrefExplicitLod = 308, - SpvOpImageSparseSampleProjImplicitLod = 309, - SpvOpImageSparseSampleProjExplicitLod = 310, - SpvOpImageSparseSampleProjDrefImplicitLod = 311, - SpvOpImageSparseSampleProjDrefExplicitLod = 312, - SpvOpImageSparseFetch = 313, - SpvOpImageSparseGather = 314, - SpvOpImageSparseDrefGather = 315, - SpvOpImageSparseTexelsResident = 316, - SpvOpNoLine = 317, - SpvOpAtomicFlagTestAndSet = 318, - SpvOpAtomicFlagClear = 319, - SpvOpImageSparseRead = 320, -} SpvOp; - -#endif // #ifndef spirv_H - diff --git a/third_party/spirv/spirv.hpp11 
b/third_party/spirv/spirv.hpp11 deleted file mode 100644 index 03faaac38..000000000 --- a/third_party/spirv/spirv.hpp11 +++ /dev/null @@ -1,880 +0,0 @@ -// Copyright (c) 2014-2016 The Khronos Group Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and/or associated documentation files (the "Materials"), -// to deal in the Materials without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Materials, and to permit persons to whom the -// Materials are furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Materials. -// -// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -// -// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -// IN THE MATERIALS. - -// This header is automatically generated by the same tool that creates -// the Binary Section of the SPIR-V specification. 
- -// Enumeration tokens for SPIR-V, in various styles: -// C, C++, C++11, JSON, Lua, Python -// -// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL -// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL -// - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL -// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL -// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] -// -// Some tokens act like mask values, which can be OR'd together, -// while others are mutually exclusive. The mask-like ones have -// "Mask" in their name, and a parallel enum that has the shift -// amount (1 << x) for each corresponding enumerant. - -#ifndef spirv_HPP -#define spirv_HPP - -namespace spv { - -typedef unsigned int Id; - -#define SPV_VERSION 0x10000 -#define SPV_REVISION 3 - -static const unsigned int MagicNumber = 0x07230203; -static const unsigned int Version = 0x00010000; -static const unsigned int Revision = 3; -static const unsigned int OpCodeMask = 0xffff; -static const unsigned int WordCountShift = 16; - -enum class SourceLanguage : unsigned { - Unknown = 0, - ESSL = 1, - GLSL = 2, - OpenCL_C = 3, - OpenCL_CPP = 4, -}; - -enum class ExecutionModel : unsigned { - Vertex = 0, - TessellationControl = 1, - TessellationEvaluation = 2, - Geometry = 3, - Fragment = 4, - GLCompute = 5, - Kernel = 6, -}; - -enum class AddressingModel : unsigned { - Logical = 0, - Physical32 = 1, - Physical64 = 2, -}; - -enum class MemoryModel : unsigned { - Simple = 0, - GLSL450 = 1, - OpenCL = 2, -}; - -enum class ExecutionMode : unsigned { - Invocations = 0, - SpacingEqual = 1, - SpacingFractionalEven = 2, - SpacingFractionalOdd = 3, - VertexOrderCw = 4, - VertexOrderCcw = 5, - PixelCenterInteger = 6, - OriginUpperLeft = 7, - OriginLowerLeft = 8, - EarlyFragmentTests = 9, - PointMode = 10, - Xfb = 11, - DepthReplacing = 12, - DepthGreater = 14, - DepthLess = 15, - DepthUnchanged = 16, - LocalSize = 
17, - LocalSizeHint = 18, - InputPoints = 19, - InputLines = 20, - InputLinesAdjacency = 21, - Triangles = 22, - InputTrianglesAdjacency = 23, - Quads = 24, - Isolines = 25, - OutputVertices = 26, - OutputPoints = 27, - OutputLineStrip = 28, - OutputTriangleStrip = 29, - VecTypeHint = 30, - ContractionOff = 31, -}; - -enum class StorageClass : unsigned { - UniformConstant = 0, - Input = 1, - Uniform = 2, - Output = 3, - Workgroup = 4, - CrossWorkgroup = 5, - Private = 6, - Function = 7, - Generic = 8, - PushConstant = 9, - AtomicCounter = 10, - Image = 11, -}; - -enum class Dim : unsigned { - Dim1D = 0, - Dim2D = 1, - Dim3D = 2, - Cube = 3, - Rect = 4, - Buffer = 5, - SubpassData = 6, -}; - -enum class SamplerAddressingMode : unsigned { - None = 0, - ClampToEdge = 1, - Clamp = 2, - Repeat = 3, - RepeatMirrored = 4, -}; - -enum class SamplerFilterMode : unsigned { - Nearest = 0, - Linear = 1, -}; - -enum class ImageFormat : unsigned { - Unknown = 0, - Rgba32f = 1, - Rgba16f = 2, - R32f = 3, - Rgba8 = 4, - Rgba8Snorm = 5, - Rg32f = 6, - Rg16f = 7, - R11fG11fB10f = 8, - R16f = 9, - Rgba16 = 10, - Rgb10A2 = 11, - Rg16 = 12, - Rg8 = 13, - R16 = 14, - R8 = 15, - Rgba16Snorm = 16, - Rg16Snorm = 17, - Rg8Snorm = 18, - R16Snorm = 19, - R8Snorm = 20, - Rgba32i = 21, - Rgba16i = 22, - Rgba8i = 23, - R32i = 24, - Rg32i = 25, - Rg16i = 26, - Rg8i = 27, - R16i = 28, - R8i = 29, - Rgba32ui = 30, - Rgba16ui = 31, - Rgba8ui = 32, - R32ui = 33, - Rgb10a2ui = 34, - Rg32ui = 35, - Rg16ui = 36, - Rg8ui = 37, - R16ui = 38, - R8ui = 39, -}; - -enum class ImageChannelOrder : unsigned { - R = 0, - A = 1, - RG = 2, - RA = 3, - RGB = 4, - RGBA = 5, - BGRA = 6, - ARGB = 7, - Intensity = 8, - Luminance = 9, - Rx = 10, - RGx = 11, - RGBx = 12, - Depth = 13, - DepthStencil = 14, - sRGB = 15, - sRGBx = 16, - sRGBA = 17, - sBGRA = 18, -}; - -enum class ImageChannelDataType : unsigned { - SnormInt8 = 0, - SnormInt16 = 1, - UnormInt8 = 2, - UnormInt16 = 3, - UnormShort565 = 4, - UnormShort555 = 5, - 
UnormInt101010 = 6, - SignedInt8 = 7, - SignedInt16 = 8, - SignedInt32 = 9, - UnsignedInt8 = 10, - UnsignedInt16 = 11, - UnsignedInt32 = 12, - HalfFloat = 13, - Float = 14, - UnormInt24 = 15, - UnormInt101010_2 = 16, -}; - -enum class ImageOperandsShift : unsigned { - Bias = 0, - Lod = 1, - Grad = 2, - ConstOffset = 3, - Offset = 4, - ConstOffsets = 5, - Sample = 6, - MinLod = 7, -}; - -enum class ImageOperandsMask : unsigned { - MaskNone = 0, - Bias = 0x00000001, - Lod = 0x00000002, - Grad = 0x00000004, - ConstOffset = 0x00000008, - Offset = 0x00000010, - ConstOffsets = 0x00000020, - Sample = 0x00000040, - MinLod = 0x00000080, -}; - -enum class FPFastMathModeShift : unsigned { - NotNaN = 0, - NotInf = 1, - NSZ = 2, - AllowRecip = 3, - Fast = 4, -}; - -enum class FPFastMathModeMask : unsigned { - MaskNone = 0, - NotNaN = 0x00000001, - NotInf = 0x00000002, - NSZ = 0x00000004, - AllowRecip = 0x00000008, - Fast = 0x00000010, -}; - -enum class FPRoundingMode : unsigned { - RTE = 0, - RTZ = 1, - RTP = 2, - RTN = 3, -}; - -enum class LinkageType : unsigned { - Export = 0, - Import = 1, -}; - -enum class AccessQualifier : unsigned { - ReadOnly = 0, - WriteOnly = 1, - ReadWrite = 2, -}; - -enum class FunctionParameterAttribute : unsigned { - Zext = 0, - Sext = 1, - ByVal = 2, - Sret = 3, - NoAlias = 4, - NoCapture = 5, - NoWrite = 6, - NoReadWrite = 7, -}; - -enum class Decoration : unsigned { - RelaxedPrecision = 0, - SpecId = 1, - Block = 2, - BufferBlock = 3, - RowMajor = 4, - ColMajor = 5, - ArrayStride = 6, - MatrixStride = 7, - GLSLShared = 8, - GLSLPacked = 9, - CPacked = 10, - BuiltIn = 11, - NoPerspective = 13, - Flat = 14, - Patch = 15, - Centroid = 16, - Sample = 17, - Invariant = 18, - Restrict = 19, - Aliased = 20, - Volatile = 21, - Constant = 22, - Coherent = 23, - NonWritable = 24, - NonReadable = 25, - Uniform = 26, - SaturatedConversion = 28, - Stream = 29, - Location = 30, - Component = 31, - Index = 32, - Binding = 33, - DescriptorSet = 34, - Offset = 
35, - XfbBuffer = 36, - XfbStride = 37, - FuncParamAttr = 38, - FPRoundingMode = 39, - FPFastMathMode = 40, - LinkageAttributes = 41, - NoContraction = 42, - InputAttachmentIndex = 43, - Alignment = 44, -}; - -enum class BuiltIn : unsigned { - Position = 0, - PointSize = 1, - ClipDistance = 3, - CullDistance = 4, - VertexId = 5, - InstanceId = 6, - PrimitiveId = 7, - InvocationId = 8, - Layer = 9, - ViewportIndex = 10, - TessLevelOuter = 11, - TessLevelInner = 12, - TessCoord = 13, - PatchVertices = 14, - FragCoord = 15, - PointCoord = 16, - FrontFacing = 17, - SampleId = 18, - SamplePosition = 19, - SampleMask = 20, - FragDepth = 22, - HelperInvocation = 23, - NumWorkgroups = 24, - WorkgroupSize = 25, - WorkgroupId = 26, - LocalInvocationId = 27, - GlobalInvocationId = 28, - LocalInvocationIndex = 29, - WorkDim = 30, - GlobalSize = 31, - EnqueuedWorkgroupSize = 32, - GlobalOffset = 33, - GlobalLinearId = 34, - SubgroupSize = 36, - SubgroupMaxSize = 37, - NumSubgroups = 38, - NumEnqueuedSubgroups = 39, - SubgroupId = 40, - SubgroupLocalInvocationId = 41, - VertexIndex = 42, - InstanceIndex = 43, -}; - -enum class SelectionControlShift : unsigned { - Flatten = 0, - DontFlatten = 1, -}; - -enum class SelectionControlMask : unsigned { - MaskNone = 0, - Flatten = 0x00000001, - DontFlatten = 0x00000002, -}; - -enum class LoopControlShift : unsigned { - Unroll = 0, - DontUnroll = 1, -}; - -enum class LoopControlMask : unsigned { - MaskNone = 0, - Unroll = 0x00000001, - DontUnroll = 0x00000002, -}; - -enum class FunctionControlShift : unsigned { - Inline = 0, - DontInline = 1, - Pure = 2, - Const = 3, -}; - -enum class FunctionControlMask : unsigned { - MaskNone = 0, - Inline = 0x00000001, - DontInline = 0x00000002, - Pure = 0x00000004, - Const = 0x00000008, -}; - -enum class MemorySemanticsShift : unsigned { - Acquire = 1, - Release = 2, - AcquireRelease = 3, - SequentiallyConsistent = 4, - UniformMemory = 6, - SubgroupMemory = 7, - WorkgroupMemory = 8, - 
CrossWorkgroupMemory = 9, - AtomicCounterMemory = 10, - ImageMemory = 11, -}; - -enum class MemorySemanticsMask : unsigned { - MaskNone = 0, - Acquire = 0x00000002, - Release = 0x00000004, - AcquireRelease = 0x00000008, - SequentiallyConsistent = 0x00000010, - UniformMemory = 0x00000040, - SubgroupMemory = 0x00000080, - WorkgroupMemory = 0x00000100, - CrossWorkgroupMemory = 0x00000200, - AtomicCounterMemory = 0x00000400, - ImageMemory = 0x00000800, -}; - -enum class MemoryAccessShift : unsigned { - Volatile = 0, - Aligned = 1, - Nontemporal = 2, -}; - -enum class MemoryAccessMask : unsigned { - MaskNone = 0, - Volatile = 0x00000001, - Aligned = 0x00000002, - Nontemporal = 0x00000004, -}; - -enum class Scope : unsigned { - CrossDevice = 0, - Device = 1, - Workgroup = 2, - Subgroup = 3, - Invocation = 4, -}; - -enum class GroupOperation : unsigned { - Reduce = 0, - InclusiveScan = 1, - ExclusiveScan = 2, -}; - -enum class KernelEnqueueFlags : unsigned { - NoWait = 0, - WaitKernel = 1, - WaitWorkGroup = 2, -}; - -enum class KernelProfilingInfoShift : unsigned { - CmdExecTime = 0, -}; - -enum class KernelProfilingInfoMask : unsigned { - MaskNone = 0, - CmdExecTime = 0x00000001, -}; - -enum class Capability : unsigned { - Matrix = 0, - Shader = 1, - Geometry = 2, - Tessellation = 3, - Addresses = 4, - Linkage = 5, - Kernel = 6, - Vector16 = 7, - Float16Buffer = 8, - Float16 = 9, - Float64 = 10, - Int64 = 11, - Int64Atomics = 12, - ImageBasic = 13, - ImageReadWrite = 14, - ImageMipmap = 15, - Pipes = 17, - Groups = 18, - DeviceEnqueue = 19, - LiteralSampler = 20, - AtomicStorage = 21, - Int16 = 22, - TessellationPointSize = 23, - GeometryPointSize = 24, - ImageGatherExtended = 25, - StorageImageMultisample = 27, - UniformBufferArrayDynamicIndexing = 28, - SampledImageArrayDynamicIndexing = 29, - StorageBufferArrayDynamicIndexing = 30, - StorageImageArrayDynamicIndexing = 31, - ClipDistance = 32, - CullDistance = 33, - ImageCubeArray = 34, - SampleRateShading = 35, - 
ImageRect = 36, - SampledRect = 37, - GenericPointer = 38, - Int8 = 39, - InputAttachment = 40, - SparseResidency = 41, - MinLod = 42, - Sampled1D = 43, - Image1D = 44, - SampledCubeArray = 45, - SampledBuffer = 46, - ImageBuffer = 47, - ImageMSArray = 48, - StorageImageExtendedFormats = 49, - ImageQuery = 50, - DerivativeControl = 51, - InterpolationFunction = 52, - TransformFeedback = 53, - GeometryStreams = 54, - StorageImageReadWithoutFormat = 55, - StorageImageWriteWithoutFormat = 56, - MultiViewport = 57, -}; - -enum class Op : unsigned { - OpNop = 0, - OpUndef = 1, - OpSourceContinued = 2, - OpSource = 3, - OpSourceExtension = 4, - OpName = 5, - OpMemberName = 6, - OpString = 7, - OpLine = 8, - OpExtension = 10, - OpExtInstImport = 11, - OpExtInst = 12, - OpMemoryModel = 14, - OpEntryPoint = 15, - OpExecutionMode = 16, - OpCapability = 17, - OpTypeVoid = 19, - OpTypeBool = 20, - OpTypeInt = 21, - OpTypeFloat = 22, - OpTypeVector = 23, - OpTypeMatrix = 24, - OpTypeImage = 25, - OpTypeSampler = 26, - OpTypeSampledImage = 27, - OpTypeArray = 28, - OpTypeRuntimeArray = 29, - OpTypeStruct = 30, - OpTypeOpaque = 31, - OpTypePointer = 32, - OpTypeFunction = 33, - OpTypeEvent = 34, - OpTypeDeviceEvent = 35, - OpTypeReserveId = 36, - OpTypeQueue = 37, - OpTypePipe = 38, - OpTypeForwardPointer = 39, - OpConstantTrue = 41, - OpConstantFalse = 42, - OpConstant = 43, - OpConstantComposite = 44, - OpConstantSampler = 45, - OpConstantNull = 46, - OpSpecConstantTrue = 48, - OpSpecConstantFalse = 49, - OpSpecConstant = 50, - OpSpecConstantComposite = 51, - OpSpecConstantOp = 52, - OpFunction = 54, - OpFunctionParameter = 55, - OpFunctionEnd = 56, - OpFunctionCall = 57, - OpVariable = 59, - OpImageTexelPointer = 60, - OpLoad = 61, - OpStore = 62, - OpCopyMemory = 63, - OpCopyMemorySized = 64, - OpAccessChain = 65, - OpInBoundsAccessChain = 66, - OpPtrAccessChain = 67, - OpArrayLength = 68, - OpGenericPtrMemSemantics = 69, - OpInBoundsPtrAccessChain = 70, - OpDecorate = 71, - 
OpMemberDecorate = 72, - OpDecorationGroup = 73, - OpGroupDecorate = 74, - OpGroupMemberDecorate = 75, - OpVectorExtractDynamic = 77, - OpVectorInsertDynamic = 78, - OpVectorShuffle = 79, - OpCompositeConstruct = 80, - OpCompositeExtract = 81, - OpCompositeInsert = 82, - OpCopyObject = 83, - OpTranspose = 84, - OpSampledImage = 86, - OpImageSampleImplicitLod = 87, - OpImageSampleExplicitLod = 88, - OpImageSampleDrefImplicitLod = 89, - OpImageSampleDrefExplicitLod = 90, - OpImageSampleProjImplicitLod = 91, - OpImageSampleProjExplicitLod = 92, - OpImageSampleProjDrefImplicitLod = 93, - OpImageSampleProjDrefExplicitLod = 94, - OpImageFetch = 95, - OpImageGather = 96, - OpImageDrefGather = 97, - OpImageRead = 98, - OpImageWrite = 99, - OpImage = 100, - OpImageQueryFormat = 101, - OpImageQueryOrder = 102, - OpImageQuerySizeLod = 103, - OpImageQuerySize = 104, - OpImageQueryLod = 105, - OpImageQueryLevels = 106, - OpImageQuerySamples = 107, - OpConvertFToU = 109, - OpConvertFToS = 110, - OpConvertSToF = 111, - OpConvertUToF = 112, - OpUConvert = 113, - OpSConvert = 114, - OpFConvert = 115, - OpQuantizeToF16 = 116, - OpConvertPtrToU = 117, - OpSatConvertSToU = 118, - OpSatConvertUToS = 119, - OpConvertUToPtr = 120, - OpPtrCastToGeneric = 121, - OpGenericCastToPtr = 122, - OpGenericCastToPtrExplicit = 123, - OpBitcast = 124, - OpSNegate = 126, - OpFNegate = 127, - OpIAdd = 128, - OpFAdd = 129, - OpISub = 130, - OpFSub = 131, - OpIMul = 132, - OpFMul = 133, - OpUDiv = 134, - OpSDiv = 135, - OpFDiv = 136, - OpUMod = 137, - OpSRem = 138, - OpSMod = 139, - OpFRem = 140, - OpFMod = 141, - OpVectorTimesScalar = 142, - OpMatrixTimesScalar = 143, - OpVectorTimesMatrix = 144, - OpMatrixTimesVector = 145, - OpMatrixTimesMatrix = 146, - OpOuterProduct = 147, - OpDot = 148, - OpIAddCarry = 149, - OpISubBorrow = 150, - OpUMulExtended = 151, - OpSMulExtended = 152, - OpAny = 154, - OpAll = 155, - OpIsNan = 156, - OpIsInf = 157, - OpIsFinite = 158, - OpIsNormal = 159, - OpSignBitSet = 
160, - OpLessOrGreater = 161, - OpOrdered = 162, - OpUnordered = 163, - OpLogicalEqual = 164, - OpLogicalNotEqual = 165, - OpLogicalOr = 166, - OpLogicalAnd = 167, - OpLogicalNot = 168, - OpSelect = 169, - OpIEqual = 170, - OpINotEqual = 171, - OpUGreaterThan = 172, - OpSGreaterThan = 173, - OpUGreaterThanEqual = 174, - OpSGreaterThanEqual = 175, - OpULessThan = 176, - OpSLessThan = 177, - OpULessThanEqual = 178, - OpSLessThanEqual = 179, - OpFOrdEqual = 180, - OpFUnordEqual = 181, - OpFOrdNotEqual = 182, - OpFUnordNotEqual = 183, - OpFOrdLessThan = 184, - OpFUnordLessThan = 185, - OpFOrdGreaterThan = 186, - OpFUnordGreaterThan = 187, - OpFOrdLessThanEqual = 188, - OpFUnordLessThanEqual = 189, - OpFOrdGreaterThanEqual = 190, - OpFUnordGreaterThanEqual = 191, - OpShiftRightLogical = 194, - OpShiftRightArithmetic = 195, - OpShiftLeftLogical = 196, - OpBitwiseOr = 197, - OpBitwiseXor = 198, - OpBitwiseAnd = 199, - OpNot = 200, - OpBitFieldInsert = 201, - OpBitFieldSExtract = 202, - OpBitFieldUExtract = 203, - OpBitReverse = 204, - OpBitCount = 205, - OpDPdx = 207, - OpDPdy = 208, - OpFwidth = 209, - OpDPdxFine = 210, - OpDPdyFine = 211, - OpFwidthFine = 212, - OpDPdxCoarse = 213, - OpDPdyCoarse = 214, - OpFwidthCoarse = 215, - OpEmitVertex = 218, - OpEndPrimitive = 219, - OpEmitStreamVertex = 220, - OpEndStreamPrimitive = 221, - OpControlBarrier = 224, - OpMemoryBarrier = 225, - OpAtomicLoad = 227, - OpAtomicStore = 228, - OpAtomicExchange = 229, - OpAtomicCompareExchange = 230, - OpAtomicCompareExchangeWeak = 231, - OpAtomicIIncrement = 232, - OpAtomicIDecrement = 233, - OpAtomicIAdd = 234, - OpAtomicISub = 235, - OpAtomicSMin = 236, - OpAtomicUMin = 237, - OpAtomicSMax = 238, - OpAtomicUMax = 239, - OpAtomicAnd = 240, - OpAtomicOr = 241, - OpAtomicXor = 242, - OpPhi = 245, - OpLoopMerge = 246, - OpSelectionMerge = 247, - OpLabel = 248, - OpBranch = 249, - OpBranchConditional = 250, - OpSwitch = 251, - OpKill = 252, - OpReturn = 253, - OpReturnValue = 254, - 
OpUnreachable = 255, - OpLifetimeStart = 256, - OpLifetimeStop = 257, - OpGroupAsyncCopy = 259, - OpGroupWaitEvents = 260, - OpGroupAll = 261, - OpGroupAny = 262, - OpGroupBroadcast = 263, - OpGroupIAdd = 264, - OpGroupFAdd = 265, - OpGroupFMin = 266, - OpGroupUMin = 267, - OpGroupSMin = 268, - OpGroupFMax = 269, - OpGroupUMax = 270, - OpGroupSMax = 271, - OpReadPipe = 274, - OpWritePipe = 275, - OpReservedReadPipe = 276, - OpReservedWritePipe = 277, - OpReserveReadPipePackets = 278, - OpReserveWritePipePackets = 279, - OpCommitReadPipe = 280, - OpCommitWritePipe = 281, - OpIsValidReserveId = 282, - OpGetNumPipePackets = 283, - OpGetMaxPipePackets = 284, - OpGroupReserveReadPipePackets = 285, - OpGroupReserveWritePipePackets = 286, - OpGroupCommitReadPipe = 287, - OpGroupCommitWritePipe = 288, - OpEnqueueMarker = 291, - OpEnqueueKernel = 292, - OpGetKernelNDrangeSubGroupCount = 293, - OpGetKernelNDrangeMaxSubGroupSize = 294, - OpGetKernelWorkGroupSize = 295, - OpGetKernelPreferredWorkGroupSizeMultiple = 296, - OpRetainEvent = 297, - OpReleaseEvent = 298, - OpCreateUserEvent = 299, - OpIsValidEvent = 300, - OpSetUserEventStatus = 301, - OpCaptureEventProfilingInfo = 302, - OpGetDefaultQueue = 303, - OpBuildNDRange = 304, - OpImageSparseSampleImplicitLod = 305, - OpImageSparseSampleExplicitLod = 306, - OpImageSparseSampleDrefImplicitLod = 307, - OpImageSparseSampleDrefExplicitLod = 308, - OpImageSparseSampleProjImplicitLod = 309, - OpImageSparseSampleProjExplicitLod = 310, - OpImageSparseSampleProjDrefImplicitLod = 311, - OpImageSparseSampleProjDrefExplicitLod = 312, - OpImageSparseFetch = 313, - OpImageSparseGather = 314, - OpImageSparseDrefGather = 315, - OpImageSparseTexelsResident = 316, - OpNoLine = 317, - OpAtomicFlagTestAndSet = 318, - OpAtomicFlagClear = 319, - OpImageSparseRead = 320, -}; - -// Overload operator| for mask bit combining - -inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | 
unsigned(b)); } -inline FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); } -inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); } -inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); } -inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); } -inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); } -inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); } -inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); } - -} // end namespace spv - -#endif // #ifndef spirv_HPP - From d572e878af5b9d8a2e9cb1d94dd3f80fa3623061 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 8 Oct 2020 23:06:02 +0300 Subject: [PATCH 024/123] [Vulkan] Remove FinalizeTrace --- src/xenia/gpu/vulkan/vulkan_command_processor.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 1dcea8284..0fe4e0255 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -211,8 +211,6 @@ void VulkanCommandProcessor::InitializeTrace() { } } -void VulkanCommandProcessor::FinalizeTrace() {} - void VulkanCommandProcessor::CheckSubmissionFence(uint64_t await_submission) { if (await_submission >= GetCurrentSubmission()) { if (submission_open_) { From 3a308dedb3f732450210ce0fe17d3b7e5663d2b4 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 8 Oct 2020 23:24:25 +0300 Subject: 
[PATCH 025/123] [Vulkan] Temporarily move to a fork of glslang --- .gitmodules | 3 ++- third_party/glslang | 2 +- third_party/glslang-spirv.lua | 6 ++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.gitmodules b/.gitmodules index c50326cb4..0b1f9ce9c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -57,4 +57,5 @@ url = https://github.com/microsoft/DirectXShaderCompiler.git [submodule "third_party/glslang"] path = third_party/glslang - url = https://github.com/KhronosGroup/glslang.git + url = https://github.com/Triang3l/glslang.git + branch = patch-1 diff --git a/third_party/glslang b/third_party/glslang index 2067d1a93..5a9dfb674 160000 --- a/third_party/glslang +++ b/third_party/glslang @@ -1 +1 @@ -Subproject commit 2067d1a93e6edc17f2a6b7e3e5138a9bbcd35ef9 +Subproject commit 5a9dfb6741ca851f8bb57abc0fe808f5a0705fa2 diff --git a/third_party/glslang-spirv.lua b/third_party/glslang-spirv.lua index b5cdf15ba..19a04c71e 100644 --- a/third_party/glslang-spirv.lua +++ b/third_party/glslang-spirv.lua @@ -12,10 +12,8 @@ project("glslang-spirv") }) files({ "glslang/SPIRV/bitutils.h", - -- Disabled temporarily until PR #2417 removing SpvTools.h dependency is - -- merged. 
- -- "glslang/SPIRV/disassemble.cpp", - -- "glslang/SPIRV/disassemble.h", + "glslang/SPIRV/disassemble.cpp", + "glslang/SPIRV/disassemble.h", "glslang/SPIRV/doc.cpp", "glslang/SPIRV/doc.h", "glslang/SPIRV/GLSL.ext.AMD.h", From b3339d7e46aa32b74ada16cba4e9f3cc842648fc Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 11 Oct 2020 20:22:15 +0300 Subject: [PATCH 026/123] [SPIR-V] Some initial bits of the translator --- src/xenia/gpu/shader_compiler_main.cc | 28 ++++- src/xenia/gpu/spirv_shader_translator.cc | 151 +++++++++++++++++++++++ src/xenia/gpu/spirv_shader_translator.h | 85 +++++++++++++ 3 files changed, 261 insertions(+), 3 deletions(-) create mode 100644 src/xenia/gpu/spirv_shader_translator.cc create mode 100644 src/xenia/gpu/spirv_shader_translator.h diff --git a/src/xenia/gpu/shader_compiler_main.cc b/src/xenia/gpu/shader_compiler_main.cc index b8949f948..a342ecfdf 100644 --- a/src/xenia/gpu/shader_compiler_main.cc +++ b/src/xenia/gpu/shader_compiler_main.cc @@ -9,9 +9,12 @@ #include #include +#include #include +#include #include +#include "third_party/glslang/SPIRV/disassemble.h" #include "xenia/base/cvar.h" #include "xenia/base/logging.h" #include "xenia/base/main.h" @@ -19,6 +22,7 @@ #include "xenia/base/string.h" #include "xenia/gpu/dxbc_shader_translator.h" #include "xenia/gpu/shader_translator.h" +#include "xenia/gpu/spirv_shader_translator.h" // For D3DDisassemble: #if XE_PLATFORM_WIN32 @@ -31,7 +35,8 @@ DEFINE_string(shader_input_type, "", "GPU"); DEFINE_path(shader_output, "", "Output shader file path.", "GPU"); DEFINE_string(shader_output_type, "ucode", - "Translator to use: [ucode, dxbc, dxbctext].", "GPU"); + "Translator to use: [ucode, spirv, spirvtext, dxbc, dxbctext].", + "GPU"); DEFINE_string( vertex_shader_output_type, "", "Type of the host interface to produce the vertex or domain shader for: " @@ -102,8 +107,11 @@ int shader_compiler_main(const std::vector& args) { shader_type, ucode_data_hash, ucode_dwords.data(), ucode_dwords.size()); 
std::unique_ptr translator; - if (cvars::shader_output_type == "dxbc" || - cvars::shader_output_type == "dxbctext") { + if (cvars::shader_output_type == "spirv" || + cvars::shader_output_type == "spirvtext") { + translator = std::make_unique(); + } else if (cvars::shader_output_type == "dxbc" || + cvars::shader_output_type == "dxbctext") { translator = std::make_unique( 0, cvars::shader_output_bindless_resources, cvars::shader_output_dxbc_rov); @@ -140,6 +148,20 @@ int shader_compiler_main(const std::vector& args) { const void* source_data = shader->translated_binary().data(); size_t source_data_size = shader->translated_binary().size(); + std::string spirv_disasm; + if (cvars::shader_output_type == "spirvtext") { + std::ostringstream spirv_disasm_stream; + std::vector spirv_source; + spirv_source.reserve(source_data_size / sizeof(unsigned int)); + spirv_source.insert(spirv_source.cend(), + reinterpret_cast(source_data), + reinterpret_cast(source_data) + + source_data_size / sizeof(unsigned int)); + spv::Disassemble(spirv_disasm_stream, spirv_source); + spirv_disasm = std::move(spirv_disasm_stream.str()); + source_data = spirv_disasm.c_str(); + source_data_size = spirv_disasm.size(); + } #if XE_PLATFORM_WIN32 ID3DBlob* dxbc_disasm_blob = nullptr; if (cvars::shader_output_type == "dxbctext") { diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc new file mode 100644 index 000000000..ce232a7ad --- /dev/null +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -0,0 +1,151 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/spirv_shader_translator.h" + +#include +#include + +#include "third_party/glslang/SPIRV/GLSL.std.450.h" + +namespace xe { +namespace gpu { + +SpirvShaderTranslator::SpirvShaderTranslator(bool supports_clip_distance, + bool supports_cull_distance) + : supports_clip_distance_(supports_clip_distance), + supports_cull_distance_(supports_cull_distance) {} + +void SpirvShaderTranslator::Reset() { + ShaderTranslator::Reset(); + + builder_.reset(); +} + +void SpirvShaderTranslator::StartTranslation() { + // TODO(Triang3l): Once tool ID (likely 26) is registered in SPIRV-Headers, + // use it instead. + // TODO(Triang3l): Logger. + builder_ = std::make_unique(0x10000, 0xFFFF0001, nullptr); + + builder_->addCapability(IsSpirvTessEvalShader() ? spv::CapabilityTessellation + : spv::CapabilityShader); + ext_inst_glsl_std_450_ = builder_->import("GLSL.std.450"); + builder_->setMemoryModel(spv::AddressingModelLogical, + spv::MemoryModelGLSL450); + builder_->setSource(spv::SourceLanguageUnknown, 0); + + type_void_ = builder_->makeVoidType(); + type_float_ = builder_->makeFloatType(32); + type_float2_ = builder_->makeVectorType(type_float_, 2); + type_float3_ = builder_->makeVectorType(type_float_, 3); + type_float4_ = builder_->makeVectorType(type_float_, 4); + type_int_ = builder_->makeIntType(32); + + if (IsSpirvVertexOrTessEvalShader()) { + StartVertexOrTessEvalShaderBeforeMain(); + } + + // Begin the main function. + std::vector main_param_types; + std::vector> main_precisions; + spv::Block* main_entry; + builder_->makeFunctionEntry(spv::NoPrecision, type_void_, "main", + main_param_types, main_precisions, &main_entry); + + // Begin ucode translation. 
+ if (register_count()) { + var_main_registers_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, + builder_->makeArrayType( + type_float4_, builder_->makeUintConstant(register_count()), 0), + "xe_r"); + } +} + +std::vector SpirvShaderTranslator::CompleteTranslation() { + if (IsSpirvVertexOrTessEvalShader()) { + CompleteVertexOrTessEvalShaderInMain(); + } + + // End the main function.. + builder_->leaveFunction(); + + // TODO(Triang3l): Avoid copy? + std::vector module_uints; + builder_->dump(module_uints); + std::vector module_bytes; + module_bytes.reserve(sizeof(unsigned int) * module_uints.size()); + module_bytes.insert(module_bytes.cend(), + reinterpret_cast(module_uints.data()), + reinterpret_cast(module_uints.data()) + + sizeof(unsigned int) * module_uints.size()); + return module_bytes; +} + +void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { + // Create the inputs. + if (IsSpirvTessEvalShader()) { + input_vertex_index_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassInput, type_int_, "gl_PrimitiveID"); + builder_->addDecoration(input_vertex_index_, spv::DecorationBuiltIn, + spv::BuiltInPrimitiveId); + } else { + input_primitive_id_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassInput, type_int_, "gl_VertexIndex"); + builder_->addDecoration(input_primitive_id_, spv::DecorationBuiltIn, + spv::BuiltInVertexIndex); + } + + // Create the entire GLSL 4.50 gl_PerVertex output similar to what glslang + // does. Members (like gl_PointSize) don't need to be used, and also + // ClipDistance and CullDistance may exist even if the device doesn't support + // them, as long as the capabilities aren't enabled, and nothing is stored to + // them. 
+ if (supports_clip_distance_) { + builder_->addCapability(spv::CapabilityClipDistance); + } + if (supports_cull_distance_) { + builder_->addCapability(spv::CapabilityCullDistance); + } + std::vector struct_per_vertex_members; + struct_per_vertex_members.reserve(kOutputPerVertexMemberCount); + struct_per_vertex_members.push_back(type_float4_); + struct_per_vertex_members.push_back(type_float_); + // TODO(Triang3l): Specialization constant for ucp_cull_only_ena, for 6 + 1 + // or 1 + 7 array sizes. + struct_per_vertex_members.push_back(builder_->makeArrayType( + type_float_, builder_->makeUintConstant(supports_clip_distance_ ? 6 : 1), + 0)); + struct_per_vertex_members.push_back( + builder_->makeArrayType(type_float_, builder_->makeUintConstant(1), 0)); + spv::Id type_struct_per_vertex = + builder_->makeStructType(struct_per_vertex_members, "gl_PerVertex"); + builder_->addMemberDecoration(type_struct_per_vertex, + kOutputPerVertexMemberPosition, + spv::DecorationBuiltIn, spv::BuiltInPosition); + builder_->addMemberDecoration(type_struct_per_vertex, + kOutputPerVertexMemberPointSize, + spv::DecorationBuiltIn, spv::BuiltInPointSize); + builder_->addMemberDecoration( + type_struct_per_vertex, kOutputPerVertexMemberClipDistance, + spv::DecorationBuiltIn, spv::BuiltInClipDistance); + builder_->addMemberDecoration( + type_struct_per_vertex, kOutputPerVertexMemberCullDistance, + spv::DecorationBuiltIn, spv::BuiltInCullDistance); + builder_->addDecoration(type_struct_per_vertex, spv::DecorationBlock); + output_per_vertex_ = + builder_->createVariable(spv::NoPrecision, spv::StorageClassOutput, + type_struct_per_vertex, "xe_out_gl_PerVertex"); +} + +void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() {} + +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h new file mode 100644 index 000000000..5ef5dfc2c --- /dev/null +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -0,0 +1,85 @@ 
+/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_ +#define XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_ + +#include +#include +#include + +#include "third_party/glslang/SPIRV/SpvBuilder.h" +#include "xenia/gpu/shader_translator.h" + +namespace xe { +namespace gpu { + +class SpirvShaderTranslator : public ShaderTranslator { + public: + SpirvShaderTranslator(bool supports_clip_distance = true, + bool supports_cull_distance = true); + + protected: + void Reset() override; + + void StartTranslation() override; + + std::vector CompleteTranslation() override; + + private: + // TODO(Triang3l): Depth-only pixel shader. 
+ bool IsSpirvVertexOrTessEvalShader() const { return is_vertex_shader(); } + bool IsSpirvVertexShader() const { + return IsSpirvVertexOrTessEvalShader() && + host_vertex_shader_type() == Shader::HostVertexShaderType::kVertex; + } + bool IsSpirvTessEvalShader() const { + return IsSpirvVertexOrTessEvalShader() && + host_vertex_shader_type() != Shader::HostVertexShaderType::kVertex; + } + bool IsSpirvFragmentShader() const { return is_pixel_shader(); } + + void StartVertexOrTessEvalShaderBeforeMain(); + void CompleteVertexOrTessEvalShaderInMain(); + + bool supports_clip_distance_; + bool supports_cull_distance_; + + std::unique_ptr builder_; + + spv::Id ext_inst_glsl_std_450_; + + spv::Id type_void_; + spv::Id type_float_; + spv::Id type_float2_; + spv::Id type_float3_; + spv::Id type_float4_; + spv::Id type_int_; + spv::Id type_uint_; + + spv::Id input_vertex_index_; + spv::Id input_primitive_id_; + + enum OutputPerVertexMember : unsigned int { + kOutputPerVertexMemberPosition, + kOutputPerVertexMemberPointSize, + kOutputPerVertexMemberClipDistance, + kOutputPerVertexMemberCullDistance, + kOutputPerVertexMemberCount, + }; + spv::Id output_per_vertex_; + + spv::Id function_main_; + spv::Id var_main_registers_; +}; + +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_ From 1de144938ce57fd2d75a33176dc0bd41f1939931 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Fri, 16 Oct 2020 19:55:41 +0300 Subject: [PATCH 027/123] [SPIR-V] Main loop blocks, validation --- .gitmodules | 6 +- src/xenia/gpu/premake5.lua | 1 + src/xenia/gpu/shader_compiler_main.cc | 12 ++ src/xenia/gpu/shader_translator.cc | 5 - src/xenia/gpu/shader_translator.h | 4 - src/xenia/gpu/spirv_shader_translator.cc | 177 ++++++++++++++++++--- src/xenia/gpu/spirv_shader_translator.h | 33 +++- src/xenia/ui/vulkan/spirv_tools_context.cc | 113 +++++++++++++ src/xenia/ui/vulkan/spirv_tools_context.h | 72 +++++++++ third_party/SPIRV-Tools | 1 + third_party/glslang | 2 +- 11 files 
changed, 393 insertions(+), 33 deletions(-) create mode 100644 src/xenia/ui/vulkan/spirv_tools_context.cc create mode 100644 src/xenia/ui/vulkan/spirv_tools_context.h create mode 160000 third_party/SPIRV-Tools diff --git a/.gitmodules b/.gitmodules index 0b1f9ce9c..2338fce50 100644 --- a/.gitmodules +++ b/.gitmodules @@ -57,5 +57,7 @@ url = https://github.com/microsoft/DirectXShaderCompiler.git [submodule "third_party/glslang"] path = third_party/glslang - url = https://github.com/Triang3l/glslang.git - branch = patch-1 + url = https://github.com/KhronosGroup/glslang.git +[submodule "third_party/SPIRV-Tools"] + path = third_party/SPIRV-Tools + url = https://github.com/KhronosGroup/SPIRV-Tools.git diff --git a/src/xenia/gpu/premake5.lua b/src/xenia/gpu/premake5.lua index f4b2a08d8..5de398e94 100644 --- a/src/xenia/gpu/premake5.lua +++ b/src/xenia/gpu/premake5.lua @@ -30,6 +30,7 @@ project("xenia-gpu-shader-compiler") "glslang-spirv", "xenia-base", "xenia-gpu", + "xenia-ui-vulkan", }) defines({ }) diff --git a/src/xenia/gpu/shader_compiler_main.cc b/src/xenia/gpu/shader_compiler_main.cc index a342ecfdf..f79e36df0 100644 --- a/src/xenia/gpu/shader_compiler_main.cc +++ b/src/xenia/gpu/shader_compiler_main.cc @@ -23,6 +23,7 @@ #include "xenia/gpu/dxbc_shader_translator.h" #include "xenia/gpu/shader_translator.h" #include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/ui/vulkan/spirv_tools_context.h" // For D3DDisassemble: #if XE_PLATFORM_WIN32 @@ -159,6 +160,17 @@ int shader_compiler_main(const std::vector& args) { source_data_size / sizeof(unsigned int)); spv::Disassemble(spirv_disasm_stream, spirv_source); spirv_disasm = std::move(spirv_disasm_stream.str()); + ui::vulkan::SpirvToolsContext spirv_tools_context; + if (spirv_tools_context.Initialize()) { + std::string spirv_validation_error; + spirv_tools_context.Validate( + reinterpret_cast(spirv_source.data()), + spirv_source.size(), &spirv_validation_error); + if (!spirv_validation_error.empty()) { + 
spirv_disasm.append(1, '\n'); + spirv_disasm.append(spirv_validation_error); + } + } source_data = spirv_disasm.c_str(); source_data_size = spirv_disasm.size(); } diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index 304acf602..f2bf35bf5 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -101,7 +101,6 @@ bool ShaderTranslator::TranslateInternal( // Each control flow instruction is executed sequentially until the final // ending instruction. uint32_t max_cf_dword_index = static_cast(ucode_dword_count_); - std::vector cf_instructions; for (uint32_t i = 0; i < max_cf_dword_index; i += 3) { ControlFlowInstruction cf_a; ControlFlowInstruction cf_b; @@ -121,8 +120,6 @@ bool ShaderTranslator::TranslateInternal( // Translators may need this before they start codegen. GatherInstructionInformation(cf_a); GatherInstructionInformation(cf_b); - cf_instructions.push_back(cf_a); - cf_instructions.push_back(cf_b); } if (constant_register_map_.float_dynamic_addressing) { @@ -159,8 +156,6 @@ bool ShaderTranslator::TranslateInternal( StartTranslation(); - PreProcessControlFlowInstructions(cf_instructions); - // Translate all instructions. for (uint32_t i = 0, cf_index = 0; i < max_cf_dword_index; i += 3) { ControlFlowInstruction cf_a; diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h index 3d4fa208d..73ab4f6c0 100644 --- a/src/xenia/gpu/shader_translator.h +++ b/src/xenia/gpu/shader_translator.h @@ -136,10 +136,6 @@ class ShaderTranslator { shader->host_disassembly_ = std::move(value); } - // Pre-process a control-flow instruction before anything else. - virtual void PreProcessControlFlowInstructions( - std::vector instrs) {} - // Handles translation for control flow label addresses. // This is triggered once for each label required (due to control flow // operations) before any of the instructions within the target exec. 
diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index ce232a7ad..d423ae509 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -10,9 +10,11 @@ #include "xenia/gpu/spirv_shader_translator.h" #include +#include #include #include "third_party/glslang/SPIRV/GLSL.std.450.h" +#include "xenia/base/assert.h" namespace xe { namespace gpu { @@ -26,13 +28,15 @@ void SpirvShaderTranslator::Reset() { ShaderTranslator::Reset(); builder_.reset(); + + // main_switch_cases_.reset(); } void SpirvShaderTranslator::StartTranslation() { - // TODO(Triang3l): Once tool ID (likely 26) is registered in SPIRV-Headers, - // use it instead. + // Tool ID 26 "Xenia Emulator Microcode Translator". + // https://github.com/KhronosGroup/SPIRV-Headers/blob/c43a43c7cc3af55910b9bec2a71e3e8a622443cf/include/spirv/spir-v.xml#L79 // TODO(Triang3l): Logger. - builder_ = std::make_unique(0x10000, 0xFFFF0001, nullptr); + builder_ = std::make_unique(1 << 16, (26 << 16) | 1, nullptr); builder_->addCapability(IsSpirvTessEvalShader() ? 
spv::CapabilityTessellation : spv::CapabilityShader); @@ -42,11 +46,29 @@ void SpirvShaderTranslator::StartTranslation() { builder_->setSource(spv::SourceLanguageUnknown, 0); type_void_ = builder_->makeVoidType(); + type_bool_ = builder_->makeBoolType(); + type_int_ = builder_->makeIntType(32); + type_int4_ = builder_->makeVectorType(type_int_, 4); type_float_ = builder_->makeFloatType(32); type_float2_ = builder_->makeVectorType(type_float_, 2); type_float3_ = builder_->makeVectorType(type_float_, 3); type_float4_ = builder_->makeVectorType(type_float_, 4); - type_int_ = builder_->makeIntType(32); + + const_int_0_ = builder_->makeIntConstant(0); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp_.push_back(const_int_0_); + } + const_int4_0_ = builder_->makeCompositeConstant(type_int4_, id_vector_temp_); + const_float_0_ = builder_->makeFloatConstant(0.0f); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp_.push_back(const_float_0_); + } + const_float4_0_ = + builder_->makeCompositeConstant(type_float4_, id_vector_temp_); if (IsSpirvVertexOrTessEvalShader()) { StartVertexOrTessEvalShaderBeforeMain(); @@ -55,28 +77,131 @@ void SpirvShaderTranslator::StartTranslation() { // Begin the main function. std::vector main_param_types; std::vector> main_precisions; - spv::Block* main_entry; - builder_->makeFunctionEntry(spv::NoPrecision, type_void_, "main", - main_param_types, main_precisions, &main_entry); + spv::Block* function_main_entry; + function_main_ = builder_->makeFunctionEntry( + spv::NoPrecision, type_void_, "main", main_param_types, main_precisions, + &function_main_entry); - // Begin ucode translation. - if (register_count()) { + // Begin ucode translation. Initialize everything, even without defined + // defaults, for safety. 
+ var_main_predicate_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_bool_, + "xe_var_predicate", builder_->makeBoolConstant(false)); + var_main_address_absolute_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_int_, + "xe_var_address_absolute", const_int_0_); + var_main_address_relative_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_int4_, + "xe_var_address_relative", const_int4_0_); + uint32_t register_array_size = register_count(); + if (register_array_size) { + id_vector_temp_.clear(); + id_vector_temp_.reserve(register_array_size); + // TODO(Triang3l): In PS, only initialize starting from the interpolators, + // probably manually. But not very important. + for (uint32_t i = 0; i < register_array_size; ++i) { + id_vector_temp_.push_back(const_float4_0_); + } + spv::Id type_register_array = builder_->makeArrayType( + type_float4_, builder_->makeUintConstant(register_array_size), 0); var_main_registers_ = builder_->createVariable( - spv::NoPrecision, spv::StorageClassFunction, - builder_->makeArrayType( - type_float4_, builder_->makeUintConstant(register_count()), 0), - "xe_r"); + spv::NoPrecision, spv::StorageClassFunction, type_register_array, + "xe_var_registers", + builder_->makeCompositeConstant(type_register_array, id_vector_temp_)); } + + // Write the execution model-specific prologue with access to variables in the + // main function. + if (IsSpirvVertexOrTessEvalShader()) { + StartVertexOrTessEvalShaderInMain(); + } + + // Open the main loop. + + spv::Block* main_loop_pre_header = builder_->getBuildPoint(); + main_loop_header_ = &builder_->makeNewBlock(); + spv::Block& main_loop_body = builder_->makeNewBlock(); + // Added later because the body has nested control flow, but according to the + // specification: + // "The order of blocks in a function must satisfy the rule that blocks appear + // before all blocks they dominate." 
+ main_loop_continue_ = + new spv::Block(builder_->getUniqueId(), *function_main_); + main_loop_merge_ = new spv::Block(builder_->getUniqueId(), *function_main_); + builder_->createBranch(main_loop_header_); + + // Main loop header - based on whether it's the first iteration (entered from + // the function or from the continuation), choose the program counter. + builder_->setBuildPoint(main_loop_header_); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(main_loop_pre_header->getId()); + main_loop_pc_next_ = builder_->getUniqueId(); + id_vector_temp_.push_back(main_loop_pc_next_); + id_vector_temp_.push_back(main_loop_continue_->getId()); + spv::Id main_loop_pc_current = + builder_->createOp(spv::OpPhi, type_int_, id_vector_temp_); + uint_vector_temp_.clear(); + builder_->createLoopMerge(main_loop_merge_, main_loop_continue_, + spv::LoopControlDontUnrollMask, uint_vector_temp_); + builder_->createBranch(&main_loop_body); + + // Main loop body. + builder_->setBuildPoint(&main_loop_body); + // TODO(Triang3l): Create the switch, add the block for the case 0 and set the + // build point to it. } std::vector SpirvShaderTranslator::CompleteTranslation() { + // Close the main loop. + // Break from the body after falling through the end or breaking. + builder_->createBranch(main_loop_merge_); + // Main loop continuation - choose the program counter based on the path + // taken (-1 if not from a jump as a safe fallback, which would result in not + // hitting any switch case and reaching the final break in the body). + function_main_->addBlock(main_loop_continue_); + builder_->setBuildPoint(main_loop_continue_); + { + std::unique_ptr main_loop_pc_next_op = + std::make_unique(main_loop_pc_next_, type_int_, + spv::OpCopyObject); + // TODO(Triang3l): Phi between the continues in the switch cases and the + // switch merge block. 
+ main_loop_pc_next_op->addIdOperand(builder_->makeIntConstant(-1)); + builder_->getBuildPoint()->addInstruction(std::move(main_loop_pc_next_op)); + } + builder_->createBranch(main_loop_header_); + // Add the main loop merge block and go back to the function. + function_main_->addBlock(main_loop_merge_); + builder_->setBuildPoint(main_loop_merge_); + if (IsSpirvVertexOrTessEvalShader()) { CompleteVertexOrTessEvalShaderInMain(); } - // End the main function.. + // End the main function. builder_->leaveFunction(); + // Make the main function the entry point. + spv::ExecutionModel execution_model; + if (IsSpirvFragmentShader()) { + execution_model = spv::ExecutionModelFragment; + builder_->addExecutionMode(function_main_, + spv::ExecutionModeOriginUpperLeft); + } else { + assert_true(IsSpirvVertexOrTessEvalShader()); + execution_model = IsSpirvTessEvalShader() + ? spv::ExecutionModelTessellationEvaluation + : spv::ExecutionModelVertex; + } + spv::Instruction* entry_point = + builder_->addEntryPoint(execution_model, function_main_, "main"); + + if (IsSpirvVertexOrTessEvalShader()) { + CompleteVertexOrTessEvalShaderAfterMain(entry_point); + } + // TODO(Triang3l): Avoid copy? std::vector module_uints; builder_->dump(module_uints); @@ -92,14 +217,14 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { // Create the inputs. 
if (IsSpirvTessEvalShader()) { - input_vertex_index_ = builder_->createVariable( + input_primitive_id_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassInput, type_int_, "gl_PrimitiveID"); - builder_->addDecoration(input_vertex_index_, spv::DecorationBuiltIn, + builder_->addDecoration(input_primitive_id_, spv::DecorationBuiltIn, spv::BuiltInPrimitiveId); } else { - input_primitive_id_ = builder_->createVariable( + input_vertex_index_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassInput, type_int_, "gl_VertexIndex"); - builder_->addDecoration(input_primitive_id_, spv::DecorationBuiltIn, + builder_->addDecoration(input_vertex_index_, spv::DecorationBuiltIn, spv::BuiltInVertexIndex); } @@ -145,7 +270,23 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { type_struct_per_vertex, "xe_out_gl_PerVertex"); } +void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { + var_main_point_size_edge_flag_kill_vertex_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_float3_, + "xe_var_point_size_edge_flag_kill_vertex"); +} + void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() {} +void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderAfterMain( + spv::Instruction* entry_point) { + if (IsSpirvTessEvalShader()) { + entry_point->addIdOperand(input_primitive_id_); + } else { + entry_point->addIdOperand(input_vertex_index_); + } + entry_point->addIdOperand(output_per_vertex_); +} + } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 5ef5dfc2c..1fa777271 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -46,24 +46,38 @@ class SpirvShaderTranslator : public ShaderTranslator { bool IsSpirvFragmentShader() const { return is_pixel_shader(); } void StartVertexOrTessEvalShaderBeforeMain(); + void StartVertexOrTessEvalShaderInMain(); void 
CompleteVertexOrTessEvalShaderInMain(); + void CompleteVertexOrTessEvalShaderAfterMain(spv::Instruction* entry_point); bool supports_clip_distance_; bool supports_cull_distance_; std::unique_ptr builder_; + std::vector id_vector_temp_; + std::vector uint_vector_temp_; + spv::Id ext_inst_glsl_std_450_; spv::Id type_void_; + spv::Id type_bool_; + spv::Id type_int_; + spv::Id type_int4_; + spv::Id type_uint_; spv::Id type_float_; spv::Id type_float2_; spv::Id type_float3_; spv::Id type_float4_; - spv::Id type_int_; - spv::Id type_uint_; + spv::Id const_int_0_; + spv::Id const_int4_0_; + spv::Id const_float_0_; + spv::Id const_float4_0_; + + // VS as VS only - int. spv::Id input_vertex_index_; + // VS as TES only - int. spv::Id input_primitive_id_; enum OutputPerVertexMember : unsigned int { @@ -75,8 +89,21 @@ class SpirvShaderTranslator : public ShaderTranslator { }; spv::Id output_per_vertex_; - spv::Id function_main_; + spv::Function* function_main_; + // bool. + spv::Id var_main_predicate_; + // int4. + spv::Id var_main_address_relative_; + // int. + spv::Id var_main_address_absolute_; + // float4[register_count()]. spv::Id var_main_registers_; + // VS only - float3 (special exports). + spv::Id var_main_point_size_edge_flag_kill_vertex_; + spv::Block* main_loop_header_; + spv::Block* main_loop_continue_; + spv::Block* main_loop_merge_; + spv::Id main_loop_pc_next_; }; } // namespace gpu diff --git a/src/xenia/ui/vulkan/spirv_tools_context.cc b/src/xenia/ui/vulkan/spirv_tools_context.cc new file mode 100644 index 000000000..01078ca08 --- /dev/null +++ b/src/xenia/ui/vulkan/spirv_tools_context.cc @@ -0,0 +1,113 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/ui/vulkan/spirv_tools_context.h" + +#include +#include +#include + +#include "xenia/base/logging.h" +#include "xenia/base/platform.h" + +#if XE_PLATFORM_LINUX +#include +#elif XE_PLATFORM_WIN32 +#include "xenia/base/platform_win.h" +#endif + +namespace xe { +namespace ui { +namespace vulkan { + +bool SpirvToolsContext::Initialize() { + const char* vulkan_sdk_env = std::getenv("VULKAN_SDK"); + if (!vulkan_sdk_env) { + XELOGE("SPIRV-Tools: Failed to get the VULKAN_SDK environment variable"); + Shutdown(); + return false; + } + std::filesystem::path vulkan_sdk_path(vulkan_sdk_env); +#if XE_PLATFORM_LINUX + library_ = dlopen((vulkan_sdk_path / "bin/libSPIRV-Tools-shared.so").c_str(), + RTLD_NOW | RTLD_LOCAL); + if (!library_) { + XELOGE( + "SPIRV-Tools: Failed to load $VULKAN_SDK/bin/libSPIRV-Tools-shared.so"); + Shutdown(); + return false; + } +#elif XE_PLATFORM_WIN32 + library_ = LoadLibraryW( + (vulkan_sdk_path / "Bin/SPIRV-Tools-shared.dll").wstring().c_str()); + if (!library_) { + XELOGE( + "SPIRV-Tools: Failed to load %VULKAN_SDK%/Bin/SPIRV-Tools-shared.dll"); + Shutdown(); + return false; + } +#else +#error No SPIRV-Tools library loading provided for the target platform. 
+#endif + if (!LoadLibraryFunction(fn_spvContextCreate_, "spvContextCreate") || + !LoadLibraryFunction(fn_spvContextDestroy_, "spvContextDestroy") || + !LoadLibraryFunction(fn_spvValidateBinary_, "spvValidateBinary") || + !LoadLibraryFunction(fn_spvDiagnosticDestroy_, "spvDiagnosticDestroy")) { + XELOGE("SPIRV-Tools: Failed to get library function pointers"); + Shutdown(); + return false; + } + context_ = fn_spvContextCreate_(SPV_ENV_VULKAN_1_0); + if (!context_) { + XELOGE("SPIRV-Tools: Failed to create a Vulkan 1.0 context"); + Shutdown(); + return false; + } + return true; +} + +void SpirvToolsContext::Shutdown() { + if (context_) { + fn_spvContextDestroy_(context_); + context_ = nullptr; + } + if (library_) { +#if XE_PLATFORM_LINUX + dlclose(library_); +#elif XE_PLATFORM_WIN32 + FreeLibrary(library_); +#endif + library_ = nullptr; + } +} + +spv_result_t SpirvToolsContext::Validate(const uint32_t* words, + size_t num_words, + std::string* error) const { + if (error) { + error->clear(); + } + if (!context_) { + return SPV_UNSUPPORTED; + } + spv_diagnostic diagnostic = nullptr; + spv_result_t result = + fn_spvValidateBinary_(context_, words, num_words, &diagnostic); + if (diagnostic) { + if (error && diagnostic && diagnostic->error) { + *error = diagnostic->error; + } + fn_spvDiagnosticDestroy_(diagnostic); + } + return result; +} + +} // namespace vulkan +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/vulkan/spirv_tools_context.h b/src/xenia/ui/vulkan/spirv_tools_context.h new file mode 100644 index 000000000..87680c1a4 --- /dev/null +++ b/src/xenia/ui/vulkan/spirv_tools_context.h @@ -0,0 +1,72 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_UI_VULKAN_SPIRV_TOOLS_CONTEXT_H_ +#define XENIA_UI_VULKAN_SPIRV_TOOLS_CONTEXT_H_ + +#include +#include + +#include "third_party/SPIRV-Tools/include/spirv-tools/libspirv.h" +#include "xenia/base/platform.h" + +#if XE_PLATFORM_LINUX +#include +#elif XE_PLATFORM_WIN32 +#include "xenia/base/platform_win.h" +#endif + +namespace xe { +namespace ui { +namespace vulkan { + +class SpirvToolsContext { + public: + SpirvToolsContext() {} + SpirvToolsContext(const SpirvToolsContext& context) = delete; + SpirvToolsContext& operator=(const SpirvToolsContext& context) = delete; + ~SpirvToolsContext() { Shutdown(); } + bool Initialize(); + void Shutdown(); + + spv_result_t Validate(const uint32_t* words, size_t num_words, + std::string* error) const; + + private: +#if XE_PLATFORM_LINUX + void* library_ = nullptr; +#elif XE_PLATFORM_WIN32 + HMODULE library_ = nullptr; +#endif + + template + bool LoadLibraryFunction(FunctionPointer& function, const char* name) { +#if XE_PLATFORM_LINUX + function = reinterpret_cast(dlsym(library_, name)); +#elif XE_PLATFORM_WIN32 + function = + reinterpret_cast(GetProcAddress(library_, name)); +#else +#error No SPIRV-Tools LoadLibraryFunction provided for the target platform. 
+#endif + return function != nullptr; + } + decltype(&spvContextCreate) fn_spvContextCreate_ = nullptr; + decltype(&spvContextDestroy) fn_spvContextDestroy_ = nullptr; + decltype(&spvValidateBinary) fn_spvValidateBinary_ = nullptr; + decltype(&spvDiagnosticDestroy) fn_spvDiagnosticDestroy_ = nullptr; + + spv_context context_ = nullptr; +}; + +} // namespace vulkan +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_VULKAN_SPIRV_TOOLS_CONTEXT_H_ diff --git a/third_party/SPIRV-Tools b/third_party/SPIRV-Tools new file mode 160000 index 000000000..dd534e877 --- /dev/null +++ b/third_party/SPIRV-Tools @@ -0,0 +1 @@ +Subproject commit dd534e877e725c9bb6f751c427442456a05384e4 diff --git a/third_party/glslang b/third_party/glslang index 5a9dfb674..f4f1d8a35 160000 --- a/third_party/glslang +++ b/third_party/glslang @@ -1 +1 @@ -Subproject commit 5a9dfb6741ca851f8bb57abc0fe808f5a0705fa2 +Subproject commit f4f1d8a352ca1908943aea2ad8c54b39b4879080 From 7846245b662598f9cbe471df62dd78f78d17934d Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 18 Oct 2020 15:36:02 +0300 Subject: [PATCH 028/123] [SPIR-V] Main program counter switch --- src/xenia/gpu/spirv_shader_translator.cc | 129 +++++++++++++++++++---- src/xenia/gpu/spirv_shader_translator.h | 7 ++ 2 files changed, 114 insertions(+), 22 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index d423ae509..8fa3eb73e 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -29,7 +29,8 @@ void SpirvShaderTranslator::Reset() { builder_.reset(); - // main_switch_cases_.reset(); + main_switch_op_.reset(); + main_switch_next_pc_phi_operands_.clear(); } void SpirvShaderTranslator::StartTranslation() { @@ -117,7 +118,6 @@ void SpirvShaderTranslator::StartTranslation() { } // Open the main loop. 
- spv::Block* main_loop_pre_header = builder_->getBuildPoint(); main_loop_header_ = &builder_->makeNewBlock(); spv::Block& main_loop_body = builder_->makeNewBlock(); @@ -130,18 +130,25 @@ void SpirvShaderTranslator::StartTranslation() { main_loop_merge_ = new spv::Block(builder_->getUniqueId(), *function_main_); builder_->createBranch(main_loop_header_); + // If no jumps, don't create a switch, but still create a loop so exece can + // break. + bool has_main_switch = !label_addresses().empty(); + // Main loop header - based on whether it's the first iteration (entered from // the function or from the continuation), choose the program counter. builder_->setBuildPoint(main_loop_header_); - id_vector_temp_.clear(); - id_vector_temp_.reserve(4); - id_vector_temp_.push_back(const_int_0_); - id_vector_temp_.push_back(main_loop_pre_header->getId()); - main_loop_pc_next_ = builder_->getUniqueId(); - id_vector_temp_.push_back(main_loop_pc_next_); - id_vector_temp_.push_back(main_loop_continue_->getId()); - spv::Id main_loop_pc_current = - builder_->createOp(spv::OpPhi, type_int_, id_vector_temp_); + spv::Id main_loop_pc_current = 0; + if (has_main_switch) { + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(main_loop_pre_header->getId()); + main_loop_pc_next_ = builder_->getUniqueId(); + id_vector_temp_.push_back(main_loop_pc_next_); + id_vector_temp_.push_back(main_loop_continue_->getId()); + main_loop_pc_current = + builder_->createOp(spv::OpPhi, type_int_, id_vector_temp_); + } uint_vector_temp_.clear(); builder_->createLoopMerge(main_loop_merge_, main_loop_continue_, spv::LoopControlDontUnrollMask, uint_vector_temp_); @@ -149,29 +156,86 @@ void SpirvShaderTranslator::StartTranslation() { // Main loop body. builder_->setBuildPoint(&main_loop_body); - // TODO(Triang3l): Create the switch, add the block for the case 0 and set the - // build point to it. 
+ if (has_main_switch) { + // Create the program counter switch with cases for every label and for + // label 0. + main_switch_header_ = builder_->getBuildPoint(); + main_switch_merge_ = + new spv::Block(builder_->getUniqueId(), *function_main_); + { + std::unique_ptr main_switch_selection_merge_op = + std::make_unique(spv::OpSelectionMerge); + main_switch_selection_merge_op->addIdOperand(main_switch_merge_->getId()); + main_switch_selection_merge_op->addImmediateOperand( + spv::SelectionControlDontFlattenMask); + builder_->getBuildPoint()->addInstruction( + std::move(main_switch_selection_merge_op)); + } + main_switch_op_ = std::make_unique(spv::OpSwitch); + main_switch_op_->addIdOperand(main_loop_pc_current); + main_switch_op_->addIdOperand(main_switch_merge_->getId()); + // The default case (the merge here) must have the header as a predecessor. + main_switch_merge_->addPredecessor(main_switch_header_); + // The instruction will be inserted later, when all cases are filled. + // Insert and enter case 0. + spv::Block* main_switch_case_0_block = + new spv::Block(builder_->getUniqueId(), *function_main_); + main_switch_op_->addImmediateOperand(0); + main_switch_op_->addIdOperand(main_switch_case_0_block->getId()); + // Every switch case must have the OpSelectionMerge/OpSwitch block as a + // predecessor. + main_switch_case_0_block->addPredecessor(main_switch_header_); + function_main_->addBlock(main_switch_case_0_block); + builder_->setBuildPoint(main_switch_case_0_block); + } } std::vector SpirvShaderTranslator::CompleteTranslation() { - // Close the main loop. - // Break from the body after falling through the end or breaking. - builder_->createBranch(main_loop_merge_); + bool has_main_switch = !label_addresses().empty(); + // After the final exec (if it happened to be not exece, which would already + // have a break branch), break from the switch if it exists, or from the + // loop it doesn't. 
+ if (!builder_->getBuildPoint()->isTerminated()) { + builder_->createBranch(has_main_switch ? main_switch_merge_ + : main_loop_merge_); + } + if (has_main_switch) { + // Insert the switch instruction with all cases added as operands. + builder_->setBuildPoint(main_switch_header_); + builder_->getBuildPoint()->addInstruction(std::move(main_switch_op_)); + // Build the main switch merge, breaking out of the loop after falling + // through the end or breaking from exece (only continuing if a jump - from + // a guest loop or from jmp/call - was made). + function_main_->addBlock(main_switch_merge_); + builder_->setBuildPoint(main_switch_merge_); + builder_->createBranch(main_loop_merge_); + } + // Main loop continuation - choose the program counter based on the path // taken (-1 if not from a jump as a safe fallback, which would result in not // hitting any switch case and reaching the final break in the body). function_main_->addBlock(main_loop_continue_); builder_->setBuildPoint(main_loop_continue_); - { + if (has_main_switch) { + // If labels were added, but not jumps (for example, due to the call + // instruction not being implemented as of October 18, 2020), send an + // impossible program counter value (-1) to the OpPhi at the next iteration. + if (main_switch_next_pc_phi_operands_.empty()) { + main_switch_next_pc_phi_operands_.push_back( + builder_->makeIntConstant(-1)); + } std::unique_ptr main_loop_pc_next_op = - std::make_unique(main_loop_pc_next_, type_int_, - spv::OpCopyObject); - // TODO(Triang3l): Phi between the continues in the switch cases and the - // switch merge block. - main_loop_pc_next_op->addIdOperand(builder_->makeIntConstant(-1)); + std::make_unique( + main_loop_pc_next_, type_int_, + main_switch_next_pc_phi_operands_.size() >= 2 ? 
spv::OpPhi + : spv::OpCopyObject); + for (spv::Id operand : main_switch_next_pc_phi_operands_) { + main_loop_pc_next_op->addIdOperand(operand); + } builder_->getBuildPoint()->addInstruction(std::move(main_loop_pc_next_op)); } builder_->createBranch(main_loop_header_); + // Add the main loop merge block and go back to the function. function_main_->addBlock(main_loop_merge_); builder_->setBuildPoint(main_loop_merge_); @@ -214,6 +278,27 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { return module_bytes; } +void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) { + if (cf_index == 0) { + // 0 already added in the beginning. + return; + } + spv::Function& function = builder_->getBuildPoint()->getParent(); + // Create the next switch case and fallthrough to it. + spv::Block* new_case = new spv::Block(builder_->getUniqueId(), function); + main_switch_op_->addImmediateOperand(cf_index); + main_switch_op_->addIdOperand(new_case->getId()); + // Every switch case must have the OpSelectionMerge/OpSwitch block as a + // predecessor. + new_case->addPredecessor(main_switch_header_); + // The previous block may have already been terminated if was exece. + if (!builder_->getBuildPoint()->isTerminated()) { + builder_->createBranch(new_case); + } + function.addBlock(new_case); + builder_->setBuildPoint(new_case); +} + void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { // Create the inputs. 
if (IsSpirvTessEvalShader()) { diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 1fa777271..e8ca2fee9 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -12,6 +12,7 @@ #include #include +#include #include #include "third_party/glslang/SPIRV/SpvBuilder.h" @@ -32,6 +33,8 @@ class SpirvShaderTranslator : public ShaderTranslator { std::vector CompleteTranslation() override; + void ProcessLabel(uint32_t cf_index) override; + private: // TODO(Triang3l): Depth-only pixel shader. bool IsSpirvVertexOrTessEvalShader() const { return is_vertex_shader(); } @@ -104,6 +107,10 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Block* main_loop_continue_; spv::Block* main_loop_merge_; spv::Id main_loop_pc_next_; + spv::Block* main_switch_header_; + std::unique_ptr main_switch_op_; + spv::Block* main_switch_merge_; + std::vector main_switch_next_pc_phi_operands_; }; } // namespace gpu From afcf3c27c05e009c184f9f0a4c44a121930c02a5 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 18 Oct 2020 19:23:34 +0300 Subject: [PATCH 029/123] [Vulkan] Per-frame swapchain semaphores --- src/xenia/ui/vulkan/vulkan_context.cc | 70 ++++++++++++++------------- src/xenia/ui/vulkan/vulkan_context.h | 7 +-- 2 files changed, 40 insertions(+), 37 deletions(-) diff --git a/src/xenia/ui/vulkan/vulkan_context.cc b/src/xenia/ui/vulkan/vulkan_context.cc index 656cad3eb..0d89bee35 100644 --- a/src/xenia/ui/vulkan/vulkan_context.cc +++ b/src/xenia/ui/vulkan/vulkan_context.cc @@ -54,6 +54,11 @@ bool VulkanContext::Initialize() { fence_create_info.pNext = nullptr; fence_create_info.flags = VK_FENCE_CREATE_SIGNALED_BIT; + VkSemaphoreCreateInfo semaphore_create_info; + semaphore_create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + semaphore_create_info.pNext = nullptr; + semaphore_create_info.flags = 0; + VkCommandPoolCreateInfo command_pool_create_info; command_pool_create_info.sType = 
VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; command_pool_create_info.pNext = nullptr; @@ -76,6 +81,24 @@ bool VulkanContext::Initialize() { Shutdown(); return false; } + if (dfn.vkCreateSemaphore(device, &semaphore_create_info, nullptr, + &submission.image_acquisition_semaphore) != + VK_SUCCESS) { + XELOGE( + "Failed to create the Vulkan swap chain image acquisition " + "semaphores"); + Shutdown(); + return false; + } + if (dfn.vkCreateSemaphore(device, &semaphore_create_info, nullptr, + &submission.render_completion_semaphore) != + VK_SUCCESS) { + XELOGE( + "Failed to create the Vulkan swap chain rendering completion " + "semaphores"); + Shutdown(); + return false; + } if (dfn.vkCreateCommandPool(device, &command_pool_create_info, nullptr, &submission.command_pool) != VK_SUCCESS) { XELOGE("Failed to create the Vulkan composition command pools"); @@ -92,26 +115,6 @@ bool VulkanContext::Initialize() { } } - VkSemaphoreCreateInfo semaphore_create_info; - semaphore_create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - semaphore_create_info.pNext = nullptr; - semaphore_create_info.flags = 0; - if (dfn.vkCreateSemaphore(device, &semaphore_create_info, nullptr, - &swap_image_acquisition_semaphore_) != VK_SUCCESS) { - XELOGE( - "Failed to create the Vulkan swap chain image acquisition semaphore"); - Shutdown(); - return false; - } - if (dfn.vkCreateSemaphore(device, &semaphore_create_info, nullptr, - &swap_render_completion_semaphore_) != VK_SUCCESS) { - XELOGE( - "Failed to create the Vulkan swap chain rendering completion " - "semaphore"); - Shutdown(); - return false; - } - immediate_drawer_ = std::make_unique(*this); if (!immediate_drawer_->Initialize()) { Shutdown(); @@ -147,23 +150,22 @@ void VulkanContext::Shutdown() { util::DestroyAndNullHandle(ifn.vkDestroySurfaceKHR, instance, swap_surface_); swap_swapchain_or_surface_recreation_needed_ = false; - util::DestroyAndNullHandle(dfn.vkDestroySemaphore, device, - swap_render_completion_semaphore_); - 
util::DestroyAndNullHandle(dfn.vkDestroySemaphore, device, - swap_image_acquisition_semaphore_); - swap_submission_completed_ = 0; swap_submission_current_ = 1; for (uint32_t i = 0; i < kSwapchainMaxImageCount; ++i) { - SwapSubmission& submission = swap_submissions_[i]; - submission.setup_command_buffer_index = UINT32_MAX; - util::DestroyAndNullHandle(dfn.vkDestroyCommandPool, device, - submission.command_pool); - util::DestroyAndNullHandle(dfn.vkDestroyFence, device, submission.fence); if (i < swap_setup_command_buffers_allocated_count_) { dfn.vkDestroyCommandPool(device, swap_setup_command_buffers_[i].first, nullptr); } + SwapSubmission& submission = swap_submissions_[i]; + submission.setup_command_buffer_index = UINT32_MAX; + util::DestroyAndNullHandle(dfn.vkDestroyCommandPool, device, + submission.command_pool); + util::DestroyAndNullHandle(dfn.vkDestroySemaphore, device, + submission.render_completion_semaphore); + util::DestroyAndNullHandle(dfn.vkDestroySemaphore, device, + submission.image_acquisition_semaphore); + util::DestroyAndNullHandle(dfn.vkDestroyFence, device, submission.fence); } swap_setup_command_buffers_free_bits_ = 0; swap_setup_command_buffers_allocated_count_ = 0; @@ -704,7 +706,7 @@ bool VulkanContext::BeginSwap() { // swapchain. 
uint32_t acquired_image_index; switch (dfn.vkAcquireNextImageKHR(device, swap_swapchain_, UINT64_MAX, - swap_image_acquisition_semaphore_, + submission.image_acquisition_semaphore, nullptr, &acquired_image_index)) { case VK_SUCCESS: case VK_SUBOPTIMAL_KHR: @@ -793,14 +795,14 @@ void VulkanContext::EndSwap() { submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submit_info.pNext = nullptr; submit_info.waitSemaphoreCount = 1; - submit_info.pWaitSemaphores = &swap_image_acquisition_semaphore_; + submit_info.pWaitSemaphores = &submission.image_acquisition_semaphore; VkPipelineStageFlags image_acquisition_semaphore_wait_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; submit_info.pWaitDstStageMask = &image_acquisition_semaphore_wait_stage; submit_info.commandBufferCount = submit_command_buffer_count; submit_info.pCommandBuffers = submit_command_buffers; submit_info.signalSemaphoreCount = 1; - submit_info.pSignalSemaphores = &swap_render_completion_semaphore_; + submit_info.pSignalSemaphores = &submission.render_completion_semaphore; VkResult submit_result = provider.SubmitToGraphicsComputeQueue(1, &submit_info, submission.fence); if (submit_result != VK_SUCCESS) { @@ -815,7 +817,7 @@ void VulkanContext::EndSwap() { present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; present_info.pNext = nullptr; present_info.waitSemaphoreCount = 1; - present_info.pWaitSemaphores = &swap_render_completion_semaphore_; + present_info.pWaitSemaphores = &submission.render_completion_semaphore; present_info.swapchainCount = 1; present_info.pSwapchains = &swap_swapchain_; present_info.pImageIndices = &swap_swapchain_image_current_; diff --git a/src/xenia/ui/vulkan/vulkan_context.h b/src/xenia/ui/vulkan/vulkan_context.h index b2e34f7ec..00289c19b 100644 --- a/src/xenia/ui/vulkan/vulkan_context.h +++ b/src/xenia/ui/vulkan/vulkan_context.h @@ -106,6 +106,10 @@ class VulkanContext : public GraphicsContext { // recommended by Nvidia (Direct3D 12-like way): // 
https://developer.nvidia.com/sites/default/files/akamai/gameworks/blog/munich/mschott_vulkan_multi_threading.pdf VkFence fence = VK_NULL_HANDLE; + // One pair of semaphores per frame because queue operations may be done out + // of order. + VkSemaphore image_acquisition_semaphore = VK_NULL_HANDLE; + VkSemaphore render_completion_semaphore = VK_NULL_HANDLE; VkCommandPool command_pool = VK_NULL_HANDLE; VkCommandBuffer command_buffer; uint32_t setup_command_buffer_index = UINT32_MAX; @@ -114,9 +118,6 @@ class VulkanContext : public GraphicsContext { uint64_t swap_submission_current_ = 1; uint64_t swap_submission_completed_ = 0; - VkSemaphore swap_image_acquisition_semaphore_ = VK_NULL_HANDLE; - VkSemaphore swap_render_completion_semaphore_ = VK_NULL_HANDLE; - VkSurfaceKHR swap_surface_ = VK_NULL_HANDLE; VkSurfaceFormatKHR swap_surface_format_ = {VK_FORMAT_UNDEFINED, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR}; From fdbed734639d818d7ea8d3137d401ede4374f160 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 25 Oct 2020 15:09:39 +0300 Subject: [PATCH 030/123] [Vulkan/SPIR-V] Some pipeline layout parts + exec conditionals --- src/xenia/gpu/spirv_shader_translator.cc | 233 +++++++++++++++- src/xenia/gpu/spirv_shader_translator.h | 78 ++++++ .../gpu/vulkan/vulkan_command_processor.cc | 254 +++++++++++++++++- .../gpu/vulkan/vulkan_command_processor.h | 45 ++++ 4 files changed, 607 insertions(+), 3 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 8fa3eb73e..c1b376fc0 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -31,6 +31,9 @@ void SpirvShaderTranslator::Reset() { main_switch_op_.reset(); main_switch_next_pc_phi_operands_.clear(); + + cf_exec_conditional_merge_ = nullptr; + cf_instruction_predicate_merge_ = nullptr; } void SpirvShaderTranslator::StartTranslation() { @@ -50,12 +53,15 @@ void SpirvShaderTranslator::StartTranslation() { type_bool_ = 
builder_->makeBoolType(); type_int_ = builder_->makeIntType(32); type_int4_ = builder_->makeVectorType(type_int_, 4); + type_uint_ = builder_->makeUintType(32); + type_uint4_ = builder_->makeVectorType(type_uint_, 4); type_float_ = builder_->makeFloatType(32); type_float2_ = builder_->makeVectorType(type_float_, 2); type_float3_ = builder_->makeVectorType(type_float_, 3); type_float4_ = builder_->makeVectorType(type_float_, 4); const_int_0_ = builder_->makeIntConstant(0); + const_uint_0_ = builder_->makeUintConstant(0); id_vector_temp_.clear(); id_vector_temp_.reserve(4); for (uint32_t i = 0; i < 4; ++i) { @@ -71,6 +77,40 @@ void SpirvShaderTranslator::StartTranslation() { const_float4_0_ = builder_->makeCompositeConstant(type_float4_, id_vector_temp_); + // Common uniform buffer - bool and loop constants. + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + // 256 bool constants. + id_vector_temp_.push_back(builder_->makeArrayType( + type_uint4_, builder_->makeUintConstant(2), sizeof(uint32_t) * 4)); + // Currently (as of October 24, 2020) makeArrayType only uses the stride to + // check if deduplication can be done - the array stride decoration needs to + // be applied explicitly. + builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, + sizeof(uint32_t) * 4); + // 32 loop constants. 
+ id_vector_temp_.push_back(builder_->makeArrayType( + type_uint4_, builder_->makeUintConstant(8), sizeof(uint32_t) * 4)); + builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, + sizeof(uint32_t) * 4); + spv::Id type_bool_loop_constants = + builder_->makeStructType(id_vector_temp_, "XeBoolLoopConstants"); + builder_->addMemberName(type_bool_loop_constants, 0, "bool_constants"); + builder_->addMemberDecoration(type_bool_loop_constants, 0, + spv::DecorationOffset, 0); + builder_->addMemberName(type_bool_loop_constants, 1, "loop_constants"); + builder_->addMemberDecoration(type_bool_loop_constants, 1, + spv::DecorationOffset, sizeof(uint32_t) * 8); + builder_->addDecoration(type_bool_loop_constants, spv::DecorationBlock); + uniform_bool_loop_constants_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassUniform, type_bool_loop_constants, + "xe_uniform_bool_loop_constants"); + builder_->addDecoration(uniform_bool_loop_constants_, + spv::DecorationDescriptorSet, + int(kDescriptorSetBoolLoopConstants)); + builder_->addDecoration(uniform_bool_loop_constants_, spv::DecorationBinding, + 0); + if (IsSpirvVertexOrTessEvalShader()) { StartVertexOrTessEvalShaderBeforeMain(); } @@ -118,7 +158,7 @@ void SpirvShaderTranslator::StartTranslation() { } // Open the main loop. 
- spv::Block* main_loop_pre_header = builder_->getBuildPoint(); + spv::Block& main_loop_pre_header = *builder_->getBuildPoint(); main_loop_header_ = &builder_->makeNewBlock(); spv::Block& main_loop_body = builder_->makeNewBlock(); // Added later because the body has nested control flow, but according to the @@ -142,7 +182,7 @@ void SpirvShaderTranslator::StartTranslation() { id_vector_temp_.clear(); id_vector_temp_.reserve(4); id_vector_temp_.push_back(const_int_0_); - id_vector_temp_.push_back(main_loop_pre_header->getId()); + id_vector_temp_.push_back(main_loop_pre_header.getId()); main_loop_pc_next_ = builder_->getUniqueId(); id_vector_temp_.push_back(main_loop_pc_next_); id_vector_temp_.push_back(main_loop_continue_->getId()); @@ -191,6 +231,8 @@ void SpirvShaderTranslator::StartTranslation() { } std::vector SpirvShaderTranslator::CompleteTranslation() { + // Close flow control within the last switch case. + CloseExecConditionals(); bool has_main_switch = !label_addresses().empty(); // After the final exec (if it happened to be not exece, which would already // have a break branch), break from the switch if it exists, or from the @@ -283,6 +325,12 @@ void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) { // 0 already added in the beginning. return; } + + assert_false(label_addresses().empty()); + + // Close flow control within the previous switch case. + CloseExecConditionals(); + spv::Function& function = builder_->getBuildPoint()->getParent(); // Create the next switch case and fallthrough to it. 
spv::Block* new_case = new spv::Block(builder_->getUniqueId(), function); @@ -299,6 +347,57 @@ void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) { builder_->setBuildPoint(new_case); } +void SpirvShaderTranslator::ProcessExecInstructionBegin( + const ParsedExecInstruction& instr) { + UpdateExecConditionals(instr.type, instr.bool_constant_index, + instr.condition); +} + +void SpirvShaderTranslator::ProcessExecInstructionEnd( + const ParsedExecInstruction& instr) { + if (instr.is_end) { + // Break out of the main switch (if exists) and the main loop. + CloseInstructionPredication(); + if (!builder_->getBuildPoint()->isTerminated()) { + builder_->createBranch(label_addresses().empty() ? main_loop_merge_ + : main_switch_merge_); + } + } + UpdateExecConditionals(instr.type, instr.bool_constant_index, + instr.condition); +} + +void SpirvShaderTranslator::ProcessJumpInstruction( + const ParsedJumpInstruction& instr) { + // Treat like exec, merge with execs if possible, since it's an if too. + ParsedExecInstruction::Type type; + if (instr.type == ParsedJumpInstruction::Type::kConditional) { + type = ParsedExecInstruction::Type::kConditional; + } else if (instr.type == ParsedJumpInstruction::Type::kPredicated) { + type = ParsedExecInstruction::Type::kPredicated; + } else { + type = ParsedExecInstruction::Type::kUnconditional; + } + UpdateExecConditionals(type, instr.bool_constant_index, instr.condition); + + // UpdateExecConditionals may not necessarily close the instruction-level + // predicate check (it's not necessary if the execs are merged), but here the + // instruction itself is on the control flow level, so the predicate check is + // on the control flow level too. 
+ CloseInstructionPredication(); + + JumpToLabel(instr.target_address); +} + +void SpirvShaderTranslator::EnsureBuildPointAvailable() { + if (!builder_->getBuildPoint()->isTerminated()) { + return; + } + spv::Block& new_block = builder_->makeNewBlock(); + new_block.setUnreachable(); + builder_->setBuildPoint(&new_block); +} + void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { // Create the inputs. if (IsSpirvTessEvalShader()) { @@ -373,5 +472,135 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderAfterMain( entry_point->addIdOperand(output_per_vertex_); } +void SpirvShaderTranslator::UpdateExecConditionals( + ParsedExecInstruction::Type type, uint32_t bool_constant_index, + bool condition) { + // Check if we can merge the new exec with the previous one, or the jump with + // the previous exec. The instruction-level predicate check is also merged in + // this case. + if (type == ParsedExecInstruction::Type::kConditional) { + // Can merge conditional with conditional, as long as the bool constant and + // the expected values are the same. + if (cf_exec_conditional_merge_ && + cf_exec_bool_constant_or_predicate_ == bool_constant_index && + cf_exec_condition_ == condition) { + return; + } + } else if (type == ParsedExecInstruction::Type::kPredicated) { + // Can merge predicated with predicated if the conditions are the same and + // the previous exec hasn't modified the predicate register. + if (!cf_exec_predicate_written_ && cf_exec_conditional_merge_ && + cf_exec_bool_constant_or_predicate_ == kCfExecBoolConstantPredicate && + cf_exec_condition_ == condition) { + return; + } + } else { + // Can merge unconditional with unconditional. 
+ assert_true(type == ParsedExecInstruction::Type::kUnconditional); + if (!cf_exec_conditional_merge_) { + return; + } + } + + CloseExecConditionals(); + + if (type == ParsedExecInstruction::Type::kUnconditional) { + return; + } + + EnsureBuildPointAvailable(); + spv::Id condition_id; + if (type == ParsedExecInstruction::Type::kConditional) { + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + // Bool constants (member 0). + id_vector_temp_.push_back(const_int_0_); + // 128-bit vector. + id_vector_temp_.push_back( + builder_->makeIntConstant(int(bool_constant_index >> 7))); + // 32-bit scalar of a 128-bit vector. + id_vector_temp_.push_back( + builder_->makeIntConstant(int((bool_constant_index >> 5) & 2))); + spv::Id bool_constant_scalar = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_bool_loop_constants_, id_vector_temp_), + spv::NoPrecision); + condition_id = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, bool_constant_scalar, + builder_->makeUintConstant(uint32_t(1) + << (bool_constant_index & 31))), + const_uint_0_); + cf_exec_bool_constant_or_predicate_ = bool_constant_index; + } else if (type == ParsedExecInstruction::Type::kPredicated) { + condition_id = builder_->createLoad(var_main_predicate_, spv::NoPrecision); + cf_exec_bool_constant_or_predicate_ = kCfExecBoolConstantPredicate; + } else { + assert_unhandled_case(type); + return; + } + cf_exec_condition_ = condition; + spv::Function& function = builder_->getBuildPoint()->getParent(); + cf_exec_conditional_merge_ = + new spv::Block(builder_->getUniqueId(), function); + { + std::unique_ptr selection_merge_op = + std::make_unique(spv::OpSelectionMerge); + selection_merge_op->addIdOperand(cf_exec_conditional_merge_->getId()); + selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); + builder_->getBuildPoint()->addInstruction(std::move(selection_merge_op)); + } + spv::Block& 
inner_block = builder_->makeNewBlock(); + builder_->createConditionalBranch( + condition_id, condition ? &inner_block : cf_exec_conditional_merge_, + condition ? cf_exec_conditional_merge_ : &inner_block); + builder_->setBuildPoint(&inner_block); +} + +void SpirvShaderTranslator::CloseInstructionPredication() { + if (!cf_instruction_predicate_merge_) { + return; + } + spv::Block& inner_block = *builder_->getBuildPoint(); + if (!inner_block.isTerminated()) { + builder_->createBranch(cf_instruction_predicate_merge_); + } + inner_block.getParent().addBlock(cf_instruction_predicate_merge_); + builder_->setBuildPoint(cf_instruction_predicate_merge_); + cf_instruction_predicate_merge_ = nullptr; +} + +void SpirvShaderTranslator::CloseExecConditionals() { + // Within the exec - instruction-level predicate check. + CloseInstructionPredication(); + // Exec level. + if (cf_exec_conditional_merge_) { + spv::Block& inner_block = *builder_->getBuildPoint(); + if (!inner_block.isTerminated()) { + builder_->createBranch(cf_exec_conditional_merge_); + } + inner_block.getParent().addBlock(cf_exec_conditional_merge_); + builder_->setBuildPoint(cf_exec_conditional_merge_); + cf_exec_conditional_merge_ = nullptr; + } + // Nothing relies on the predicate value being unchanged now. + cf_exec_predicate_written_ = false; +} + +void SpirvShaderTranslator::JumpToLabel(uint32_t address) { + assert_false(label_addresses().empty()); + spv::Block& origin_block = *builder_->getBuildPoint(); + if (origin_block.isTerminated()) { + // Unreachable jump for some reason. 
+ return; + } + main_switch_next_pc_phi_operands_.push_back( + builder_->makeIntConstant(int(address))); + main_switch_next_pc_phi_operands_.push_back(origin_block.getId()); + builder_->createBranch(main_loop_continue_); +} + } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index e8ca2fee9..943385f20 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -23,6 +23,29 @@ namespace gpu { class SpirvShaderTranslator : public ShaderTranslator { public: + enum DescriptorSet : uint32_t { + // In order of update frequency. + // Very frequently changed, especially for UI draws, and for models drawn in + // multiple parts - contains vertex and texture fetch constants. + kDescriptorSetFetchConstants, + // Quite frequently changed (for one object drawn multiple times, for + // instance - may contain projection matrices). + kDescriptorSetFloatConstantsVertex, + // Less frequently changed (per-material). + kDescriptorSetFloatConstantsPixel, + // Per-material, combined images and samplers. + kDescriptorSetTexturesPixel, + // Rarely used at all, but may be changed at an unpredictable rate when + // vertex textures are used, combined images and samplers. + kDescriptorSetTexturesVertex, + // May stay the same across many draws. + kDescriptorSetSystemConstants, + // Pretty rarely used and rarely changed - flow control constants. + kDescriptorSetBoolLoopConstants, + // Never changed. 
+ kDescriptorSetSharedMemoryAndEdram, + kDescriptorSetCount, + }; SpirvShaderTranslator(bool supports_clip_distance = true, bool supports_cull_distance = true); @@ -35,6 +58,10 @@ class SpirvShaderTranslator : public ShaderTranslator { void ProcessLabel(uint32_t cf_index) override; + void ProcessExecInstructionBegin(const ParsedExecInstruction& instr) override; + void ProcessExecInstructionEnd(const ParsedExecInstruction& instr) override; + void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override; + private: // TODO(Triang3l): Depth-only pixel shader. bool IsSpirvVertexOrTessEvalShader() const { return is_vertex_shader(); } @@ -48,11 +75,34 @@ class SpirvShaderTranslator : public ShaderTranslator { } bool IsSpirvFragmentShader() const { return is_pixel_shader(); } + // Must be called before emitting any non-control-flow SPIR-V operations in + // translator callback to ensure that if the last instruction added was + // something like OpBranch - in this case, an unreachable block is created. + void EnsureBuildPointAvailable(); + void StartVertexOrTessEvalShaderBeforeMain(); void StartVertexOrTessEvalShaderInMain(); void CompleteVertexOrTessEvalShaderInMain(); void CompleteVertexOrTessEvalShaderAfterMain(spv::Instruction* entry_point); + // Updates the current flow control condition (to be called in the beginning + // of exec and in jumps), closing the previous conditionals if needed. + // However, if the condition is not different, the instruction-level predicate + // conditional also won't be closed - this must be checked separately if + // needed (for example, in jumps). + void UpdateExecConditionals(ParsedExecInstruction::Type type, + uint32_t bool_constant_index, bool condition); + // Closes the instruction-level predicate conditional if it's open, useful if + // a control flow instruction needs to do some code which needs to respect the + // current exec conditional, but can't itself be predicated. 
+ void CloseInstructionPredication(); + // Closes conditionals opened by exec and instructions within them (but not by + // labels) and updates the state accordingly. + void CloseExecConditionals(); + // Sets the next iteration's program counter value (adding it to phi operands) + // and closes the current block. + void JumpToLabel(uint32_t address); + bool supports_clip_distance_; bool supports_cull_distance_; @@ -68,6 +118,7 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id type_int_; spv::Id type_int4_; spv::Id type_uint_; + spv::Id type_uint4_; spv::Id type_float_; spv::Id type_float2_; spv::Id type_float3_; @@ -75,9 +126,12 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id const_int_0_; spv::Id const_int4_0_; + spv::Id const_uint_0_; spv::Id const_float_0_; spv::Id const_float4_0_; + spv::Id uniform_bool_loop_constants_; + // VS as VS only - int. spv::Id input_vertex_index_; // VS as TES only - int. @@ -111,6 +165,30 @@ class SpirvShaderTranslator : public ShaderTranslator { std::unique_ptr main_switch_op_; spv::Block* main_switch_merge_; std::vector main_switch_next_pc_phi_operands_; + + // If the exec bool constant / predicate conditional is open, block after it + // (not added to the function yet). + spv::Block* cf_exec_conditional_merge_; + // If the instruction-level predicate conditional is open, block after it (not + // added to the function yet). + spv::Block* cf_instruction_predicate_merge_; + // When cf_exec_conditional_merge_ is not null: + // If the current exec conditional is based on a bool constant: the number of + // the bool constant. + // If it's based on the predicate value: kCfExecBoolConstantPredicate. + uint32_t cf_exec_bool_constant_or_predicate_; + static constexpr uint32_t kCfExecBoolConstantPredicate = UINT32_MAX; + // When cf_exec_conditional_merge_ is not null, the expected bool constant or + // predicate value for the current exec conditional. 
+ bool cf_exec_condition_; + // When cf_instruction_predicate_merge_ is not null, the expected predicate + // value for the current or the last instruction. + bool cf_instruction_predicate_condition_; + // Whether there was a `setp` in the current exec before the current + // instruction, thus instruction-level predicate value can be different than + // the exec-level predicate value, and can't merge two execs with the same + // predicate condition anymore. + bool cf_exec_predicate_written_; }; } // namespace gpu diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 0fe4e0255..aa351bf81 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -14,8 +14,9 @@ #include "xenia/base/assert.h" #include "xenia/base/logging.h" +#include "xenia/base/math.h" #include "xenia/base/profiling.h" -#include "xenia/gpu/vulkan/deferred_command_buffer.h" +#include "xenia/gpu/spirv_shader_translator.h" #include "xenia/gpu/vulkan/vulkan_shared_memory.h" #include "xenia/ui/vulkan/vulkan_context.h" #include "xenia/ui/vulkan/vulkan_provider.h" @@ -43,6 +44,76 @@ bool VulkanCommandProcessor::SetupContext() { return false; } + const ui::vulkan::VulkanProvider& provider = + GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info; + descriptor_set_layout_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + descriptor_set_layout_create_info.pNext = nullptr; + descriptor_set_layout_create_info.flags = 0; + descriptor_set_layout_create_info.bindingCount = 0; + descriptor_set_layout_create_info.pBindings = nullptr; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layout_empty_) != VK_SUCCESS) { + XELOGE("Failed to create 
an empty Vulkan descriptor set layout"); + return false; + } + VkShaderStageFlags shader_stages_guest_vertex = + GetGuestVertexShaderStageFlags(); + VkDescriptorSetLayoutBinding descriptor_set_layout_binding_uniform_buffer; + descriptor_set_layout_binding_uniform_buffer.binding = 0; + descriptor_set_layout_binding_uniform_buffer.descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor_set_layout_binding_uniform_buffer.descriptorCount = 1; + descriptor_set_layout_binding_uniform_buffer.stageFlags = + shader_stages_guest_vertex; + descriptor_set_layout_binding_uniform_buffer.pImmutableSamplers = nullptr; + descriptor_set_layout_create_info.bindingCount = 1; + descriptor_set_layout_create_info.pBindings = + &descriptor_set_layout_binding_uniform_buffer; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layout_uniform_buffer_guest_vertex_) != VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for an uniform buffer " + "accessible by guest vertex shaders"); + return false; + } + descriptor_set_layout_binding_uniform_buffer.stageFlags = + VK_SHADER_STAGE_FRAGMENT_BIT; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layout_uniform_buffer_guest_pixel_) != VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for an uniform buffer " + "accessible by guest pixel shaders"); + return false; + } + descriptor_set_layout_binding_uniform_buffer.stageFlags = + VK_SHADER_STAGE_FRAGMENT_BIT; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layout_uniform_buffer_guest_pixel_) != VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for an uniform buffer " + "accessible by guest pixel shaders"); + return false; + } + descriptor_set_layout_binding_uniform_buffer.stageFlags = + shader_stages_guest_vertex | 
VK_SHADER_STAGE_FRAGMENT_BIT; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layout_uniform_buffer_guest_) != VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for an uniform buffer " + "accessible by guest shaders"); + return false; + } + shared_memory_ = std::make_unique(*this, *memory_, trace_writer_); if (!shared_memory_->Initialize()) { @@ -63,6 +134,30 @@ void VulkanCommandProcessor::ShutdownContext() { const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + for (const auto& pipeline_layout_pair : pipeline_layouts_) { + dfn.vkDestroyPipelineLayout( + device, pipeline_layout_pair.second.pipeline_layout, nullptr); + } + pipeline_layouts_.clear(); + for (const auto& descriptor_set_layout_pair : + descriptor_set_layouts_textures_) { + dfn.vkDestroyDescriptorSetLayout(device, descriptor_set_layout_pair.second, + nullptr); + } + descriptor_set_layouts_textures_.clear(); + + ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyDescriptorSetLayout, device, + descriptor_set_layout_uniform_buffer_guest_); + ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyDescriptorSetLayout, device, + descriptor_set_layout_uniform_buffer_guest_pixel_); + ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyDescriptorSetLayout, device, + descriptor_set_layout_uniform_buffer_guest_vertex_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, + device, descriptor_set_layout_empty_); + sparse_bind_wait_stage_mask_ = 0; sparse_buffer_binds_.clear(); sparse_memory_binds_.clear(); @@ -141,6 +236,152 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, EndSubmission(true); } +bool VulkanCommandProcessor::GetPipelineLayout( + uint32_t texture_count_pixel, uint32_t texture_count_vertex, + PipelineLayout& pipeline_layout_out) { + PipelineLayoutKey pipeline_layout_key; + 
pipeline_layout_key.texture_count_pixel = texture_count_pixel; + pipeline_layout_key.texture_count_vertex = texture_count_vertex; + { + auto it = pipeline_layouts_.find(pipeline_layout_key.key); + if (it != pipeline_layouts_.end()) { + pipeline_layout_out = it->second; + return true; + } + } + + const ui::vulkan::VulkanProvider& provider = + GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + VkDescriptorSetLayout descriptor_set_layout_textures_pixel; + if (texture_count_pixel) { + TextureDescriptorSetLayoutKey texture_descriptor_set_layout_key; + texture_descriptor_set_layout_key.is_vertex = 0; + texture_descriptor_set_layout_key.texture_count = texture_count_pixel; + auto it = descriptor_set_layouts_textures_.find( + texture_descriptor_set_layout_key.key); + if (it != descriptor_set_layouts_textures_.end()) { + descriptor_set_layout_textures_pixel = it->second; + } else { + VkDescriptorSetLayoutBinding descriptor_set_layout_binding; + descriptor_set_layout_binding.binding = 0; + descriptor_set_layout_binding.descriptorType = + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + descriptor_set_layout_binding.descriptorCount = texture_count_pixel; + descriptor_set_layout_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + descriptor_set_layout_binding.pImmutableSamplers = nullptr; + VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info; + descriptor_set_layout_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + descriptor_set_layout_create_info.pNext = nullptr; + descriptor_set_layout_create_info.flags = 0; + descriptor_set_layout_create_info.bindingCount = 1; + descriptor_set_layout_create_info.pBindings = + &descriptor_set_layout_binding; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layout_textures_pixel) != VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan 
descriptor set layout for {} combined " + "images and samplers for guest pixel shaders", + texture_count_pixel); + return false; + } + descriptor_set_layouts_textures_.emplace( + texture_descriptor_set_layout_key.key, + descriptor_set_layout_textures_pixel); + } + } else { + descriptor_set_layout_textures_pixel = descriptor_set_layout_empty_; + } + + VkDescriptorSetLayout descriptor_set_layout_textures_vertex; + if (texture_count_vertex) { + TextureDescriptorSetLayoutKey texture_descriptor_set_layout_key; + texture_descriptor_set_layout_key.is_vertex = 0; + texture_descriptor_set_layout_key.texture_count = texture_count_vertex; + auto it = descriptor_set_layouts_textures_.find( + texture_descriptor_set_layout_key.key); + if (it != descriptor_set_layouts_textures_.end()) { + descriptor_set_layout_textures_vertex = it->second; + } else { + VkDescriptorSetLayoutBinding descriptor_set_layout_binding; + descriptor_set_layout_binding.binding = 0; + descriptor_set_layout_binding.descriptorType = + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + descriptor_set_layout_binding.descriptorCount = texture_count_vertex; + descriptor_set_layout_binding.stageFlags = + GetGuestVertexShaderStageFlags(); + descriptor_set_layout_binding.pImmutableSamplers = nullptr; + VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info; + descriptor_set_layout_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + descriptor_set_layout_create_info.pNext = nullptr; + descriptor_set_layout_create_info.flags = 0; + descriptor_set_layout_create_info.bindingCount = 1; + descriptor_set_layout_create_info.pBindings = + &descriptor_set_layout_binding; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layout_textures_vertex) != VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for {} combined " + "images and samplers for guest vertex shaders", + texture_count_vertex); + return false; + } + 
descriptor_set_layouts_textures_.emplace( + texture_descriptor_set_layout_key.key, + descriptor_set_layout_textures_vertex); + } + } else { + descriptor_set_layout_textures_vertex = descriptor_set_layout_empty_; + } + + VkDescriptorSetLayout + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetCount]; + // Fill any unused set layouts with empty layouts. + // TODO(Triang3l): Remove this. + for (size_t i = 0; i < xe::countof(descriptor_set_layouts); ++i) { + descriptor_set_layouts[i] = descriptor_set_layout_empty_; + } + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesPixel] = + descriptor_set_layout_textures_pixel; + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesVertex] = + descriptor_set_layout_textures_vertex; + + VkPipelineLayoutCreateInfo pipeline_layout_create_info; + pipeline_layout_create_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipeline_layout_create_info.pNext = nullptr; + pipeline_layout_create_info.flags = 0; + pipeline_layout_create_info.setLayoutCount = + uint32_t(xe::countof(descriptor_set_layouts)); + pipeline_layout_create_info.pSetLayouts = descriptor_set_layouts; + pipeline_layout_create_info.pushConstantRangeCount = 0; + pipeline_layout_create_info.pPushConstantRanges = nullptr; + VkPipelineLayout pipeline_layout; + if (dfn.vkCreatePipelineLayout(device, &pipeline_layout_create_info, nullptr, + &pipeline_layout) != VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan pipeline layout for guest drawing with {} " + "pixel shader and {} vertex shader textures", + texture_count_pixel, texture_count_vertex); + return false; + } + PipelineLayout pipeline_layout_entry; + pipeline_layout_entry.pipeline_layout = pipeline_layout; + pipeline_layout_entry.descriptor_set_layout_textures_pixel_ref = + descriptor_set_layout_textures_pixel; + pipeline_layout_entry.descriptor_set_layout_textures_vertex_ref = + descriptor_set_layout_textures_vertex; + 
pipeline_layouts_.emplace(pipeline_layout_key.key, pipeline_layout_entry); + pipeline_layout_out = pipeline_layout_entry; + return true; +} + Shader* VulkanCommandProcessor::LoadShader(xenos::ShaderType shader_type, uint32_t guest_address, const uint32_t* host_address, @@ -545,6 +786,17 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { return true; } +VkShaderStageFlags VulkanCommandProcessor::GetGuestVertexShaderStageFlags() + const { + VkShaderStageFlags stages = VK_SHADER_STAGE_VERTEX_BIT; + const ui::vulkan::VulkanProvider& provider = + GetVulkanContext().GetVulkanProvider(); + if (provider.device_features().tessellationShader) { + stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; + } + return stages; +} + } // namespace vulkan } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 036c391b1..b00cab90a 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -64,6 +65,15 @@ class VulkanCommandProcessor : public CommandProcessor { const VkSparseMemoryBind* binds, VkPipelineStageFlags wait_stage_mask); + struct PipelineLayout { + VkPipelineLayout pipeline_layout; + VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref; + VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref; + }; + bool GetPipelineLayout(uint32_t texture_count_pixel, + uint32_t texture_count_vertex, + PipelineLayout& pipeline_layout_out); + protected: bool SetupContext() override; void ShutdownContext() override; @@ -105,6 +115,8 @@ class VulkanCommandProcessor : public CommandProcessor { return !submission_open_ && submissions_in_flight_fences_.empty(); } + VkShaderStageFlags GetGuestVertexShaderStageFlags() const; + bool cache_clear_requested_ = false; std::vector fences_free_; @@ -150,6 +162,39 @@ class VulkanCommandProcessor : public 
CommandProcessor { std::vector sparse_buffer_bind_infos_temp_; VkPipelineStageFlags sparse_bind_wait_stage_mask_ = 0; + // Common descriptor set layouts, usable by anything that may need them. + VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE; + VkDescriptorSetLayout descriptor_set_layout_uniform_buffer_guest_vertex_ = + VK_NULL_HANDLE; + VkDescriptorSetLayout descriptor_set_layout_uniform_buffer_guest_pixel_ = + VK_NULL_HANDLE; + VkDescriptorSetLayout descriptor_set_layout_uniform_buffer_guest_ = + VK_NULL_HANDLE; + + union TextureDescriptorSetLayoutKey { + struct { + uint32_t is_vertex : 1; + // For 0, use descriptor_set_layout_empty_ instead as these are owning + // references. + uint32_t texture_count : 31; + }; + uint32_t key = 0; + }; + // TextureDescriptorSetLayoutKey::key -> VkDescriptorSetLayout. + std::unordered_map + descriptor_set_layouts_textures_; + union PipelineLayoutKey { + struct { + // Pixel textures in the low bits since those are varied much more + // commonly. + uint32_t texture_count_pixel : 16; + uint32_t texture_count_vertex : 16; + }; + uint32_t key = 0; + }; + // PipelineLayoutKey::key -> PipelineLayout. 
+ std::unordered_map pipeline_layouts_; + std::unique_ptr shared_memory_; }; From 556c8de2abc3bce56bdf291dbae1bf7dfa172bfe Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 25 Oct 2020 20:24:48 +0300 Subject: [PATCH 031/123] [SPIR-V] Loops --- src/xenia/gpu/spirv_shader_translator.cc | 287 +++++++++++++++++++++-- src/xenia/gpu/spirv_shader_translator.h | 11 +- 2 files changed, 279 insertions(+), 19 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index c1b376fc0..bf64a4599 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -54,6 +54,7 @@ void SpirvShaderTranslator::StartTranslation() { type_int_ = builder_->makeIntType(32); type_int4_ = builder_->makeVectorType(type_int_, 4); type_uint_ = builder_->makeUintType(32); + type_uint3_ = builder_->makeVectorType(type_uint_, 3); type_uint4_ = builder_->makeVectorType(type_uint_, 4); type_float_ = builder_->makeFloatType(32); type_float2_ = builder_->makeVectorType(type_float_, 2); @@ -61,13 +62,20 @@ void SpirvShaderTranslator::StartTranslation() { type_float4_ = builder_->makeVectorType(type_float_, 4); const_int_0_ = builder_->makeIntConstant(0); - const_uint_0_ = builder_->makeUintConstant(0); id_vector_temp_.clear(); id_vector_temp_.reserve(4); for (uint32_t i = 0; i < 4; ++i) { id_vector_temp_.push_back(const_int_0_); } const_int4_0_ = builder_->makeCompositeConstant(type_int4_, id_vector_temp_); + const_uint_0_ = builder_->makeUintConstant(0); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp_.push_back(const_uint_0_); + } + const_uint4_0_ = + builder_->makeCompositeConstant(type_uint4_, id_vector_temp_); const_float_0_ = builder_->makeFloatConstant(0.0f); id_vector_temp_.clear(); id_vector_temp_.reserve(4); @@ -128,6 +136,9 @@ void SpirvShaderTranslator::StartTranslation() { var_main_predicate_ = builder_->createVariable( spv::NoPrecision, 
spv::StorageClassFunction, type_bool_, "xe_var_predicate", builder_->makeBoolConstant(false)); + var_main_loop_count_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_uint4_, + "xe_var_loop_count", const_uint4_0_); var_main_address_absolute_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_int_, "xe_var_address_absolute", const_int_0_); @@ -179,6 +190,7 @@ void SpirvShaderTranslator::StartTranslation() { builder_->setBuildPoint(main_loop_header_); spv::Id main_loop_pc_current = 0; if (has_main_switch) { + // OpPhi must be the first in the block. id_vector_temp_.clear(); id_vector_temp_.reserve(4); id_vector_temp_.push_back(const_int_0_); @@ -259,6 +271,7 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { function_main_->addBlock(main_loop_continue_); builder_->setBuildPoint(main_loop_continue_); if (has_main_switch) { + // OpPhi, if added, must be the first in the block. // If labels were added, but not jumps (for example, due to the call // instruction not being implemented as of October 18, 2020), send an // impossible program counter value (-1) to the OpPhi at the next iteration. @@ -367,6 +380,253 @@ void SpirvShaderTranslator::ProcessExecInstructionEnd( instr.condition); } +void SpirvShaderTranslator::ProcessLoopStartInstruction( + const ParsedLoopStartInstruction& instr) { + // loop il, L - loop with loop data il, end @ L + + // Loop control is outside execs - actually close the last exec. + CloseExecConditionals(); + + EnsureBuildPointAvailable(); + + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + // Loop constants (member 1). + id_vector_temp_.push_back(builder_->makeIntConstant(1)); + // 4-component vector. + id_vector_temp_.push_back( + builder_->makeIntConstant(int(instr.loop_constant_index >> 2))); + // Scalar within the vector. 
+ id_vector_temp_.push_back( + builder_->makeIntConstant(int(instr.loop_constant_index & 3))); + // Count (unsigned) in bits 0:7 of the loop constant (struct member 1), + // initial aL (unsigned) in 8:15. + spv::Id loop_constant = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_bool_loop_constants_, id_vector_temp_), + spv::NoPrecision); + + spv::Id const_int_8 = builder_->makeIntConstant(8); + + // Push the count to the loop count stack - move XYZ to YZW and set X to the + // new iteration count (swizzling the way glslang does it for similar GLSL). + spv::Id loop_count_stack_old = + builder_->createLoad(var_main_loop_count_, spv::NoPrecision); + spv::Id loop_count_new = + builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, loop_constant, + const_int_0_, const_int_8); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + id_vector_temp_.push_back(loop_count_new); + for (unsigned int i = 0; i < 3; ++i) { + id_vector_temp_.push_back( + builder_->createCompositeExtract(loop_count_stack_old, type_uint_, i)); + } + builder_->createStore( + builder_->createCompositeConstruct(type_uint4_, id_vector_temp_), + var_main_loop_count_); + + // Push aL - keep the same value as in the previous loop if repeating, or the + // new one otherwise. 
+ spv::Id address_relative_stack_old = + builder_->createLoad(var_main_address_relative_, spv::NoPrecision); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + if (instr.is_repeat) { + id_vector_temp_.emplace_back(); + } else { + id_vector_temp_.push_back(builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, + loop_constant, const_int_8, const_int_8))); + } + for (unsigned int i = 0; i < 3; ++i) { + id_vector_temp_.push_back(builder_->createCompositeExtract( + address_relative_stack_old, type_int_, i)); + } + if (instr.is_repeat) { + id_vector_temp_[0] = id_vector_temp_[1]; + } + builder_->createStore( + builder_->createCompositeConstruct(type_int4_, id_vector_temp_), + var_main_address_relative_); + + // Break (jump to the skip label) if the loop counter is 0 (since the + // condition is checked in the end). + spv::Block& head_block = *builder_->getBuildPoint(); + spv::Id loop_count_zero = builder_->createBinOp( + spv::OpIEqual, type_bool_, loop_count_new, const_uint_0_); + spv::Block& skip_block = builder_->makeNewBlock(); + spv::Block& body_block = builder_->makeNewBlock(); + { + std::unique_ptr selection_merge_op = + std::make_unique(spv::OpSelectionMerge); + selection_merge_op->addIdOperand(body_block.getId()); + selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); + head_block.addInstruction(std::move(selection_merge_op)); + } + { + std::unique_ptr branch_conditional_op = + std::make_unique(spv::OpBranchConditional); + branch_conditional_op->addIdOperand(loop_count_zero); + branch_conditional_op->addIdOperand(skip_block.getId()); + branch_conditional_op->addIdOperand(body_block.getId()); + // More likely to enter than to skip. 
+ branch_conditional_op->addImmediateOperand(1); + branch_conditional_op->addImmediateOperand(2); + head_block.addInstruction(std::move(branch_conditional_op)); + } + skip_block.addPredecessor(&head_block); + body_block.addPredecessor(&head_block); + builder_->setBuildPoint(&skip_block); + main_switch_next_pc_phi_operands_.push_back( + builder_->makeIntConstant(int(instr.loop_skip_address))); + main_switch_next_pc_phi_operands_.push_back( + builder_->getBuildPoint()->getId()); + builder_->createBranch(main_loop_continue_); + builder_->setBuildPoint(&body_block); +} + +void SpirvShaderTranslator::ProcessLoopEndInstruction( + const ParsedLoopEndInstruction& instr) { + // endloop il, L - end loop w/ data il, head @ L + + // Loop control is outside execs - actually close the last exec. + CloseExecConditionals(); + + EnsureBuildPointAvailable(); + + // Subtract 1 from the loop counter (will store later). + spv::Id loop_count_stack_old = + builder_->createLoad(var_main_loop_count_, spv::NoPrecision); + spv::Id loop_count = builder_->createBinOp( + spv::OpISub, type_uint_, + builder_->createCompositeExtract(loop_count_stack_old, type_uint_, 0), + builder_->makeUintConstant(1)); + spv::Id address_relative_stack_old = + builder_->createLoad(var_main_address_relative_, spv::NoPrecision); + + // Predicated break works like break if (loop_count == 0 || [!]p0). + // Three options, due to logical operations usage (so OpLogicalNot is not + // required): + // - Continue if (loop_count != 0). + // - Continue if (loop_count != 0 && p0), if breaking if !p0. + // - Break if (loop_count == 0 || p0), if breaking if p0. + bool break_is_true = instr.is_predicated_break && instr.predicate_condition; + spv::Id condition = + builder_->createBinOp(break_is_true ? spv::OpIEqual : spv::OpINotEqual, + type_bool_, loop_count, const_uint_0_); + if (instr.is_predicated_break) { + condition = builder_->createBinOp( + instr.predicate_condition ? 
spv::OpLogicalOr : spv::OpLogicalAnd, + type_bool_, condition, + builder_->createLoad(var_main_predicate_, spv::NoPrecision)); + } + + spv::Block& body_block = *builder_->getBuildPoint(); + spv::Block& continue_block = builder_->makeNewBlock(); + spv::Block& break_block = builder_->makeNewBlock(); + { + std::unique_ptr selection_merge_op = + std::make_unique(spv::OpSelectionMerge); + selection_merge_op->addIdOperand(break_block.getId()); + selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); + body_block.addInstruction(std::move(selection_merge_op)); + } + { + std::unique_ptr branch_conditional_op = + std::make_unique(spv::OpBranchConditional); + branch_conditional_op->addIdOperand(condition); + // More likely to continue than to break. + if (break_is_true) { + branch_conditional_op->addIdOperand(break_block.getId()); + branch_conditional_op->addIdOperand(continue_block.getId()); + branch_conditional_op->addImmediateOperand(1); + branch_conditional_op->addImmediateOperand(2); + } else { + branch_conditional_op->addIdOperand(continue_block.getId()); + branch_conditional_op->addIdOperand(break_block.getId()); + branch_conditional_op->addImmediateOperand(2); + branch_conditional_op->addImmediateOperand(1); + } + body_block.addInstruction(std::move(branch_conditional_op)); + } + continue_block.addPredecessor(&body_block); + break_block.addPredecessor(&body_block); + + // Continue case. + builder_->setBuildPoint(&continue_block); + // Store the loop count with 1 subtracted. + builder_->createStore(builder_->createCompositeInsert( + loop_count, loop_count_stack_old, type_uint4_, 0), + var_main_loop_count_); + // Extract the value to add to aL (signed, in bits 16:23 of the loop + // constant). + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + // Loop constants (member 1). + id_vector_temp_.push_back(builder_->makeIntConstant(1)); + // 4-component vector. 
+ id_vector_temp_.push_back( + builder_->makeIntConstant(int(instr.loop_constant_index >> 2))); + // Scalar within the vector. + id_vector_temp_.push_back( + builder_->makeIntConstant(int(instr.loop_constant_index & 3))); + spv::Id loop_constant = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_bool_loop_constants_, id_vector_temp_), + spv::NoPrecision); + spv::Id address_relative_old = builder_->createCompositeExtract( + address_relative_stack_old, type_int_, 0); + builder_->createStore( + builder_->createCompositeInsert( + builder_->createBinOp( + spv::OpIAdd, type_int_, address_relative_old, + builder_->createTriOp( + spv::OpBitFieldSExtract, type_int_, + builder_->createUnaryOp(spv::OpBitcast, type_int_, + loop_constant), + builder_->makeIntConstant(16), builder_->makeIntConstant(8))), + address_relative_stack_old, type_int4_, 0), + var_main_address_relative_); + // Jump back to the beginning of the loop body. + main_switch_next_pc_phi_operands_.push_back( + builder_->makeIntConstant(int(instr.loop_body_address))); + main_switch_next_pc_phi_operands_.push_back( + builder_->getBuildPoint()->getId()); + builder_->createBranch(main_loop_continue_); + + // Break case. + builder_->setBuildPoint(&break_block); + // Pop the current loop off the loop counter and the relative address stacks - + // move YZW to XYZ and set W to 0. 
+ id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + for (unsigned int i = 1; i < 4; ++i) { + id_vector_temp_.push_back( + builder_->createCompositeExtract(loop_count_stack_old, type_uint_, i)); + } + id_vector_temp_.push_back(const_uint_0_); + builder_->createStore( + builder_->createCompositeConstruct(type_uint4_, id_vector_temp_), + var_main_loop_count_); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + for (unsigned int i = 1; i < 4; ++i) { + id_vector_temp_.push_back(builder_->createCompositeExtract( + address_relative_stack_old, type_int_, i)); + } + id_vector_temp_.push_back(const_int_0_); + builder_->createStore( + builder_->createCompositeConstruct(type_int4_, id_vector_temp_), + var_main_address_relative_); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + // Now going to fall through to the next control flow instruction. +} + void SpirvShaderTranslator::ProcessJumpInstruction( const ParsedJumpInstruction& instr) { // Treat like exec, merge with execs if possible, since it's an if too. @@ -386,7 +646,15 @@ void SpirvShaderTranslator::ProcessJumpInstruction( // on the control flow level too. CloseInstructionPredication(); - JumpToLabel(instr.target_address); + if (builder_->getBuildPoint()->isTerminated()) { + // Unreachable for some reason. + return; + } + main_switch_next_pc_phi_operands_.push_back( + builder_->makeIntConstant(int(instr.target_address))); + main_switch_next_pc_phi_operands_.push_back( + builder_->getBuildPoint()->getId()); + builder_->createBranch(main_loop_continue_); } void SpirvShaderTranslator::EnsureBuildPointAvailable() { @@ -520,7 +788,7 @@ void SpirvShaderTranslator::UpdateExecConditionals( builder_->makeIntConstant(int(bool_constant_index >> 7))); // 32-bit scalar of a 128-bit vector. 
id_vector_temp_.push_back( - builder_->makeIntConstant(int((bool_constant_index >> 5) & 2))); + builder_->makeIntConstant(int((bool_constant_index >> 5) & 3))); spv::Id bool_constant_scalar = builder_->createLoad(builder_->createAccessChain( spv::StorageClassUniform, @@ -589,18 +857,5 @@ void SpirvShaderTranslator::CloseExecConditionals() { cf_exec_predicate_written_ = false; } -void SpirvShaderTranslator::JumpToLabel(uint32_t address) { - assert_false(label_addresses().empty()); - spv::Block& origin_block = *builder_->getBuildPoint(); - if (origin_block.isTerminated()) { - // Unreachable jump for some reason. - return; - } - main_switch_next_pc_phi_operands_.push_back( - builder_->makeIntConstant(int(address))); - main_switch_next_pc_phi_operands_.push_back(origin_block.getId()); - builder_->createBranch(main_loop_continue_); -} - } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 943385f20..6cd94e415 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -60,6 +60,10 @@ class SpirvShaderTranslator : public ShaderTranslator { void ProcessExecInstructionBegin(const ParsedExecInstruction& instr) override; void ProcessExecInstructionEnd(const ParsedExecInstruction& instr) override; + void ProcessLoopStartInstruction( + const ParsedLoopStartInstruction& instr) override; + void ProcessLoopEndInstruction( + const ParsedLoopEndInstruction& instr) override; void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override; private: @@ -99,9 +103,6 @@ class SpirvShaderTranslator : public ShaderTranslator { // Closes conditionals opened by exec and instructions within them (but not by // labels) and updates the state accordingly. void CloseExecConditionals(); - // Sets the next iteration's program counter value (adding it to phi operands) - // and closes the current block. 
- void JumpToLabel(uint32_t address); bool supports_clip_distance_; bool supports_cull_distance_; @@ -118,6 +119,7 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id type_int_; spv::Id type_int4_; spv::Id type_uint_; + spv::Id type_uint3_; spv::Id type_uint4_; spv::Id type_float_; spv::Id type_float2_; @@ -127,6 +129,7 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id const_int_0_; spv::Id const_int4_0_; spv::Id const_uint_0_; + spv::Id const_uint4_0_; spv::Id const_float_0_; spv::Id const_float4_0_; @@ -149,6 +152,8 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Function* function_main_; // bool. spv::Id var_main_predicate_; + // uint4. + spv::Id var_main_loop_count_; // int4. spv::Id var_main_address_relative_; // int. From 1c83c8dcfa299fb2f7f1d04337547b6c53aa6f33 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 26 Oct 2020 22:12:01 +0300 Subject: [PATCH 032/123] [SPIR-V] Instruction predication --- src/xenia/gpu/spirv_shader_translator.cc | 45 ++++++++++++++++++++ src/xenia/gpu/spirv_shader_translator.h | 5 +++ src/xenia/gpu/spirv_shader_translator_alu.cc | 28 ++++++++++++ 3 files changed, 78 insertions(+) create mode 100644 src/xenia/gpu/spirv_shader_translator_alu.cc diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index bf64a4599..dee001b99 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -827,6 +827,51 @@ void SpirvShaderTranslator::UpdateExecConditionals( builder_->setBuildPoint(&inner_block); } +void SpirvShaderTranslator::UpdateInstructionPredication(bool predicated, + bool condition) { + if (!predicated) { + CloseInstructionPredication(); + return; + } + + if (cf_instruction_predicate_merge_) { + if (cf_instruction_predicate_condition_ == condition) { + // Already in the needed instruction-level conditional. 
+ return; + } + CloseInstructionPredication(); + } + + // If the instruction predicate condition is the same as the exec predicate + // condition, no need to open a check. However, if there was a `setp` prior + // to this instruction, the predicate value now may be different than it was + // in the beginning of the exec. + if (!cf_exec_predicate_written_ && cf_exec_conditional_merge_ && + cf_exec_bool_constant_or_predicate_ == kCfExecBoolConstantPredicate && + cf_exec_condition_ == condition) { + return; + } + + cf_instruction_predicate_condition_ = condition; + EnsureBuildPointAvailable(); + spv::Id predicate_id = + builder_->createLoad(var_main_predicate_, spv::NoPrecision); + spv::Block& predicated_block = builder_->makeNewBlock(); + cf_instruction_predicate_merge_ = &builder_->makeNewBlock(); + { + std::unique_ptr selection_merge_op = + std::make_unique(spv::OpSelectionMerge); + selection_merge_op->addIdOperand(cf_instruction_predicate_merge_->getId()); + selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); + builder_->getBuildPoint()->addInstruction(std::move(selection_merge_op)); + } + builder_->createConditionalBranch( + predicate_id, + condition ? &predicated_block : cf_instruction_predicate_merge_, + condition ? 
cf_instruction_predicate_merge_ : &predicated_block); + builder_->setBuildPoint(&predicated_block); +} + void SpirvShaderTranslator::CloseInstructionPredication() { if (!cf_instruction_predicate_merge_) { return; diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 6cd94e415..473afb65c 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -66,6 +66,8 @@ class SpirvShaderTranslator : public ShaderTranslator { const ParsedLoopEndInstruction& instr) override; void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override; + void ProcessAluInstruction(const ParsedAluInstruction& instr) override; + private: // TODO(Triang3l): Depth-only pixel shader. bool IsSpirvVertexOrTessEvalShader() const { return is_vertex_shader(); } @@ -96,6 +98,9 @@ class SpirvShaderTranslator : public ShaderTranslator { // needed (for example, in jumps). void UpdateExecConditionals(ParsedExecInstruction::Type type, uint32_t bool_constant_index, bool condition); + // Opens or reopens the predicate check conditional for the instruction. + // Should be called before processing a non-control-flow instruction. + void UpdateInstructionPredication(bool predicated, bool condition); // Closes the instruction-level predicate conditional if it's open, useful if // a control flow instruction needs to do some code which needs to respect the // current exec conditional, but can't itself be predicated. diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc new file mode 100644 index 000000000..4a051012a --- /dev/null +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -0,0 +1,28 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. 
* + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/spirv_shader_translator.h" + +namespace xe { +namespace gpu { + +void SpirvShaderTranslator::ProcessAluInstruction( + const ParsedAluInstruction& instr) { + if (instr.IsNop()) { + // Don't even disassemble or update predication. + return; + } + + UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition); + + // TODO(Triang3l): Translate the ALU instruction. +} + +} // namespace gpu +} // namespace xe From 4dba2d8d892b4fafda351abb11026ae9062a6bad Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 27 Oct 2020 22:48:47 +0300 Subject: [PATCH 033/123] [SPIR-V] Operand loading --- src/xenia/gpu/spirv_shader_translator.cc | 175 ++++++++++++++++++- src/xenia/gpu/spirv_shader_translator.h | 57 +++++- src/xenia/gpu/spirv_shader_translator_alu.cc | 74 +++++++- 3 files changed, 294 insertions(+), 12 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index dee001b99..ee8df339e 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -29,6 +29,10 @@ void SpirvShaderTranslator::Reset() { builder_.reset(); + uniform_float_constants_ = spv::NoResult; + + var_main_registers_ = spv::NoResult; + main_switch_op_.reset(); main_switch_next_pc_phi_operands_.clear(); @@ -85,15 +89,42 @@ void SpirvShaderTranslator::StartTranslation() { const_float4_0_ = builder_->makeCompositeConstant(type_float4_, id_vector_temp_); + // Common uniform buffer - float constants. 
+ uint32_t float_constant_count = constant_register_map().float_count; + if (float_constant_count) { + id_vector_temp_.clear(); + id_vector_temp_.reserve(1); + id_vector_temp_.push_back(builder_->makeArrayType( + type_float4_, builder_->makeUintConstant(float_constant_count), + sizeof(float) * 4)); + // Currently (as of October 24, 2020) makeArrayType only uses the stride to + // check if deduplication can be done - the array stride decoration needs to + // be applied explicitly. + builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, + sizeof(float) * 4); + spv::Id type_float_constants = + builder_->makeStructType(id_vector_temp_, "XeFloatConstants"); + builder_->addMemberName(type_float_constants, 0, "float_constants"); + builder_->addMemberDecoration(type_float_constants, 0, + spv::DecorationOffset, 0); + builder_->addDecoration(type_float_constants, spv::DecorationBlock); + uniform_float_constants_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassUniform, type_float_constants, + "xe_uniform_float_constants"); + builder_->addDecoration( + uniform_float_constants_, spv::DecorationDescriptorSet, + int(IsSpirvFragmentShader() ? kDescriptorSetFloatConstantsPixel + : kDescriptorSetFloatConstantsVertex)); + builder_->addDecoration(uniform_float_constants_, spv::DecorationBinding, + 0); + } + // Common uniform buffer - bool and loop constants. id_vector_temp_.clear(); id_vector_temp_.reserve(2); // 256 bool constants. id_vector_temp_.push_back(builder_->makeArrayType( type_uint4_, builder_->makeUintConstant(2), sizeof(uint32_t) * 4)); - // Currently (as of October 24, 2020) makeArrayType only uses the stride to - // check if deduplication can be done - the array stride decoration needs to - // be applied explicitly. builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, sizeof(uint32_t) * 4); // 32 loop constants. 
@@ -188,7 +219,7 @@ void SpirvShaderTranslator::StartTranslation() { // Main loop header - based on whether it's the first iteration (entered from // the function or from the continuation), choose the program counter. builder_->setBuildPoint(main_loop_header_); - spv::Id main_loop_pc_current = 0; + spv::Id main_loop_pc_current = spv::NoResult; if (has_main_switch) { // OpPhi must be the first in the block. id_vector_temp_.clear(); @@ -704,15 +735,24 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { builder_->makeArrayType(type_float_, builder_->makeUintConstant(1), 0)); spv::Id type_struct_per_vertex = builder_->makeStructType(struct_per_vertex_members, "gl_PerVertex"); + builder_->addMemberDecoration(type_struct_per_vertex, + kOutputPerVertexMemberPosition, + spv::DecorationInvariant); builder_->addMemberDecoration(type_struct_per_vertex, kOutputPerVertexMemberPosition, spv::DecorationBuiltIn, spv::BuiltInPosition); builder_->addMemberDecoration(type_struct_per_vertex, kOutputPerVertexMemberPointSize, spv::DecorationBuiltIn, spv::BuiltInPointSize); + builder_->addMemberDecoration(type_struct_per_vertex, + kOutputPerVertexMemberClipDistance, + spv::DecorationInvariant); builder_->addMemberDecoration( type_struct_per_vertex, kOutputPerVertexMemberClipDistance, spv::DecorationBuiltIn, spv::BuiltInClipDistance); + builder_->addMemberDecoration(type_struct_per_vertex, + kOutputPerVertexMemberCullDistance, + spv::DecorationInvariant); builder_->addMemberDecoration( type_struct_per_vertex, kOutputPerVertexMemberCullDistance, spv::DecorationBuiltIn, spv::BuiltInCullDistance); @@ -902,5 +942,132 @@ void SpirvShaderTranslator::CloseExecConditionals() { cf_exec_predicate_written_ = false; } +spv::Id SpirvShaderTranslator::GetStorageAddressingIndex( + InstructionStorageAddressingMode addressing_mode, uint32_t storage_index) { + EnsureBuildPointAvailable(); + spv::Id base_pointer = spv::NoResult; + switch (addressing_mode) { + case 
InstructionStorageAddressingMode::kStatic: + return builder_->makeIntConstant(int(storage_index)); + case InstructionStorageAddressingMode::kAddressAbsolute: + base_pointer = var_main_address_absolute_; + break; + case InstructionStorageAddressingMode::kAddressRelative: + // Load X component. + id_vector_temp_util_.clear(); + id_vector_temp_util_.reserve(1); + id_vector_temp_util_.push_back(const_int_0_); + base_pointer = builder_->createAccessChain(spv::StorageClassFunction, + var_main_address_relative_, + id_vector_temp_util_); + break; + } + assert_not_zero(base_pointer); + spv::Id index = builder_->createLoad(base_pointer, spv::NoPrecision); + if (storage_index) { + index = + builder_->createBinOp(spv::OpIAdd, type_int_, index, + builder_->makeIntConstant(int(storage_index))); + } + return index; +} + +spv::Id SpirvShaderTranslator::LoadOperandStorage( + const InstructionOperand& operand) { + spv::Id index = GetStorageAddressingIndex(operand.storage_addressing_mode, + operand.storage_index); + EnsureBuildPointAvailable(); + spv::Id vec4_pointer = spv::NoResult; + switch (operand.storage_source) { + case InstructionStorageSource::kRegister: + assert_not_zero(var_main_registers_); + id_vector_temp_util_.clear(); + id_vector_temp_util_.reserve(1); + // Array element. + id_vector_temp_util_.push_back(index); + vec4_pointer = builder_->createAccessChain( + spv::StorageClassFunction, var_main_registers_, id_vector_temp_util_); + break; + case InstructionStorageSource::kConstantFloat: + assert_not_zero(uniform_float_constants_); + id_vector_temp_util_.clear(); + id_vector_temp_util_.reserve(2); + // The first and the only structure member. + id_vector_temp_util_.push_back(const_int_0_); + // Array element. 
+ id_vector_temp_util_.push_back(index); + vec4_pointer = builder_->createAccessChain(spv::StorageClassUniform, + uniform_float_constants_, + id_vector_temp_util_); + break; + default: + assert_unhandled_case(operand.storage_source); + } + assert_not_zero(vec4_pointer); + return builder_->createLoad(vec4_pointer, spv::NoPrecision); +} + +spv::Id SpirvShaderTranslator::ApplyOperandModifiers( + spv::Id operand_value, const InstructionOperand& original_operand, + bool invert_negate, bool force_absolute) { + spv::Id type = builder_->getTypeId(operand_value); + assert_true(type != spv::NoType); + if (type == spv::NoType) { + return operand_value; + } + if (original_operand.is_absolute_value || force_absolute) { + EnsureBuildPointAvailable(); + id_vector_temp_util_.clear(); + id_vector_temp_util_.reserve(1); + id_vector_temp_util_.push_back(operand_value); + operand_value = builder_->createBuiltinCall( + type, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_util_); + } + if (original_operand.is_negated != invert_negate) { + EnsureBuildPointAvailable(); + operand_value = + builder_->createUnaryOp(spv::OpFNegate, type, operand_value); + builder_->addDecoration(operand_value, spv::DecorationNoContraction); + } + return operand_value; +} + +spv::Id SpirvShaderTranslator::GetUnmodifiedOperandComponents( + spv::Id operand_storage, const InstructionOperand& original_operand, + uint32_t components) { + assert_not_zero(components); + if (!components) { + return spv::NoResult; + } + assert_true(components <= 0b1111); + if (components == 0b1111 && original_operand.IsStandardSwizzle()) { + return operand_storage; + } + EnsureBuildPointAvailable(); + uint32_t component_count = xe::bit_count(components); + if (component_count == 1) { + uint32_t scalar_index; + xe::bit_scan_forward(components, &scalar_index); + return builder_->createCompositeExtract( + operand_storage, type_float_, + static_cast(original_operand.GetComponent(scalar_index)) - + static_cast(SwizzleSource::kX)); + 
} + id_vector_temp_util_.clear(); + id_vector_temp_util_.reserve(component_count); + uint32_t components_remaining = components; + uint32_t component_index; + while (xe::bit_scan_forward(components_remaining, &component_index)) { + components_remaining &= ~(uint32_t(1) << component_index); + id_vector_temp_util_.push_back( + static_cast( + original_operand.GetComponent(component_index)) - + static_cast(SwizzleSource::kX)); + } + return builder_->createRvalueSwizzle(spv::NoPrecision, + type_float_vectors_[component_count - 1], + operand_storage, id_vector_temp_util_); +} + } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 473afb65c..c4dbe2c1a 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -81,9 +81,10 @@ class SpirvShaderTranslator : public ShaderTranslator { } bool IsSpirvFragmentShader() const { return is_pixel_shader(); } - // Must be called before emitting any non-control-flow SPIR-V operations in - // translator callback to ensure that if the last instruction added was - // something like OpBranch - in this case, an unreachable block is created. + // Must be called before emitting any SPIR-V operations that must be in a + // block in translator callbacks to ensure that if the last instruction added + // was something like OpBranch - in this case, an unreachable block is + // created. void EnsureBuildPointAvailable(); void StartVertexOrTessEvalShaderBeforeMain(); @@ -109,12 +110,47 @@ class SpirvShaderTranslator : public ShaderTranslator { // labels) and updates the state accordingly. void CloseExecConditionals(); + spv::Id GetStorageAddressingIndex( + InstructionStorageAddressingMode addressing_mode, uint32_t storage_index); + // Loads unswizzled operand without sign modifiers as float4. 
+ spv::Id LoadOperandStorage(const InstructionOperand& operand); + spv::Id ApplyOperandModifiers(spv::Id operand_value, + const InstructionOperand& original_operand, + bool invert_negate = false, + bool force_absolute = false); + // Returns the requested components, with the operand's swizzle applied, in a + // condensed form, but without negation / absolute value modifiers. The + // storage is float4, no matter what the component count of original_operand + // is (the storage will be either r# or c#, but the instruction may be + // scalar). + spv::Id GetUnmodifiedOperandComponents( + spv::Id operand_storage, const InstructionOperand& original_operand, + uint32_t components); + spv::Id GetOperandComponents(spv::Id operand_storage, + const InstructionOperand& original_operand, + uint32_t components, bool invert_negate = false, + bool force_absolute = false) { + return ApplyOperandModifiers( + GetUnmodifiedOperandComponents(operand_storage, original_operand, + components), + original_operand, invert_negate, force_absolute); + } + + // Return type is a float vector of xe::bit_count(result.GetUsedWriteMask()) + // or a single float, depending on whether it's a reduction instruction (check + // getTypeId of the result), or returns spv::NoResult if nothing to store. + spv::Id ProcessVectorAluOperation(const ParsedAluInstruction& instr, + bool& predicate_written); + bool supports_clip_distance_; bool supports_cull_distance_; std::unique_ptr builder_; std::vector id_vector_temp_; + // For helper functions like operand loading, so they don't conflict with + // id_vector_temp_ usage in bigger callbacks. 
+ std::vector id_vector_temp_util_; std::vector uint_vector_temp_; spv::Id ext_inst_glsl_std_450_; @@ -126,10 +162,16 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id type_uint_; spv::Id type_uint3_; spv::Id type_uint4_; - spv::Id type_float_; - spv::Id type_float2_; - spv::Id type_float3_; - spv::Id type_float4_; + union { + struct { + spv::Id type_float_; + spv::Id type_float2_; + spv::Id type_float3_; + spv::Id type_float4_; + }; + // Index = component count - 1. + spv::Id type_float_vectors_[4]; + }; spv::Id const_int_0_; spv::Id const_int4_0_; @@ -138,6 +180,7 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id const_float_0_; spv::Id const_float4_0_; + spv::Id uniform_float_constants_; spv::Id uniform_bool_loop_constants_; // VS as VS only - int. diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index 4a051012a..4edf4c6df 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -9,6 +9,8 @@ #include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/base/math.h" + namespace xe { namespace gpu { @@ -21,7 +23,77 @@ void SpirvShaderTranslator::ProcessAluInstruction( UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition); - // TODO(Triang3l): Translate the ALU instruction. + // Floating-point arithmetic operations (addition, subtraction, negation, + // multiplication, dot product, division, modulo - see isArithmeticOperation + // in propagateNoContraction of glslang) must have the NoContraction + // decoration to prevent reordering to make sure floating-point calculations + // are optimized predictably and exactly the same in different shaders to + // allow for multipass rendering (in addition to the Invariant decoration on + // outputs). + + // Whether the instruction has changed the predicate, and it needs to be + // checked again later. 
+ bool predicate_written_vector = false; + ProcessVectorAluOperation(instr, predicate_written_vector); + // TODO(Triang3l): Process the ALU scalar operation. + + if (predicate_written_vector) { + cf_exec_predicate_written_ = true; + CloseInstructionPredication(); + } +} + +spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( + const ParsedAluInstruction& instr, bool& predicate_written) { + predicate_written = false; + + uint32_t used_result_components = + instr.vector_and_constant_result.GetUsedResultComponents(); + if (!used_result_components && + !AluVectorOpHasSideEffects(instr.vector_opcode)) { + return spv::NoResult; + } + uint32_t used_result_component_count = xe::bit_count(used_result_components); + + // Load operand storage without swizzle and sign modifiers. + // A small shortcut, operands of cube are the same, but swizzled. + uint32_t operand_count; + if (instr.vector_opcode == ucode::AluVectorOpcode::kCube) { + operand_count = 1; + } else { + operand_count = instr.vector_operand_count; + } + spv::Id operand_storage[3] = {}; + for (uint32_t i = 0; i < operand_count; ++i) { + operand_storage[i] = LoadOperandStorage(instr.vector_operands[i]); + } + spv::Id result_vector_type = + used_result_component_count + ? type_float_vectors_[used_result_component_count - 1] + : spv::NoType; + + // In case the paired scalar instruction (if processed first) terminates the + // block (like via OpKill). + EnsureBuildPointAvailable(); + + switch (instr.vector_opcode) { + case ucode::AluVectorOpcode::kAdd: { + spv::Id result = builder_->createBinOp( + spv::OpFAdd, result_vector_type, + GetOperandComponents(operand_storage[0], instr.vector_operands[0], + used_result_components), + GetOperandComponents(operand_storage[1], instr.vector_operands[1], + used_result_components)); + builder_->addDecoration(result, spv::DecorationNoContraction); + return result; + } break; + // TODO(Triang3l): Handle all instructions. + default: + break; + } + + // Invalid instruction. 
+ return spv::NoResult; } } // namespace gpu From 738cb0b84731d225096e513909f625b413bace8c Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 29 Oct 2020 22:07:02 +0300 Subject: [PATCH 034/123] [SPIR-V] Result storing --- src/xenia/gpu/shader.h | 37 ++- src/xenia/gpu/spirv_shader_translator.cc | 312 ++++++++++++++++++- src/xenia/gpu/spirv_shader_translator.h | 36 ++- src/xenia/gpu/spirv_shader_translator_alu.cc | 5 +- 4 files changed, 364 insertions(+), 26 deletions(-) diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index 2c25e682d..ab1ab32c9 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -65,17 +65,17 @@ enum class InstructionStorageTarget { // disassembly (because oPts.x000 will be assembled, but oPts.x00_ has both // skipped components and zeros, which cannot be encoded, and therefore it will // not). -constexpr uint32_t GetInstructionStorageTargetUsedComponents( +constexpr uint32_t GetInstructionStorageTargetUsedComponentCount( InstructionStorageTarget target) { switch (target) { case InstructionStorageTarget::kNone: - return 0b0000; + return 0; case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex: - return 0b0111; + return 3; case InstructionStorageTarget::kDepth: - return 0b0001; + return 1; default: - return 0b1111; + return 4; } } @@ -136,8 +136,9 @@ struct InstructionResult { // Returns the write mask containing only components actually present in the // target. uint32_t GetUsedWriteMask() const { - return original_write_mask & - GetInstructionStorageTargetUsedComponents(storage_target); + uint32_t target_component_count = + GetInstructionStorageTargetUsedComponentCount(storage_target); + return original_write_mask & ((1 << target_component_count) - 1); } // True if the components are in their 'standard' swizzle arrangement (xyzw). 
bool IsStandardSwizzle() const { @@ -161,6 +162,28 @@ struct InstructionResult { } return used_components; } + // Returns which components of the used write mask are constant, and what + // values they have. + uint32_t GetUsedConstantComponents(uint32_t& constant_values_out) const { + uint32_t constant_components = 0; + uint32_t constant_values = 0; + uint32_t used_write_mask = GetUsedWriteMask(); + for (uint32_t i = 0; i < 4; ++i) { + if (!(used_write_mask & (1 << i))) { + continue; + } + SwizzleSource component = components[i]; + if (component >= SwizzleSource::kX && component <= SwizzleSource::kW) { + continue; + } + constant_components |= 1 << i; + if (component == SwizzleSource::k1) { + constant_values |= 1 << i; + } + } + constant_values_out = constant_values; + return constant_components; + } }; enum class InstructionStorageSource { diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index ee8df339e..e80a55444 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -9,6 +9,7 @@ #include "xenia/gpu/spirv_shader_translator.h" +#include #include #include #include @@ -83,17 +84,32 @@ void SpirvShaderTranslator::StartTranslation() { const_float_0_ = builder_->makeFloatConstant(0.0f); id_vector_temp_.clear(); id_vector_temp_.reserve(4); - for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp_.push_back(const_float_0_); + for (uint32_t i = 1; i < 4; ++i) { id_vector_temp_.push_back(const_float_0_); + const_float_vectors_0_[i] = builder_->makeCompositeConstant( + type_float_vectors_[i], id_vector_temp_); } - const_float4_0_ = - builder_->makeCompositeConstant(type_float4_, id_vector_temp_); + const_float_1_ = builder_->makeFloatConstant(1.0f); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + id_vector_temp_.push_back(const_float_1_); + for (uint32_t i = 1; i < 4; ++i) { + id_vector_temp_.push_back(const_float_1_); + const_float_vectors_1_[i] = 
builder_->makeCompositeConstant( + type_float_vectors_[i], id_vector_temp_); + } + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + id_vector_temp_.push_back(const_float_0_); + id_vector_temp_.push_back(const_float_1_); + const_float2_0_1_ = + builder_->makeCompositeConstant(type_float2_, id_vector_temp_); // Common uniform buffer - float constants. uint32_t float_constant_count = constant_register_map().float_count; if (float_constant_count) { id_vector_temp_.clear(); - id_vector_temp_.reserve(1); id_vector_temp_.push_back(builder_->makeArrayType( type_float4_, builder_->makeUintConstant(float_constant_count), sizeof(float) * 4)); @@ -120,6 +136,9 @@ void SpirvShaderTranslator::StartTranslation() { } // Common uniform buffer - bool and loop constants. + // Uniform buffers must have std140 packing, so using arrays of 4-component + // vectors instead of scalar arrays because the latter would have padding to + // 16 bytes in each element. id_vector_temp_.clear(); id_vector_temp_.reserve(2); // 256 bool constants. @@ -653,8 +672,6 @@ void SpirvShaderTranslator::ProcessLoopEndInstruction( builder_->createStore( builder_->createCompositeConstruct(type_int4_, id_vector_temp_), var_main_address_relative_); - id_vector_temp_.clear(); - id_vector_temp_.reserve(4); // Now going to fall through to the next control flow instruction. } @@ -955,14 +972,13 @@ spv::Id SpirvShaderTranslator::GetStorageAddressingIndex( case InstructionStorageAddressingMode::kAddressRelative: // Load X component. 
id_vector_temp_util_.clear(); - id_vector_temp_util_.reserve(1); id_vector_temp_util_.push_back(const_int_0_); base_pointer = builder_->createAccessChain(spv::StorageClassFunction, var_main_address_relative_, id_vector_temp_util_); break; } - assert_not_zero(base_pointer); + assert_true(base_pointer != spv::NoResult); spv::Id index = builder_->createLoad(base_pointer, spv::NoPrecision); if (storage_index) { index = @@ -980,16 +996,15 @@ spv::Id SpirvShaderTranslator::LoadOperandStorage( spv::Id vec4_pointer = spv::NoResult; switch (operand.storage_source) { case InstructionStorageSource::kRegister: - assert_not_zero(var_main_registers_); + assert_true(var_main_registers_ != spv::NoResult); id_vector_temp_util_.clear(); - id_vector_temp_util_.reserve(1); // Array element. id_vector_temp_util_.push_back(index); vec4_pointer = builder_->createAccessChain( spv::StorageClassFunction, var_main_registers_, id_vector_temp_util_); break; case InstructionStorageSource::kConstantFloat: - assert_not_zero(uniform_float_constants_); + assert_true(uniform_float_constants_ != spv::NoResult); id_vector_temp_util_.clear(); id_vector_temp_util_.reserve(2); // The first and the only structure member. 
@@ -1003,7 +1018,7 @@ spv::Id SpirvShaderTranslator::LoadOperandStorage( default: assert_unhandled_case(operand.storage_source); } - assert_not_zero(vec4_pointer); + assert_true(vec4_pointer != spv::NoResult); return builder_->createLoad(vec4_pointer, spv::NoPrecision); } @@ -1018,7 +1033,6 @@ spv::Id SpirvShaderTranslator::ApplyOperandModifiers( if (original_operand.is_absolute_value || force_absolute) { EnsureBuildPointAvailable(); id_vector_temp_util_.clear(); - id_vector_temp_util_.reserve(1); id_vector_temp_util_.push_back(operand_value); operand_value = builder_->createBuiltinCall( type, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_util_); @@ -1069,5 +1083,277 @@ spv::Id SpirvShaderTranslator::GetUnmodifiedOperandComponents( operand_storage, id_vector_temp_util_); } +void SpirvShaderTranslator::StoreResult(const InstructionResult& result, + spv::Id value) { + uint32_t used_write_mask = result.GetUsedWriteMask(); + if (!used_write_mask) { + return; + } + + EnsureBuildPointAvailable(); + + spv::Id target_pointer = spv::NoResult; + switch (result.storage_target) { + case InstructionStorageTarget::kNone: + break; + case InstructionStorageTarget::kRegister: { + assert_true(var_main_registers_ != spv::NoResult); + // Must call GetStorageAddressingIndex first because of + // id_vector_temp_util_ usage in it. + spv::Id register_index = GetStorageAddressingIndex( + result.storage_addressing_mode, result.storage_index); + id_vector_temp_util_.clear(); + // Array element. 
+ id_vector_temp_util_.push_back(register_index); + target_pointer = builder_->createAccessChain( + spv::StorageClassFunction, var_main_registers_, id_vector_temp_util_); + } break; + case InstructionStorageTarget::kPosition: + assert_true(IsSpirvVertexOrTessEvalShader()); + id_vector_temp_util_.clear(); + id_vector_temp_util_.push_back( + builder_->makeIntConstant(kOutputPerVertexMemberPosition)); + target_pointer = builder_->createAccessChain( + spv::StorageClassOutput, output_per_vertex_, id_vector_temp_util_); + break; + default: + // TODO(Triang3l): All storage targets. + break; + } + if (target_pointer == spv::NoResult) { + return; + } + + uint32_t constant_values; + uint32_t constant_components = + result.GetUsedConstantComponents(constant_values); + if (value == spv::NoResult) { + // The instruction processing function decided that nothing useful needs to + // be stored for some reason, however, some components still need to be + // written on the guest side - fill them with zeros. + constant_components = used_write_mask; + } + uint32_t non_constant_components = used_write_mask & ~constant_components; + + unsigned int value_num_components = + value != spv::NoResult + ? static_cast(builder_->getNumComponents(value)) + : 0; + + if (result.is_clamped && non_constant_components) { + // Apply the saturation modifier to the result. + id_vector_temp_util_.clear(); + id_vector_temp_util_.reserve(3); + id_vector_temp_util_.push_back(value); + id_vector_temp_util_.push_back( + const_float_vectors_0_[value_num_components - 1]); + id_vector_temp_util_.push_back( + const_float_vectors_1_[value_num_components - 1]); + value = builder_->createBuiltinCall( + type_float_vectors_[value_num_components - 1], ext_inst_glsl_std_450_, + GLSLstd450NClamp, id_vector_temp_util_); + } + + // The value contains either result.GetUsedResultComponents() in a condensed + // way, or a scalar to be replicated. 
Decompress them to create a mapping from + // guest result components to the ones in the value vector. + uint32_t used_result_components = result.GetUsedResultComponents(); + unsigned int result_unswizzled_value_components[4] = {}; + if (value_num_components > 1) { + unsigned int value_component = 0; + uint32_t used_result_components_remaining = used_result_components; + uint32_t result_component; + while (xe::bit_scan_forward(used_result_components_remaining, + &result_component)) { + used_result_components_remaining &= ~(1 << result_component); + result_unswizzled_value_components[result_component] = + std::min(value_component++, value_num_components - 1); + } + } + + // Get swizzled mapping of non-constant components to the components of + // `value`. + unsigned int result_swizzled_value_components[4] = {}; + for (uint32_t i = 0; i < 4; ++i) { + if (!(non_constant_components & (1 << i))) { + continue; + } + SwizzleSource swizzle = result.components[i]; + assert_true(swizzle >= SwizzleSource::kX && swizzle <= SwizzleSource::kW); + result_swizzled_value_components[i] = + result_unswizzled_value_components[uint32_t(swizzle) - + uint32_t(SwizzleSource::kX)]; + } + + spv::Id target_type = builder_->getDerefTypeId(target_pointer); + unsigned int target_num_components = + builder_->getNumTypeComponents(target_type); + assert_true( + target_num_components == + GetInstructionStorageTargetUsedComponentCount(result.storage_target)); + uint32_t target_component_mask = (1 << target_num_components) - 1; + assert_zero(used_write_mask & ~target_component_mask); + + spv::Id value_to_store; + if (target_component_mask == used_write_mask) { + // All components are overwritten - no need to load the original value. + // Possible cases: + // * Non-constants only. + // * Vector target. + // * Vector source. + // * Identity swizzle - store directly. + // * Non-identity swizzle - shuffle. + // * Scalar source - smear. + // * Scalar target. + // * Vector source - extract. 
+ // * Scalar source - store directly. + // * Constants only. + // * Vector target - make composite constant. + // * Scalar target - store directly. + // * Mixed non-constants and constants (only for vector targets - scalar + // targets fully covered by the previous cases). + // * Vector source - shuffle with {0, 1} also applying swizzle. + // * Scalar source - construct composite. + if (!constant_components) { + if (target_num_components > 1) { + if (value_num_components > 1) { + // Non-constants only - vector target, vector source. + bool is_identity_swizzle = + target_num_components == value_num_components; + for (uint32_t i = 0; is_identity_swizzle && i < target_num_components; + ++i) { + is_identity_swizzle &= result_swizzled_value_components[i] == i; + } + if (is_identity_swizzle) { + value_to_store = value; + } else { + uint_vector_temp_util_.clear(); + uint_vector_temp_util_.reserve(target_num_components); + uint_vector_temp_util_.insert( + uint_vector_temp_util_.cend(), result_swizzled_value_components, + result_swizzled_value_components + target_num_components); + value_to_store = builder_->createRvalueSwizzle( + spv::NoPrecision, target_type, value, uint_vector_temp_util_); + } + } else { + // Non-constants only - vector target, scalar source. + value_to_store = + builder_->smearScalar(spv::NoPrecision, value, target_type); + } + } else { + if (value_num_components > 1) { + // Non-constants only - scalar target, vector source. + value_to_store = builder_->createCompositeExtract( + value, type_float_, result_swizzled_value_components[0]); + } else { + // Non-constants only - scalar target, scalar source. + value_to_store = value; + } + } + } else if (!non_constant_components) { + if (target_num_components > 1) { + // Constants only - vector target. + id_vector_temp_util_.clear(); + id_vector_temp_util_.reserve(target_num_components); + for (uint32_t i = 0; i < target_num_components; ++i) { + id_vector_temp_util_.push_back( + (constant_values & (1 << i)) ? 
const_float_1_ : const_float_0_); + } + value_to_store = + builder_->makeCompositeConstant(target_type, id_vector_temp_util_); + } else { + // Constants only - scalar target. + value_to_store = + (constant_values & 0b0001) ? const_float_1_ : const_float_0_; + } + } else { + assert_true(target_num_components > 1); + if (value_num_components > 1) { + // Mixed non-constants and constants - vector source. + value_to_store = builder_->getUniqueId(); + std::unique_ptr shuffle_op = + std::make_unique(value_to_store, target_type, + spv::OpVectorShuffle); + shuffle_op->addIdOperand(value); + shuffle_op->addIdOperand(const_float2_0_1_); + for (uint32_t i = 0; i < target_num_components; ++i) { + shuffle_op->addImmediateOperand( + (constant_components & (1 << i)) + ? value_num_components + ((constant_values >> i) & 1) + : result_swizzled_value_components[i]); + } + builder_->getBuildPoint()->addInstruction(std::move(shuffle_op)); + } else { + // Mixed non-constants and constants - scalar source. + id_vector_temp_util_.clear(); + id_vector_temp_util_.reserve(target_num_components); + for (uint32_t i = 0; i < target_num_components; ++i) { + if (constant_components & (1 << i)) { + id_vector_temp_util_.push_back( + (constant_values & (1 << i)) ? const_float_1_ : const_float_0_); + } else { + id_vector_temp_util_.push_back(value); + } + } + value_to_store = builder_->createCompositeConstruct( + target_type, id_vector_temp_util_); + } + } + } else { + // Only certain components are overwritten. + // Scalar targets are always overwritten fully, can't reach this case for + // them. + assert_true(target_num_components > 1); + value_to_store = builder_->createLoad(target_pointer, spv::NoPrecision); + // Two steps: + // 1) Insert constants by shuffling (first so dependency chain of step 2 is + // simpler if constants are written first). + // 2) Insert value components - via shuffling for vector source, via + // composite inserts for scalar value. 
+ if (constant_components) { + spv::Id shuffle_result = builder_->getUniqueId(); + std::unique_ptr shuffle_op = + std::make_unique(shuffle_result, target_type, + spv::OpVectorShuffle); + shuffle_op->addIdOperand(value_to_store); + shuffle_op->addIdOperand(const_float2_0_1_); + for (uint32_t i = 0; i < target_num_components; ++i) { + shuffle_op->addImmediateOperand((constant_components & (1 << i)) + ? target_num_components + + ((constant_values >> i) & 1) + : i); + } + builder_->getBuildPoint()->addInstruction(std::move(shuffle_op)); + value_to_store = shuffle_result; + } + if (non_constant_components) { + if (value_num_components > 1) { + spv::Id shuffle_result = builder_->getUniqueId(); + std::unique_ptr shuffle_op = + std::make_unique(shuffle_result, target_type, + spv::OpVectorShuffle); + shuffle_op->addIdOperand(value_to_store); + shuffle_op->addIdOperand(value); + for (uint32_t i = 0; i < target_num_components; ++i) { + shuffle_op->addImmediateOperand( + (non_constant_components & (1 << i)) + ? 
target_num_components + result_swizzled_value_components[i] + : i); + } + builder_->getBuildPoint()->addInstruction(std::move(shuffle_op)); + value_to_store = shuffle_result; + } else { + for (uint32_t i = 0; i < target_num_components; ++i) { + if (non_constant_components & (1 << i)) { + value_to_store = builder_->createCompositeInsert( + value, value_to_store, target_type, i); + } + } + } + } + } + builder_->createStore(value_to_store, target_pointer); +} + } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index c4dbe2c1a..1350a4c39 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -135,10 +135,16 @@ class SpirvShaderTranslator : public ShaderTranslator { components), original_operand, invert_negate, force_absolute); } + // The type of the value must be a float vector consisting of + // xe::bit_count(result.GetUsedResultComponents()) elements, or (to replicate + // a scalar into all used components) float, or the value can be spv::NoResult + // if there's no result to store (like constants only). + void StoreResult(const InstructionResult& result, spv::Id value); - // Return type is a float vector of xe::bit_count(result.GetUsedWriteMask()) - // or a single float, depending on whether it's a reduction instruction (check - // getTypeId of the result), or returns spv::NoResult if nothing to store. + // Return type is a xe::bit_count(result.GetUsedResultComponents())-component + // float vector or a single float, depending on whether it's a reduction + // instruction (check getTypeId of the result), or returns spv::NoResult if + // nothing to store. spv::Id ProcessVectorAluOperation(const ParsedAluInstruction& instr, bool& predicate_written); @@ -152,6 +158,7 @@ class SpirvShaderTranslator : public ShaderTranslator { // id_vector_temp_ usage in bigger callbacks. 
std::vector id_vector_temp_util_; std::vector uint_vector_temp_; + std::vector uint_vector_temp_util_; spv::Id ext_inst_glsl_std_450_; @@ -177,8 +184,27 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id const_int4_0_; spv::Id const_uint_0_; spv::Id const_uint4_0_; - spv::Id const_float_0_; - spv::Id const_float4_0_; + union { + struct { + spv::Id const_float_0_; + spv::Id const_float2_0_; + spv::Id const_float3_0_; + spv::Id const_float4_0_; + }; + spv::Id const_float_vectors_0_[4]; + }; + union { + struct { + spv::Id const_float_1_; + spv::Id const_float2_1_; + spv::Id const_float3_1_; + spv::Id const_float4_1_; + }; + spv::Id const_float_vectors_1_[4]; + }; + // vec2(0.0, 1.0), to arbitrarily VectorShuffle non-constant and constant + // components. + spv::Id const_float2_0_1_; spv::Id uniform_float_constants_; spv::Id uniform_bool_loop_constants_; diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index 4edf4c6df..613d9d066 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -34,9 +34,12 @@ void SpirvShaderTranslator::ProcessAluInstruction( // Whether the instruction has changed the predicate, and it needs to be // checked again later. bool predicate_written_vector = false; - ProcessVectorAluOperation(instr, predicate_written_vector); + spv::Id vector_result = + ProcessVectorAluOperation(instr, predicate_written_vector); // TODO(Triang3l): Process the ALU scalar operation. 
+ StoreResult(instr.vector_and_constant_result, vector_result); + if (predicate_written_vector) { cf_exec_predicate_written_ = true; CloseInstructionPredication(); From 52a8ed8e6d6bbb1943be018d3b1bccf6e472308f Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 31 Oct 2020 16:22:15 +0300 Subject: [PATCH 035/123] [SPIR-V] Version, float controls --- src/xenia/gpu/shader_compiler_main.cc | 5 +- src/xenia/gpu/spirv_shader_translator.cc | 84 ++++++++++++++++------ src/xenia/gpu/spirv_shader_translator.h | 18 +++-- src/xenia/ui/vulkan/spirv_tools_context.cc | 14 +++- src/xenia/ui/vulkan/spirv_tools_context.h | 2 +- src/xenia/ui/vulkan/vulkan_provider.cc | 29 +++++++- src/xenia/ui/vulkan/vulkan_provider.h | 4 ++ 7 files changed, 122 insertions(+), 34 deletions(-) diff --git a/src/xenia/gpu/shader_compiler_main.cc b/src/xenia/gpu/shader_compiler_main.cc index f79e36df0..a9a2ed609 100644 --- a/src/xenia/gpu/shader_compiler_main.cc +++ b/src/xenia/gpu/shader_compiler_main.cc @@ -108,9 +108,10 @@ int shader_compiler_main(const std::vector& args) { shader_type, ucode_data_hash, ucode_dwords.data(), ucode_dwords.size()); std::unique_ptr translator; + SpirvShaderTranslator::Features spirv_features(true); if (cvars::shader_output_type == "spirv" || cvars::shader_output_type == "spirvtext") { - translator = std::make_unique(); + translator = std::make_unique(spirv_features); } else if (cvars::shader_output_type == "dxbc" || cvars::shader_output_type == "dxbctext") { translator = std::make_unique( @@ -161,7 +162,7 @@ int shader_compiler_main(const std::vector& args) { spv::Disassemble(spirv_disasm_stream, spirv_source); spirv_disasm = std::move(spirv_disasm_stream.str()); ui::vulkan::SpirvToolsContext spirv_tools_context; - if (spirv_tools_context.Initialize()) { + if (spirv_tools_context.Initialize(spirv_features.spirv_version)) { std::string spirv_validation_error; spirv_tools_context.Validate( reinterpret_cast(spirv_source.data()), diff --git 
a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index e80a55444..3c5a1c71e 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -20,10 +20,34 @@ namespace xe { namespace gpu { -SpirvShaderTranslator::SpirvShaderTranslator(bool supports_clip_distance, - bool supports_cull_distance) - : supports_clip_distance_(supports_clip_distance), - supports_cull_distance_(supports_cull_distance) {} +SpirvShaderTranslator::Features::Features(bool all) + : spirv_version(all ? spv::Spv_1_5 : spv::Spv_1_0), + clip_distance(all), + cull_distance(all), + float_controls(all) {} + +SpirvShaderTranslator::Features::Features( + const ui::vulkan::VulkanProvider& provider) + : clip_distance(provider.device_features().shaderClipDistance), + cull_distance(provider.device_features().shaderCullDistance) { + uint32_t device_version = provider.device_properties().apiVersion; + const ui::vulkan::VulkanProvider::DeviceExtensions& device_extensions = + provider.device_extensions(); + if (device_version >= VK_MAKE_VERSION(1, 2, 0)) { + spirv_version = spv::Spv_1_5; + } else if (device_extensions.khr_spirv_1_4) { + spirv_version = spv::Spv_1_4; + } else if (device_version >= VK_MAKE_VERSION(1, 1, 0)) { + spirv_version = spv::Spv_1_3; + } else { + spirv_version = spv::Spv_1_0; + } + float_controls = spirv_version >= spv::Spv_1_4 || + device_extensions.khr_shader_float_controls; +} + +SpirvShaderTranslator::SpirvShaderTranslator(const Features& features) + : features_(features) {} void SpirvShaderTranslator::Reset() { ShaderTranslator::Reset(); @@ -32,6 +56,7 @@ void SpirvShaderTranslator::Reset() { uniform_float_constants_ = spv::NoResult; + main_interface_.clear(); var_main_registers_ = spv::NoResult; main_switch_op_.reset(); @@ -45,10 +70,16 @@ void SpirvShaderTranslator::StartTranslation() { // Tool ID 26 "Xenia Emulator Microcode Translator". 
// https://github.com/KhronosGroup/SPIRV-Headers/blob/c43a43c7cc3af55910b9bec2a71e3e8a622443cf/include/spirv/spir-v.xml#L79 // TODO(Triang3l): Logger. - builder_ = std::make_unique(1 << 16, (26 << 16) | 1, nullptr); + builder_ = std::make_unique(features_.spirv_version, + (26 << 16) | 1, nullptr); builder_->addCapability(IsSpirvTessEvalShader() ? spv::CapabilityTessellation : spv::CapabilityShader); + if (features_.spirv_version < spv::Spv_1_4) { + if (features_.float_controls) { + builder_->addExtension("SPV_KHR_float_controls"); + } + } ext_inst_glsl_std_450_ = builder_->import("GLSL.std.450"); builder_->setMemoryModel(spv::AddressingModelLogical, spv::MemoryModelGLSL450); @@ -133,6 +164,9 @@ void SpirvShaderTranslator::StartTranslation() { : kDescriptorSetFloatConstantsVertex)); builder_->addDecoration(uniform_float_constants_, spv::DecorationBinding, 0); + if (features_.spirv_version >= spv::Spv_1_4) { + main_interface_.push_back(uniform_float_constants_); + } } // Common uniform buffer - bool and loop constants. @@ -168,6 +202,9 @@ void SpirvShaderTranslator::StartTranslation() { int(kDescriptorSetBoolLoopConstants)); builder_->addDecoration(uniform_bool_loop_constants_, spv::DecorationBinding, 0); + if (features_.spirv_version >= spv::Spv_1_4) { + main_interface_.push_back(uniform_bool_loop_constants_); + } if (IsSpirvVertexOrTessEvalShader()) { StartVertexOrTessEvalShaderBeforeMain(); @@ -364,11 +401,23 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { ? spv::ExecutionModelTessellationEvaluation : spv::ExecutionModelVertex; } + if (features_.float_controls) { + // Flush to zero, similar to the real hardware, also for things like Shader + // Model 3 multiplication emulation. 
+ builder_->addCapability(spv::CapabilityDenormFlushToZero); + builder_->addExecutionMode(function_main_, + spv::ExecutionModeDenormFlushToZero, 32); + // Signed zero used to get VFACE from ps_param_gen, also special behavior + // for infinity in certain instructions (such as logarithm, reciprocal, + // muls_prev2). + builder_->addCapability(spv::CapabilitySignedZeroInfNanPreserve); + builder_->addExecutionMode(function_main_, + spv::ExecutionModeSignedZeroInfNanPreserve, 32); + } spv::Instruction* entry_point = builder_->addEntryPoint(execution_model, function_main_, "main"); - - if (IsSpirvVertexOrTessEvalShader()) { - CompleteVertexOrTessEvalShaderAfterMain(entry_point); + for (spv::Id interface_id : main_interface_) { + entry_point->addIdOperand(interface_id); } // TODO(Triang3l): Avoid copy? @@ -721,11 +770,13 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { spv::NoPrecision, spv::StorageClassInput, type_int_, "gl_PrimitiveID"); builder_->addDecoration(input_primitive_id_, spv::DecorationBuiltIn, spv::BuiltInPrimitiveId); + main_interface_.push_back(input_primitive_id_); } else { input_vertex_index_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassInput, type_int_, "gl_VertexIndex"); builder_->addDecoration(input_vertex_index_, spv::DecorationBuiltIn, spv::BuiltInVertexIndex); + main_interface_.push_back(input_vertex_index_); } // Create the entire GLSL 4.50 gl_PerVertex output similar to what glslang @@ -733,10 +784,10 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { // ClipDistance and CullDistance may exist even if the device doesn't support // them, as long as the capabilities aren't enabled, and nothing is stored to // them. 
- if (supports_clip_distance_) { + if (features_.clip_distance) { builder_->addCapability(spv::CapabilityClipDistance); } - if (supports_cull_distance_) { + if (features_.cull_distance) { builder_->addCapability(spv::CapabilityCullDistance); } std::vector struct_per_vertex_members; @@ -746,7 +797,7 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { // TODO(Triang3l): Specialization constant for ucp_cull_only_ena, for 6 + 1 // or 1 + 7 array sizes. struct_per_vertex_members.push_back(builder_->makeArrayType( - type_float_, builder_->makeUintConstant(supports_clip_distance_ ? 6 : 1), + type_float_, builder_->makeUintConstant(features_.clip_distance ? 6 : 1), 0)); struct_per_vertex_members.push_back( builder_->makeArrayType(type_float_, builder_->makeUintConstant(1), 0)); @@ -777,6 +828,7 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { output_per_vertex_ = builder_->createVariable(spv::NoPrecision, spv::StorageClassOutput, type_struct_per_vertex, "xe_out_gl_PerVertex"); + main_interface_.push_back(output_per_vertex_); } void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { @@ -787,16 +839,6 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() {} -void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderAfterMain( - spv::Instruction* entry_point) { - if (IsSpirvTessEvalShader()) { - entry_point->addIdOperand(input_primitive_id_); - } else { - entry_point->addIdOperand(input_vertex_index_); - } - entry_point->addIdOperand(output_per_vertex_); -} - void SpirvShaderTranslator::UpdateExecConditionals( ParsedExecInstruction::Type type, uint32_t bool_constant_index, bool condition) { diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 1350a4c39..07620f081 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -17,6 +17,7 @@ #include 
"third_party/glslang/SPIRV/SpvBuilder.h" #include "xenia/gpu/shader_translator.h" +#include "xenia/ui/vulkan/vulkan_provider.h" namespace xe { namespace gpu { @@ -46,8 +47,16 @@ class SpirvShaderTranslator : public ShaderTranslator { kDescriptorSetSharedMemoryAndEdram, kDescriptorSetCount, }; - SpirvShaderTranslator(bool supports_clip_distance = true, - bool supports_cull_distance = true); + + struct Features { + explicit Features(const ui::vulkan::VulkanProvider& provider); + explicit Features(bool all = false); + unsigned int spirv_version; + bool clip_distance; + bool cull_distance; + bool float_controls; + }; + SpirvShaderTranslator(const Features& features); protected: void Reset() override; @@ -90,7 +99,6 @@ class SpirvShaderTranslator : public ShaderTranslator { void StartVertexOrTessEvalShaderBeforeMain(); void StartVertexOrTessEvalShaderInMain(); void CompleteVertexOrTessEvalShaderInMain(); - void CompleteVertexOrTessEvalShaderAfterMain(spv::Instruction* entry_point); // Updates the current flow control condition (to be called in the beginning // of exec and in jumps), closing the previous conditionals if needed. @@ -148,8 +156,7 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id ProcessVectorAluOperation(const ParsedAluInstruction& instr, bool& predicate_written); - bool supports_clip_distance_; - bool supports_cull_distance_; + Features features_; std::unique_ptr builder_; @@ -223,6 +230,7 @@ class SpirvShaderTranslator : public ShaderTranslator { }; spv::Id output_per_vertex_; + std::vector main_interface_; spv::Function* function_main_; // bool. 
spv::Id var_main_predicate_; diff --git a/src/xenia/ui/vulkan/spirv_tools_context.cc b/src/xenia/ui/vulkan/spirv_tools_context.cc index 01078ca08..0565e1f60 100644 --- a/src/xenia/ui/vulkan/spirv_tools_context.cc +++ b/src/xenia/ui/vulkan/spirv_tools_context.cc @@ -26,7 +26,7 @@ namespace xe { namespace ui { namespace vulkan { -bool SpirvToolsContext::Initialize() { +bool SpirvToolsContext::Initialize(unsigned int spirv_version) { const char* vulkan_sdk_env = std::getenv("VULKAN_SDK"); if (!vulkan_sdk_env) { XELOGE("SPIRV-Tools: Failed to get the VULKAN_SDK environment variable"); @@ -63,7 +63,17 @@ bool SpirvToolsContext::Initialize() { Shutdown(); return false; } - context_ = fn_spvContextCreate_(SPV_ENV_VULKAN_1_0); + spv_target_env target_env; + if (spirv_version >= 0x10500) { + target_env = SPV_ENV_VULKAN_1_2; + } else if (spirv_version >= 0x10400) { + target_env = SPV_ENV_VULKAN_1_1_SPIRV_1_4; + } else if (spirv_version >= 0x10300) { + target_env = SPV_ENV_VULKAN_1_1; + } else { + target_env = SPV_ENV_VULKAN_1_0; + } + context_ = fn_spvContextCreate_(target_env); if (!context_) { XELOGE("SPIRV-Tools: Failed to create a Vulkan 1.0 context"); Shutdown(); diff --git a/src/xenia/ui/vulkan/spirv_tools_context.h b/src/xenia/ui/vulkan/spirv_tools_context.h index 87680c1a4..2ffea1ebd 100644 --- a/src/xenia/ui/vulkan/spirv_tools_context.h +++ b/src/xenia/ui/vulkan/spirv_tools_context.h @@ -32,7 +32,7 @@ class SpirvToolsContext { SpirvToolsContext(const SpirvToolsContext& context) = delete; SpirvToolsContext& operator=(const SpirvToolsContext& context) = delete; ~SpirvToolsContext() { Shutdown(); } - bool Initialize(); + bool Initialize(unsigned int spirv_version); void Shutdown(); spv_result_t Validate(const uint32_t* words, size_t num_words, diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index 179d8f40f..672b17162 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -392,6 
+392,10 @@ bool VulkanProvider::Initialize() { std::memset(&device_extensions_, 0, sizeof(device_extensions_)); if (device_properties_.apiVersion >= VK_MAKE_VERSION(1, 1, 0)) { device_extensions_.khr_dedicated_allocation = true; + if (device_properties_.apiVersion >= VK_MAKE_VERSION(1, 2, 0)) { + device_extensions_.khr_shader_float_controls = true; + device_extensions_.khr_spirv_1_4 = true; + } } bool device_supports_swapchain = false; for (const VkExtensionProperties& device_extension : @@ -405,6 +409,13 @@ bool VulkanProvider::Initialize() { !std::strcmp(device_extension_name, "VK_KHR_dedicated_allocation")) { device_extensions_.khr_dedicated_allocation = true; + } else if (!device_extensions_.khr_shader_float_controls && + !std::strcmp(device_extension_name, + "VK_KHR_shader_float_controls")) { + device_extensions_.khr_shader_float_controls = true; + } else if (!device_extensions_.khr_spirv_1_4 && + !std::strcmp(device_extension_name, "VK_KHR_spirv_1_4")) { + device_extensions_.khr_spirv_1_4 = true; } else if (!device_supports_swapchain && !std::strcmp(device_extension_name, "VK_KHR_swapchain")) { device_supports_swapchain = true; @@ -466,6 +477,10 @@ bool VulkanProvider::Initialize() { device_extensions_.ext_fragment_shader_interlock ? "yes" : "no"); XELOGVK("* VK_KHR_dedicated_allocation: {}", device_extensions_.khr_dedicated_allocation ? "yes" : "no"); + XELOGVK("* VK_KHR_shader_float_controls: {}", + device_extensions_.khr_shader_float_controls ? "yes" : "no"); + XELOGVK("* VK_KHR_spirv_1_4: {}", + device_extensions_.khr_spirv_1_4 ? "yes" : "no"); // TODO(Triang3l): Report properties, features. // Create the device. 
@@ -493,9 +508,17 @@ bool VulkanProvider::Initialize() { if (device_extensions_.ext_fragment_shader_interlock) { device_extensions_enabled.push_back("VK_EXT_fragment_shader_interlock"); } - if (device_properties_.apiVersion < VK_MAKE_VERSION(1, 1, 0)) { - if (device_extensions_.khr_dedicated_allocation) { - device_extensions_enabled.push_back("VK_KHR_dedicated_allocation"); + if (device_properties_.apiVersion < VK_MAKE_VERSION(1, 2, 0)) { + if (device_properties_.apiVersion < VK_MAKE_VERSION(1, 1, 0)) { + if (device_extensions_.khr_dedicated_allocation) { + device_extensions_enabled.push_back("VK_KHR_dedicated_allocation"); + } + } + if (device_extensions_.khr_shader_float_controls) { + device_extensions_enabled.push_back("VK_KHR_shader_float_controls"); + } + if (device_extensions_.khr_spirv_1_4) { + device_extensions_enabled.push_back("VK_KHR_spirv_1_4"); } } VkDeviceCreateInfo device_create_info; diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h index 9fc117a50..1345dea61 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.h +++ b/src/xenia/ui/vulkan/vulkan_provider.h @@ -104,6 +104,10 @@ class VulkanProvider : public GraphicsProvider { bool ext_fragment_shader_interlock; // Core since 1.1.0. bool khr_dedicated_allocation; + // Core since 1.2.0. + bool khr_shader_float_controls; + // Core since 1.2.0. 
+ bool khr_spirv_1_4; }; const DeviceExtensions& device_extensions() const { return device_extensions_; From 1acc5eff0519e5dd53be251e400b30c7a72c9015 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 31 Oct 2020 17:56:46 +0300 Subject: [PATCH 036/123] [SPIR-V] Vector mul, mad --- src/xenia/gpu/spirv_shader_translator.cc | 23 ++-- src/xenia/gpu/spirv_shader_translator.h | 13 +- src/xenia/gpu/spirv_shader_translator_alu.cc | 129 ++++++++++++++++++- 3 files changed, 150 insertions(+), 15 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 3c5a1c71e..982d9a8d9 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -87,6 +87,9 @@ void SpirvShaderTranslator::StartTranslation() { type_void_ = builder_->makeVoidType(); type_bool_ = builder_->makeBoolType(); + type_bool2_ = builder_->makeVectorType(type_bool_, 2); + type_bool3_ = builder_->makeVectorType(type_bool_, 3); + type_bool4_ = builder_->makeVectorType(type_bool_, 4); type_int_ = builder_->makeIntType(32); type_int4_ = builder_->makeVectorType(type_int_, 4); type_uint_ = builder_->makeUintType(32); @@ -1312,10 +1315,9 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, assert_true(target_num_components > 1); if (value_num_components > 1) { // Mixed non-constants and constants - vector source. - value_to_store = builder_->getUniqueId(); std::unique_ptr shuffle_op = - std::make_unique(value_to_store, target_type, - spv::OpVectorShuffle); + std::make_unique( + builder_->getUniqueId(), target_type, spv::OpVectorShuffle); shuffle_op->addIdOperand(value); shuffle_op->addIdOperand(const_float2_0_1_); for (uint32_t i = 0; i < target_num_components; ++i) { @@ -1324,6 +1326,7 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, ? 
value_num_components + ((constant_values >> i) & 1) : result_swizzled_value_components[i]); } + value_to_store = shuffle_op->getResultId(); builder_->getBuildPoint()->addInstruction(std::move(shuffle_op)); } else { // Mixed non-constants and constants - scalar source. @@ -1353,10 +1356,9 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, // 2) Insert value components - via shuffling for vector source, via // composite inserts for scalar value. if (constant_components) { - spv::Id shuffle_result = builder_->getUniqueId(); std::unique_ptr shuffle_op = - std::make_unique(shuffle_result, target_type, - spv::OpVectorShuffle); + std::make_unique(builder_->getUniqueId(), + target_type, spv::OpVectorShuffle); shuffle_op->addIdOperand(value_to_store); shuffle_op->addIdOperand(const_float2_0_1_); for (uint32_t i = 0; i < target_num_components; ++i) { @@ -1365,15 +1367,14 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, ((constant_values >> i) & 1) : i); } + value_to_store = shuffle_op->getResultId(); builder_->getBuildPoint()->addInstruction(std::move(shuffle_op)); - value_to_store = shuffle_result; } if (non_constant_components) { if (value_num_components > 1) { - spv::Id shuffle_result = builder_->getUniqueId(); std::unique_ptr shuffle_op = - std::make_unique(shuffle_result, target_type, - spv::OpVectorShuffle); + std::make_unique( + builder_->getUniqueId(), target_type, spv::OpVectorShuffle); shuffle_op->addIdOperand(value_to_store); shuffle_op->addIdOperand(value); for (uint32_t i = 0; i < target_num_components; ++i) { @@ -1382,8 +1383,8 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, ? 
target_num_components + result_swizzled_value_components[i] : i); } + value_to_store = shuffle_op->getResultId(); builder_->getBuildPoint()->addInstruction(std::move(shuffle_op)); - value_to_store = shuffle_result; } else { for (uint32_t i = 0; i < target_num_components; ++i) { if (non_constant_components & (1 << i)) { diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 07620f081..395733b62 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -170,7 +170,17 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id ext_inst_glsl_std_450_; spv::Id type_void_; - spv::Id type_bool_; + + union { + struct { + spv::Id type_bool_; + spv::Id type_bool2_; + spv::Id type_bool3_; + spv::Id type_bool4_; + }; + // Index = component count - 1. + spv::Id type_bool_vectors_[4]; + }; spv::Id type_int_; spv::Id type_int4_; spv::Id type_uint_; @@ -183,7 +193,6 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id type_float3_; spv::Id type_float4_; }; - // Index = component count - 1. spv::Id type_float_vectors_[4]; }; diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index 613d9d066..9c69e8e8a 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -9,6 +9,10 @@ #include "xenia/gpu/spirv_shader_translator.h" +#include + +#include "third_party/glslang/SPIRV/GLSL.std.450.h" +#include "xenia/base/assert.h" #include "xenia/base/math.h" namespace xe { @@ -70,7 +74,7 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( for (uint32_t i = 0; i < operand_count; ++i) { operand_storage[i] = LoadOperandStorage(instr.vector_operands[i]); } - spv::Id result_vector_type = + spv::Id result_type = used_result_component_count ? 
type_float_vectors_[used_result_component_count - 1] : spv::NoType; @@ -82,7 +86,7 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( switch (instr.vector_opcode) { case ucode::AluVectorOpcode::kAdd: { spv::Id result = builder_->createBinOp( - spv::OpFAdd, result_vector_type, + spv::OpFAdd, result_type, GetOperandComponents(operand_storage[0], instr.vector_operands[0], used_result_components), GetOperandComponents(operand_storage[1], instr.vector_operands[1], @@ -90,6 +94,127 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( builder_->addDecoration(result, spv::DecorationNoContraction); return result; } break; + case ucode::AluVectorOpcode::kMul: + case ucode::AluVectorOpcode::kMad: { + spv::Id multiplicands[2]; + for (uint32_t i = 0; i < 2; ++i) { + multiplicands[i] = + GetOperandComponents(operand_storage[i], instr.vector_operands[i], + used_result_components); + } + spv::Id result = builder_->createBinOp( + spv::OpFMul, result_type, multiplicands[0], multiplicands[1]); + builder_->addDecoration(result, spv::DecorationNoContraction); + uint32_t multiplicands_different = + used_result_components & + ~instr.vector_operands[0].GetIdenticalComponents( + instr.vector_operands[1]); + if (multiplicands_different) { + // Shader Model 3: +0 or denormal * anything = +-0. + spv::Id different_operands[2] = {multiplicands[0], multiplicands[1]}; + spv::Id different_result = result; + uint32_t different_count = xe::bit_count(multiplicands_different); + spv::Id different_type = type_float_vectors_[different_count - 1]; + // Extract the different components, if not all are different. 
+ if (multiplicands_different != used_result_components) { + uint_vector_temp_.clear(); + uint_vector_temp_.reserve(different_count); + uint32_t components_remaining = used_result_components; + for (uint32_t i = 0; i < used_result_component_count; ++i) { + uint32_t component; + xe::bit_scan_forward(components_remaining, &component); + components_remaining &= ~(1 << component); + if (multiplicands_different & (1 << component)) { + uint_vector_temp_.push_back(i); + } + } + assert_true(uint_vector_temp_.size() == different_count); + if (different_count > 1) { + for (uint32_t i = 0; i < 2; ++i) { + different_operands[i] = builder_->createRvalueSwizzle( + spv::NoPrecision, different_type, different_operands[i], + uint_vector_temp_); + } + different_result = builder_->createRvalueSwizzle( + spv::NoPrecision, different_type, different_result, + uint_vector_temp_); + } else { + for (uint32_t i = 0; i < 2; ++i) { + different_operands[i] = builder_->createCompositeExtract( + different_operands[i], different_type, uint_vector_temp_[0]); + } + different_result = builder_->createCompositeExtract( + different_result, different_type, uint_vector_temp_[0]); + } + } + // Check if the different components in any of the operands are zero, + // even if the other is NaN - if min(|a|, |b|) is 0. 
+ for (uint32_t i = 0; i < 2; ++i) { + if (instr.vector_operands[i].is_absolute_value && + !instr.vector_operands[i].is_negated) { + continue; + } + id_vector_temp_.clear(); + id_vector_temp_.push_back(different_operands[i]); + different_operands[i] = builder_->createBuiltinCall( + different_type, ext_inst_glsl_std_450_, GLSLstd450FAbs, + id_vector_temp_); + } + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + id_vector_temp_.push_back(different_operands[0]); + id_vector_temp_.push_back(different_operands[1]); + spv::Id different_abs_min = + builder_->createBuiltinCall(different_type, ext_inst_glsl_std_450_, + GLSLstd450NMin, id_vector_temp_); + spv::Id different_zero = builder_->createBinOp( + spv::OpFOrdEqual, type_bool_vectors_[different_count - 1], + different_abs_min, const_float_vectors_0_[different_count - 1]); + // Replace with +0. + different_result = builder_->createTriOp( + spv::OpSelect, different_type, different_zero, + const_float_vectors_0_[different_count - 1], different_result); + // Insert the different components back to the result. + if (multiplicands_different != used_result_components) { + if (different_count > 1) { + std::unique_ptr shuffle_op = + std::make_unique( + builder_->getUniqueId(), result_type, spv::OpVectorShuffle); + shuffle_op->addIdOperand(result); + shuffle_op->addIdOperand(different_result); + uint32_t components_remaining = used_result_components; + unsigned int different_shuffle_index = used_result_component_count; + for (uint32_t i = 0; i < used_result_component_count; ++i) { + uint32_t component; + xe::bit_scan_forward(components_remaining, &component); + components_remaining &= ~(1 << component); + shuffle_op->addImmediateOperand( + (multiplicands_different & (1 << component)) + ? 
different_shuffle_index++ + : i); + } + result = shuffle_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(shuffle_op)); + } else { + result = builder_->createCompositeInsert( + different_result, result, result_type, + xe::bit_count(used_result_components & + (multiplicands_different - 1))); + } + } else { + result = different_result; + } + } + if (instr.vector_opcode == ucode::AluVectorOpcode::kMad) { + // Not replacing true `0 + term` with conditional selection of the term + // because +0 + -0 should result in +0, not -0. + result = builder_->createBinOp( + spv::OpFAdd, result_type, result, + GetOperandComponents(operand_storage[2], instr.vector_operands[2], + used_result_components)); + builder_->addDecoration(result, spv::DecorationNoContraction); + } + } break; // TODO(Triang3l): Handle all instructions. default: break; From 06502d80d91745c63618c15cd351ec84b3781394 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 31 Oct 2020 19:15:00 +0300 Subject: [PATCH 037/123] [SPIR-V] Vector max/min, comparison, unary --- src/xenia/gpu/spirv_shader_translator_alu.cc | 135 ++++++++++++++++++- 1 file changed, 133 insertions(+), 2 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index 9c69e8e8a..51caec3f6 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -83,6 +83,40 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( // block (like via OpKill). EnsureBuildPointAvailable(); + // Lookup table for variants of instructions with similar structure. 
+ static const unsigned int kOps[] = { + static_cast(spv::OpNop), // kAdd + static_cast(spv::OpNop), // kMul + static_cast(spv::OpFOrdGreaterThanEqual), // kMax + static_cast(spv::OpFOrdLessThan), // kMin + static_cast(spv::OpFOrdEqual), // kSeq + static_cast(spv::OpFOrdGreaterThan), // kSgt + static_cast(spv::OpFOrdGreaterThanEqual), // kSge + static_cast(spv::OpFUnordNotEqual), // kSne + static_cast(GLSLstd450Fract), // kFrc + static_cast(GLSLstd450Trunc), // kTrunc + static_cast(GLSLstd450Floor), // kFloor + static_cast(spv::OpNop), // kMad + static_cast(spv::OpFOrdEqual), // kCndEq + static_cast(spv::OpFOrdGreaterThanEqual), // kCndGe + static_cast(spv::OpFOrdGreaterThan), // kCndGt + static_cast(spv::OpNop), // kDp4 + static_cast(spv::OpNop), // kDp3 + static_cast(spv::OpNop), // kDp2Add + static_cast(spv::OpNop), // kCube + static_cast(spv::OpNop), // kMax4 + static_cast(spv::OpFOrdEqual), // kSetpEqPush + static_cast(spv::OpFUnordNotEqual), // kSetpNePush + static_cast(spv::OpFOrdGreaterThan), // kSetpGtPush + static_cast(spv::OpFOrdGreaterThanEqual), // kSetpGePush + static_cast(spv::OpFOrdEqual), // kKillEq + static_cast(spv::OpFOrdGreaterThan), // kKillGt + static_cast(spv::OpFOrdGreaterThanEqual), // kKillGe + static_cast(spv::OpFUnordNotEqual), // kKillNe + static_cast(spv::OpNop), // kDst + static_cast(spv::OpNop), // kMaxA + }; + switch (instr.vector_opcode) { case ucode::AluVectorOpcode::kAdd: { spv::Id result = builder_->createBinOp( @@ -93,7 +127,7 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( used_result_components)); builder_->addDecoration(result, spv::DecorationNoContraction); return result; - } break; + } case ucode::AluVectorOpcode::kMul: case ucode::AluVectorOpcode::kMad: { spv::Id multiplicands[2]; @@ -214,7 +248,104 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( used_result_components)); builder_->addDecoration(result, spv::DecorationNoContraction); } - } break; + return result; + } + + case 
ucode::AluVectorOpcode::kMax: + case ucode::AluVectorOpcode::kMin: { + spv::Id operand_0 = GetOperandComponents( + operand_storage[0], instr.vector_operands[0], used_result_components); + // max is commonly used as mov. + uint32_t identical = instr.vector_operands[0].GetIdenticalComponents( + instr.vector_operands[1]) & + used_result_components; + if (identical == used_result_components) { + // All components are identical - mov. + return operand_0; + } + spv::Id operand_1 = GetOperandComponents( + operand_storage[1], instr.vector_operands[1], used_result_components); + // Shader Model 3 NaN behavior (a op b ? a : b, not SPIR-V FMax/FMin which + // are undefined for NaN or NMax/NMin which return the non-NaN operand). + spv::Op op = spv::Op(kOps[size_t(instr.vector_opcode)]); + if (!identical) { + // All components are different - max/min of the scalars or the entire + // vectors. + return builder_->createTriOp( + spv::OpSelect, result_type, + builder_->createBinOp( + op, type_bool_vectors_[used_result_component_count - 1], + operand_0, operand_1), + operand_0, operand_1); + } + // Mixed identical and different components. 
+ assert_true(used_result_component_count > 1); + id_vector_temp_.clear(); + id_vector_temp_.reserve(used_result_component_count); + uint32_t components_remaining = used_result_components; + for (uint32_t i = 0; i < used_result_component_count; ++i) { + spv::Id result_component = + builder_->createCompositeExtract(operand_0, type_float_, i); + uint32_t component_index; + xe::bit_scan_forward(components_remaining, &component_index); + components_remaining &= ~(1 << component_index); + if (!(identical & (1 << component_index))) { + spv::Id operand_1_component = + builder_->createCompositeExtract(operand_1, type_float_, i); + result_component = builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(op, type_bool_, result_component, + operand_1_component), + result_component, operand_1_component); + } + id_vector_temp_.push_back(result_component); + } + return builder_->createCompositeConstruct(result_type, id_vector_temp_); + } + + case ucode::AluVectorOpcode::kSeq: + case ucode::AluVectorOpcode::kSgt: + case ucode::AluVectorOpcode::kSge: + case ucode::AluVectorOpcode::kSne: + return builder_->createTriOp( + spv::OpSelect, result_type, + builder_->createBinOp( + spv::Op(kOps[size_t(instr.vector_opcode)]), + type_bool_vectors_[used_result_component_count - 1], + GetOperandComponents(operand_storage[0], instr.vector_operands[0], + used_result_components), + GetOperandComponents(operand_storage[1], instr.vector_operands[1], + used_result_components)), + const_float_vectors_1_[used_result_component_count - 1], + const_float_vectors_0_[used_result_component_count - 1]); + + case ucode::AluVectorOpcode::kFrc: + case ucode::AluVectorOpcode::kTrunc: + case ucode::AluVectorOpcode::kFloor: + id_vector_temp_.clear(); + id_vector_temp_.push_back(GetOperandComponents(operand_storage[0], + instr.vector_operands[0], + used_result_components)); + return builder_->createBuiltinCall( + result_type, ext_inst_glsl_std_450_, + 
GLSLstd450(kOps[size_t(instr.vector_opcode)]), id_vector_temp_); + + case ucode::AluVectorOpcode::kCndEq: + case ucode::AluVectorOpcode::kCndGe: + case ucode::AluVectorOpcode::kCndGt: + return builder_->createTriOp( + spv::OpSelect, result_type, + builder_->createBinOp( + spv::Op(kOps[size_t(instr.vector_opcode)]), + type_bool_vectors_[used_result_component_count - 1], + GetOperandComponents(operand_storage[0], instr.vector_operands[0], + used_result_components), + const_float_vectors_0_[used_result_component_count - 1]), + GetOperandComponents(operand_storage[1], instr.vector_operands[1], + used_result_components), + GetOperandComponents(operand_storage[2], instr.vector_operands[2], + used_result_components)); + // TODO(Triang3l): Handle all instructions. default: break; From 0949fac8264fb0246afffe725190dbff7c60abf4 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 31 Oct 2020 20:15:01 +0300 Subject: [PATCH 038/123] [SPIR-V] Dot product --- src/xenia/gpu/spirv_shader_translator_alu.cc | 86 +++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index 51caec3f6..853b0e673 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -28,8 +28,10 @@ void SpirvShaderTranslator::ProcessAluInstruction( UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition); // Floating-point arithmetic operations (addition, subtraction, negation, - // multiplication, dot product, division, modulo - see isArithmeticOperation - // in propagateNoContraction of glslang) must have the NoContraction + // multiplication, division, modulo - see isArithmeticOperation in + // propagateNoContraction of glslang; though for some reason it's not applied + // to SPIR-V OpDot, at least in the February 16, 2020 version installed on + // http://shader-playground.timjones.io/) must have the NoContraction // 
decoration to prevent reordering to make sure floating-point calculations // are optimized predictably and exactly the same in different shaders to // allow for multipass rendering (in addition to the Invariant decoration on @@ -346,6 +348,86 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( GetOperandComponents(operand_storage[2], instr.vector_operands[2], used_result_components)); + case ucode::AluVectorOpcode::kDp4: + case ucode::AluVectorOpcode::kDp3: + case ucode::AluVectorOpcode::kDp2Add: { + // Not using OpDot for predictable optimization (especially addition + // order) and NoContraction (which, for some reason, isn't placed on dot + // in glslang as of the February 16, 2020 version). + uint32_t component_count; + if (instr.vector_opcode == ucode::AluVectorOpcode::kDp2Add) { + component_count = 2; + } else if (instr.vector_opcode == ucode::AluVectorOpcode::kDp3) { + component_count = 3; + } else { + component_count = 4; + } + uint32_t component_mask = (1 << component_count) - 1; + spv::Id operands[2]; + for (uint32_t i = 0; i < 2; ++i) { + operands[i] = GetOperandComponents( + operand_storage[i], instr.vector_operands[i], component_mask); + } + uint32_t different = + component_mask & ~instr.vector_operands[0].GetIdenticalComponents( + instr.vector_operands[1]); + spv::Id result = spv::NoResult; + for (uint32_t i = 0; i < component_count; ++i) { + spv::Id operand_components[2]; + for (unsigned int j = 0; j < 2; ++j) { + operand_components[j] = + builder_->createCompositeExtract(operands[j], type_float_, i); + } + spv::Id product = + builder_->createBinOp(spv::OpFMul, type_float_, + operand_components[0], operand_components[1]); + builder_->addDecoration(product, spv::DecorationNoContraction); + if (different & (1 << i)) { + // Shader Model 3: +0 or denormal * anything = +-0. + // Check if the different components in any of the operands are zero, + // even if the other is NaN - if min(|a|, |b|) is 0. 
+ for (uint32_t j = 0; j < 2; ++j) { + if (instr.vector_operands[j].is_absolute_value && + !instr.vector_operands[j].is_negated) { + continue; + } + id_vector_temp_.clear(); + id_vector_temp_.push_back(operand_components[j]); + operand_components[j] = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450FAbs, id_vector_temp_); + } + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + id_vector_temp_.push_back(operand_components[0]); + id_vector_temp_.push_back(operand_components[1]); + product = builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, + builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, + GLSLstd450NMin, id_vector_temp_), + const_float_0_), + const_float_0_, product); + } + if (!i) { + result = product; + continue; + } + result = + builder_->createBinOp(spv::OpFAdd, type_float_, result, product); + builder_->addDecoration(result, spv::DecorationNoContraction); + } + if (instr.vector_opcode == ucode::AluVectorOpcode::kDp2Add) { + result = builder_->createBinOp( + spv::OpFAdd, type_float_, result, + GetOperandComponents(operand_storage[2], instr.vector_operands[2], + 0b0001)); + builder_->addDecoration(result, spv::DecorationNoContraction); + } + return result; + } + // TODO(Triang3l): Handle all instructions. 
default: break; From b32ca5fb4f9e1a98fd6c7b7c12bb9be1959bf5a9 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 31 Oct 2020 21:42:51 +0300 Subject: [PATCH 039/123] [SPIR-V] Cube vector instruction --- src/xenia/gpu/spirv_shader_translator_alu.cc | 210 +++++++++++++++++++ 1 file changed, 210 insertions(+) diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index 853b0e673..9051607ac 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -428,6 +428,216 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( return result; } + case ucode::AluVectorOpcode::kCube: { + // operands[0] is .z_xy. + // Result is T coordinate, S coordinate, 2 * major axis, face ID. + // Skipping the second component of the operand, so 120, not 230. + spv::Id operand_vector = GetOperandComponents( + operand_storage[0], instr.vector_operands[0], 0b1101); + // Remapped from ZXY (Z_XY without the skipped component) to XYZ. 
+ spv::Id operand[3]; + for (unsigned int i = 0; i < 3; ++i) { + operand[i] = builder_->createCompositeExtract(operand_vector, + type_float_, (i + 1) % 3); + } + spv::Id operand_abs[3]; + if (!instr.vector_operands[0].is_absolute_value || + instr.vector_operands[0].is_negated) { + for (unsigned int i = 0; i < 3; ++i) { + id_vector_temp_.clear(); + id_vector_temp_.push_back(operand[i]); + operand_abs[i] = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450FAbs, id_vector_temp_); + } + } else { + for (unsigned int i = 0; i < 3; ++i) { + operand_abs[i] = operand[i]; + } + } + spv::Id operand_neg[3] = {}; + if (used_result_components & 0b0001) { + operand_neg[1] = + builder_->createUnaryOp(spv::OpFNegate, type_float_, operand[1]); + builder_->addDecoration(operand_neg[1], spv::DecorationNoContraction); + } + if (used_result_components & 0b0010) { + operand_neg[0] = + builder_->createUnaryOp(spv::OpFNegate, type_float_, operand[0]); + builder_->addDecoration(operand_neg[0], spv::DecorationNoContraction); + operand_neg[2] = + builder_->createUnaryOp(spv::OpFNegate, type_float_, operand[2]); + builder_->addDecoration(operand_neg[2], spv::DecorationNoContraction); + } + + // Check if the major axis is Z (abs(z) >= abs(x) && abs(z) >= abs(y)). + // Selection merge must be the penultimate instruction in the block, check + // the condition before it. 
+ spv::Id ma_z_condition = builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, + builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_, + operand_abs[2], operand_abs[0]), + builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_, + operand_abs[2], operand_abs[1])); + spv::Function& function = builder_->getBuildPoint()->getParent(); + spv::Block& ma_z_block = builder_->makeNewBlock(); + spv::Block& ma_yx_block = builder_->makeNewBlock(); + spv::Block* ma_merge_block = + new spv::Block(builder_->getUniqueId(), function); + { + std::unique_ptr selection_merge_op = + std::make_unique(spv::OpSelectionMerge); + selection_merge_op->addIdOperand(ma_merge_block->getId()); + selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); + builder_->getBuildPoint()->addInstruction( + std::move(selection_merge_op)); + } + builder_->createConditionalBranch(ma_z_condition, &ma_z_block, + &ma_yx_block); + + builder_->setBuildPoint(&ma_z_block); + // The major axis is Z. + spv::Id ma_z_result[4] = {}; + // tc = -y + ma_z_result[0] = operand_neg[1]; + // ma/2 = z + ma_z_result[2] = operand[2]; + if (used_result_components & 0b1010) { + spv::Id z_is_neg = builder_->createBinOp( + spv::OpFOrdLessThan, type_bool_, operand[2], const_float_0_); + if (used_result_components & 0b0010) { + // sc = z < 0.0 ? -x : x + ma_z_result[1] = builder_->createTriOp( + spv::OpSelect, type_float_, z_is_neg, operand_neg[0], operand[0]); + } + if (used_result_components & 0b1000) { + // id = z < 0.0 ? 5.0 : 4.0 + ma_z_result[3] = + builder_->createTriOp(spv::OpSelect, type_float_, z_is_neg, + builder_->makeFloatConstant(5.0f), + builder_->makeFloatConstant(4.0f)); + } + } + builder_->createBranch(ma_merge_block); + + builder_->setBuildPoint(&ma_yx_block); + // The major axis is not Z - create an inner conditional to check if the + // major axis is Y (abs(y) >= abs(x)). + // Selection merge must be the penultimate instruction in the block, check + // the condition before it. 
+ spv::Id ma_y_condition = + builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_, + operand_abs[1], operand_abs[0]); + spv::Block& ma_y_block = builder_->makeNewBlock(); + spv::Block& ma_x_block = builder_->makeNewBlock(); + spv::Block& ma_yx_merge_block = builder_->makeNewBlock(); + { + std::unique_ptr selection_merge_op = + std::make_unique(spv::OpSelectionMerge); + selection_merge_op->addIdOperand(ma_yx_merge_block.getId()); + selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); + builder_->getBuildPoint()->addInstruction( + std::move(selection_merge_op)); + } + builder_->createConditionalBranch(ma_y_condition, &ma_y_block, + &ma_x_block); + + builder_->setBuildPoint(&ma_y_block); + // The major axis is Y. + spv::Id ma_y_result[4] = {}; + // sc = x + ma_y_result[1] = operand[0]; + // ma/2 = y + ma_y_result[2] = operand[1]; + if (used_result_components & 0b1001) { + spv::Id y_is_neg = builder_->createBinOp( + spv::OpFOrdLessThan, type_bool_, operand[1], const_float_0_); + if (used_result_components & 0b0001) { + // tc = y < 0.0 ? -z : z + ma_y_result[0] = builder_->createTriOp( + spv::OpSelect, type_float_, y_is_neg, operand_neg[2], operand[2]); + // id = y < 0.0 ? 3.0 : 2.0 + ma_y_result[3] = + builder_->createTriOp(spv::OpSelect, type_float_, y_is_neg, + builder_->makeFloatConstant(3.0f), + builder_->makeFloatConstant(2.0f)); + } + } + builder_->createBranch(&ma_yx_merge_block); + + builder_->setBuildPoint(&ma_x_block); + // The major axis is X. + spv::Id ma_x_result[4] = {}; + // tc = -y + ma_x_result[0] = operand_neg[1]; + // ma/2 = x + ma_x_result[2] = operand[2]; + if (used_result_components & 0b1010) { + spv::Id x_is_neg = builder_->createBinOp( + spv::OpFOrdLessThan, type_bool_, operand[0], const_float_0_); + if (used_result_components & 0b0010) { + // sc = x < 0.0 ? 
z : -z + ma_x_result[1] = builder_->createTriOp( + spv::OpSelect, type_float_, x_is_neg, operand[2], operand_neg[2]); + } + if (used_result_components & 0b1000) { + // id = x < 0.0 ? 1.0 : 0.0 + ma_x_result[3] = + builder_->createTriOp(spv::OpSelect, type_float_, x_is_neg, + const_float_1_, const_float_0_); + } + } + builder_->createBranch(&ma_yx_merge_block); + + builder_->setBuildPoint(&ma_yx_merge_block); + // The major axis is Y or X - choose the options of the result from Y and + // X. + spv::Id ma_yx_result[4] = {}; + for (uint32_t i = 0; i < 4; ++i) { + if (!(used_result_components & (1 << i))) { + continue; + } + std::unique_ptr phi_op = + std::make_unique(builder_->getUniqueId(), + type_float_, spv::OpPhi); + phi_op->addIdOperand(ma_y_result[i]); + phi_op->addIdOperand(ma_y_block.getId()); + phi_op->addIdOperand(ma_x_result[i]); + phi_op->addIdOperand(ma_x_block.getId()); + ma_yx_result[i] = phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + } + builder_->createBranch(ma_merge_block); + + function.addBlock(ma_merge_block); + builder_->setBuildPoint(ma_merge_block); + // Choose the result options from Z and YX cases. + id_vector_temp_.clear(); + id_vector_temp_.reserve(used_result_component_count); + for (uint32_t i = 0; i < 4; ++i) { + if (!(used_result_components & (1 << i))) { + continue; + } + std::unique_ptr phi_op = + std::make_unique(builder_->getUniqueId(), + type_float_, spv::OpPhi); + phi_op->addIdOperand(ma_z_result[i]); + phi_op->addIdOperand(ma_z_block.getId()); + phi_op->addIdOperand(ma_yx_result[i]); + phi_op->addIdOperand(ma_yx_merge_block.getId()); + id_vector_temp_.push_back(phi_op->getResultId()); + builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + } + assert_true(id_vector_temp_.size() == used_result_component_count); + if (used_result_component_count == 1) { + // Only one component - not composite. 
+ return id_vector_temp_[0]; + } + return builder_->createCompositeConstruct( + type_float_vectors_[used_result_component_count - 1], + id_vector_temp_); + } + // TODO(Triang3l): Handle all instructions. default: break; From d9f57c1ad83b1c913c8c9e536b0195012fbf40f0 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 1 Nov 2020 13:12:31 +0300 Subject: [PATCH 040/123] [SPIR-V] Max4 --- src/xenia/gpu/spirv_shader_translator.cc | 2 +- src/xenia/gpu/spirv_shader_translator_alu.cc | 42 ++++++++++++++++++-- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 982d9a8d9..48b5b6e63 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -1210,7 +1210,7 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, uint32_t result_component; while (xe::bit_scan_forward(used_result_components_remaining, &result_component)) { - used_result_components_remaining &= ~(1 << result_component); + used_result_components_remaining &= ~(uint32_t(1) << result_component); result_unswizzled_value_components[result_component] = std::min(value_component++, value_num_components - 1); } diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index 9051607ac..0a1956d3f 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -159,7 +159,7 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( for (uint32_t i = 0; i < used_result_component_count; ++i) { uint32_t component; xe::bit_scan_forward(components_remaining, &component); - components_remaining &= ~(1 << component); + components_remaining &= ~(uint32_t(1) << component); if (multiplicands_different & (1 << component)) { uint_vector_temp_.push_back(i); } @@ -223,7 +223,7 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( for (uint32_t i = 0; i < 
used_result_component_count; ++i) { uint32_t component; xe::bit_scan_forward(components_remaining, &component); - components_remaining &= ~(1 << component); + components_remaining &= ~(uint32_t(1) << component); shuffle_op->addImmediateOperand( (multiplicands_different & (1 << component)) ? different_shuffle_index++ @@ -290,7 +290,7 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( builder_->createCompositeExtract(operand_0, type_float_, i); uint32_t component_index; xe::bit_scan_forward(components_remaining, &component_index); - components_remaining &= ~(1 << component_index); + components_remaining &= ~(uint32_t(1) << component_index); if (!(identical & (1 << component_index))) { spv::Id operand_1_component = builder_->createCompositeExtract(operand_1, type_float_, i); @@ -638,6 +638,42 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( id_vector_temp_); } + case ucode::AluVectorOpcode::kMax4: { + // Find max of all different components of the first operand. + // FIXME(Triang3l): Not caring about NaN because no info about the + // correct order, just using NMax here, which replaces them with the + // non-NaN component (however, there's one nice thing about it is that it + // may be compiled into max3 + max on GCN). 
+ uint32_t components_remaining = 0b0000; + for (uint32_t i = 0; i < 4; ++i) { + SwizzleSource swizzle_source = instr.vector_operands[0].GetComponent(i); + assert_true(swizzle_source >= SwizzleSource::kX && + swizzle_source <= SwizzleSource::kW); + components_remaining |= + 1 << (uint32_t(swizzle_source) - uint32_t(SwizzleSource::kX)); + } + assert_not_zero(components_remaining); + spv::Id operand = + ApplyOperandModifiers(operand_storage[0], instr.vector_operands[0]); + uint32_t component; + xe::bit_scan_forward(components_remaining, &component); + components_remaining &= ~(uint32_t(1) << component); + spv::Id result = builder_->createCompositeExtract( + operand, type_float_, static_cast(component)); + while (xe::bit_scan_forward(components_remaining, &component)) { + components_remaining &= ~(uint32_t(1) << component); + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + id_vector_temp_.push_back(result); + id_vector_temp_.push_back(builder_->createCompositeExtract( + operand, type_float_, static_cast(component))); + result = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450NMax, id_vector_temp_); + } + return result; + } + // TODO(Triang3l): Handle all instructions. 
default: break; From c173ecc4ef530856df31eac6e84f950d89fd7423 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 1 Nov 2020 15:45:26 +0300 Subject: [PATCH 041/123] [SPIR-V] Remaining VALU instructions --- src/xenia/gpu/spirv_shader_translator_alu.cc | 290 ++++++++++++++++++- 1 file changed, 276 insertions(+), 14 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index 0a1956d3f..e30489682 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -116,7 +116,7 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( static_cast(spv::OpFOrdGreaterThanEqual), // kKillGe static_cast(spv::OpFUnordNotEqual), // kKillNe static_cast(spv::OpNop), // kDst - static_cast(spv::OpNop), // kMaxA + static_cast(spv::OpFOrdGreaterThanEqual), // kMaxA }; switch (instr.vector_opcode) { @@ -254,16 +254,87 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( } case ucode::AluVectorOpcode::kMax: - case ucode::AluVectorOpcode::kMin: { + case ucode::AluVectorOpcode::kMin: + case ucode::AluVectorOpcode::kMaxA: { + bool is_maxa = instr.vector_opcode == ucode::AluVectorOpcode::kMaxA; spv::Id operand_0 = GetOperandComponents( - operand_storage[0], instr.vector_operands[0], used_result_components); + operand_storage[0], instr.vector_operands[0], + used_result_components | (is_maxa ? 
0b1000 : 0b0000)); + spv::Id maxa_operand_0_w = spv::NoResult; + if (is_maxa) { + // a0 = (int)clamp(floor(src0.w + 0.5), -256.0, 255.0) + int operand_0_num_components = builder_->getNumComponents(operand_0); + if (operand_0_num_components > 1) { + maxa_operand_0_w = builder_->createCompositeExtract( + operand_0, type_float_, + static_cast(operand_0_num_components - 1)); + } else { + maxa_operand_0_w = operand_0; + } + spv::Id maxa_address = + builder_->createBinOp(spv::OpFAdd, type_float_, maxa_operand_0_w, + builder_->makeFloatConstant(0.5f)); + builder_->addDecoration(maxa_address, spv::DecorationNoContraction); + id_vector_temp_.clear(); + id_vector_temp_.push_back(maxa_address); + maxa_address = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450Floor, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back(maxa_address); + id_vector_temp_.push_back(builder_->makeFloatConstant(-256.0f)); + id_vector_temp_.push_back(builder_->makeFloatConstant(255.0f)); + builder_->createStore( + builder_->createUnaryOp( + spv::OpConvertFToS, type_int_, + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450NClamp, id_vector_temp_)), + var_main_address_absolute_); + } + if (!used_result_components) { + // maxa returning nothing - can't load src1. + return spv::NoResult; + } // max is commonly used as mov. uint32_t identical = instr.vector_operands[0].GetIdenticalComponents( instr.vector_operands[1]) & used_result_components; + spv::Id operand_0_per_component; + if (is_maxa && !(used_result_components & 0b1000) && + (identical == used_result_components || !identical)) { + // operand_0 and operand_1 have different lengths though if src0.w is + // forced without W being in the write mask for maxa purposes - + // shuffle/extract the needed part if src0.w is only needed for setting + // a0. 
+ // This is only needed for cases without mixed identical and different + // components - the mixed case uses CompositeExtract, which works fine. + if (used_result_component_count > 1) { + // Need all but the last (W) element of operand_0 as a vector. + uint_vector_temp_.clear(); + uint_vector_temp_.reserve(used_result_component_count); + for (unsigned int i = 0; i < used_result_component_count; ++i) { + uint_vector_temp_.push_back(i); + } + operand_0_per_component = builder_->createRvalueSwizzle( + spv::NoPrecision, + type_float_vectors_[used_result_component_count - 1], operand_0, + uint_vector_temp_); + } else { + // Need the non-W component as scalar. + operand_0_per_component = + builder_->createCompositeExtract(operand_0, type_float_, 0); + } + } else { + operand_0_per_component = operand_0; + } if (identical == used_result_components) { - // All components are identical - mov. - return operand_0; + // All components are identical - mov (with the correct length in case + // of maxa). Don't access operand_1 at all in this case (operand_0 is + // already accessed for W in case of maxa). + assert_true(builder_->getNumComponents(operand_0_per_component) == + used_result_component_count); + return operand_0_per_component; } spv::Id operand_1 = GetOperandComponents( operand_storage[1], instr.vector_operands[1], used_result_components); @@ -272,13 +343,15 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( spv::Op op = spv::Op(kOps[size_t(instr.vector_opcode)]); if (!identical) { // All components are different - max/min of the scalars or the entire - // vectors. + // vectors (with the correct length in case of maxa). 
+ assert_true(builder_->getNumComponents(operand_0_per_component) == + used_result_component_count); return builder_->createTriOp( spv::OpSelect, result_type, builder_->createBinOp( op, type_bool_vectors_[used_result_component_count - 1], - operand_0, operand_1), - operand_0, operand_1); + operand_0_per_component, operand_1), + operand_0_per_component, operand_1); } // Mixed identical and different components. assert_true(used_result_component_count > 1); @@ -286,8 +359,16 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( id_vector_temp_.reserve(used_result_component_count); uint32_t components_remaining = used_result_components; for (uint32_t i = 0; i < used_result_component_count; ++i) { + // Composite extraction of operand_0[i] works fine even it's maxa with + // src0.w forced without W being in the write mask - src0.w would be the + // last, so all indices before it are still valid. Don't extract twice + // if already extracted though. spv::Id result_component = - builder_->createCompositeExtract(operand_0, type_float_, i); + ((used_result_components & 0b1000) && + i + 1 >= used_result_component_count && + maxa_operand_0_w != spv::NoResult) + ? maxa_operand_0_w + : builder_->createCompositeExtract(operand_0, type_float_, i); uint32_t component_index; xe::bit_scan_forward(components_remaining, &component_index); components_remaining &= ~(uint32_t(1) << component_index); @@ -385,7 +466,8 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( if (different & (1 << i)) { // Shader Model 3: +0 or denormal * anything = +-0. // Check if the different components in any of the operands are zero, - // even if the other is NaN - if min(|a|, |b|) is 0. + // even if the other is NaN - if min(|a|, |b|) is 0, if yes, replace + // the result with zero. 
for (uint32_t j = 0; j < 2; ++j) { if (instr.vector_operands[j].is_absolute_value && !instr.vector_operands[j].is_negated) { @@ -674,12 +756,192 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( return result; } - // TODO(Triang3l): Handle all instructions. - default: - break; + case ucode::AluVectorOpcode::kSetpEqPush: + case ucode::AluVectorOpcode::kSetpNePush: + case ucode::AluVectorOpcode::kSetpGtPush: + case ucode::AluVectorOpcode::kSetpGePush: { + // X is only needed for the result, W is needed for the predicate. + spv::Id operands[2]; + spv::Id operands_w[2]; + for (uint32_t i = 0; i < 2; ++i) { + operands[i] = + GetOperandComponents(operand_storage[i], instr.vector_operands[i], + used_result_components ? 0b1001 : 0b1000); + if (used_result_components) { + operands_w[i] = + builder_->createCompositeExtract(operands[i], type_float_, 1); + } else { + operands_w[i] = operands[i]; + } + } + spv::Op op = spv::Op(kOps[size_t(instr.vector_opcode)]); + // p0 = src0.w == 0.0 && src1.w op 0.0 + builder_->createStore( + builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, operands_w[0], + const_float_0_), + builder_->createBinOp(op, type_bool_, operands_w[1], + const_float_0_)), + var_main_predicate_); + predicate_written = true; + if (!used_result_components) { + return spv::NoResult; + } + // result = (src0.x == 0.0 && src1.x op 0.0) ? 0.0 : src0.x + 1.0 + // Or: + // result = ((src0.x == 0.0 && src1.x op 0.0) ? 
-1.0 : src0.x) + 1.0 + spv::Id operands_x[2]; + for (uint32_t i = 0; i < 2; ++i) { + operands_x[i] = + builder_->createCompositeExtract(operands[i], type_float_, 0); + } + spv::Id condition = builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, operands_x[0], + const_float_0_), + builder_->createBinOp(op, type_bool_, operands_x[1], const_float_0_)); + spv::Id result = builder_->createBinOp( + spv::OpFAdd, type_float_, + builder_->createTriOp(spv::OpSelect, type_float_, condition, + builder_->makeFloatConstant(-1.0f), + operands_x[0]), + const_float_1_); + builder_->addDecoration(result, spv::DecorationNoContraction); + return result; + } + + case ucode::AluVectorOpcode::kKillEq: + case ucode::AluVectorOpcode::kKillGt: + case ucode::AluVectorOpcode::kKillGe: + case ucode::AluVectorOpcode::kKillNe: { + // Selection merge must be the penultimate instruction in the block, check + // the condition before it. + spv::Id condition = builder_->createUnaryOp( + spv::OpAny, type_bool_, + builder_->createBinOp( + spv::Op(kOps[size_t(instr.vector_opcode)]), type_bool4_, + GetOperandComponents(operand_storage[0], instr.vector_operands[0], + 0b1111), + GetOperandComponents(operand_storage[1], instr.vector_operands[1], + 0b1111))); + spv::Block& kill_block = builder_->makeNewBlock(); + spv::Block& merge_block = builder_->makeNewBlock(); + { + std::unique_ptr selection_merge_op = + std::make_unique(spv::OpSelectionMerge); + selection_merge_op->addIdOperand(merge_block.getId()); + selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); + builder_->getBuildPoint()->addInstruction( + std::move(selection_merge_op)); + } + builder_->createConditionalBranch(condition, &kill_block, &merge_block); + builder_->setBuildPoint(&kill_block); + // TODO(Triang3l): Demote to helper invocation to keep derivatives if + // needed (and return const_float4_1_ if killed in this case). 
+      builder_->createNoResultOp(spv::OpKill);
+      builder_->setBuildPoint(&merge_block);
+      return const_float4_0_;
+    }
+
+    case ucode::AluVectorOpcode::kDst: {
+      spv::Id operands[2] = {};
+      if (used_result_components & 0b0110) {
+        // result.yz is needed: [0] = y, [1] = z.
+        // result.y is needed: scalar = y.
+        // result.z is needed: scalar = z.
+        operands[0] =
+            GetOperandComponents(operand_storage[0], instr.vector_operands[0],
+                                 used_result_components & 0b0110);
+      }
+      if (used_result_components & 0b1010) {
+        // result.yw is needed: [0] = y, [1] = w.
+        // result.y is needed: scalar = y.
+        // result.w is needed: scalar = w.
+        operands[1] =
+            GetOperandComponents(operand_storage[1], instr.vector_operands[1],
+                                 used_result_components & 0b1010);
+      }
+      // y = src0.y * src1.y
+      spv::Id result_y = spv::NoResult;
+      if (used_result_components & 0b0010) {
+        spv::Id operands_y[2];
+        operands_y[0] =
+            (used_result_components & 0b0100)
+                ? builder_->createCompositeExtract(operands[0], type_float_, 0)
+                : operands[0];
+        operands_y[1] =
+            (used_result_components & 0b1000)
+                ?
builder_->createCompositeExtract(operands[1], type_float_, 0) + : operands[1]; + result_y = builder_->createBinOp(spv::OpFMul, type_float_, + operands_y[0], operands_y[1]); + builder_->addDecoration(result_y, spv::DecorationNoContraction); + if (!(instr.vector_operands[0].GetIdenticalComponents( + instr.vector_operands[1]) & + 0b0010)) { + for (uint32_t i = 0; i < 2; ++i) { + if (instr.vector_operands[i].is_absolute_value && + !instr.vector_operands[i].is_negated) { + continue; + } + id_vector_temp_.clear(); + id_vector_temp_.push_back(operands_y[i]); + operands_y[i] = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450FAbs, id_vector_temp_); + } + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + id_vector_temp_.push_back(operands_y[0]); + id_vector_temp_.push_back(operands_y[1]); + result_y = builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, + builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, + GLSLstd450NMin, id_vector_temp_), + const_float_0_), + const_float_0_, result_y); + } + } + id_vector_temp_.clear(); + id_vector_temp_.reserve(used_result_component_count); + if (used_result_components & 0b0001) { + // x = 1.0 + id_vector_temp_.push_back(const_float_1_); + } + if (used_result_components & 0b0010) { + // y = src0.y * src1.y + id_vector_temp_.push_back(result_y); + } + if (used_result_components & 0b0100) { + // z = src0.z + id_vector_temp_.push_back( + (used_result_components & 0b0010) + ? builder_->createCompositeExtract(operands[0], type_float_, 1) + : operands[0]); + } + if (used_result_components & 0b1000) { + // w = src1.w + id_vector_temp_.push_back( + (used_result_components & 0b0010) + ? builder_->createCompositeExtract(operands[1], type_float_, 1) + : operands[1]); + } + assert_true(id_vector_temp_.size() == used_result_component_count); + if (used_result_component_count == 1) { + // Only one component - not composite. 
+ return id_vector_temp_[0]; + } + return builder_->createCompositeConstruct( + type_float_vectors_[used_result_component_count - 1], + id_vector_temp_); + } } - // Invalid instruction. + assert_unhandled_case(instr.vector_opcode); + EmitTranslationError("Unknown ALU vector operation"); return spv::NoResult; } From fd4ba5622a622561d9a384165e3109018662c086 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 1 Nov 2020 15:56:51 +0300 Subject: [PATCH 042/123] [SPIR-V] Cube: multiply the major axis by 2 --- src/xenia/gpu/spirv_shader_translator_alu.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index e30489682..6f4d4397e 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -711,6 +711,14 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( builder_->getBuildPoint()->addInstruction(std::move(phi_op)); } assert_true(id_vector_temp_.size() == used_result_component_count); + if (used_result_components & 0b0100) { + // Multiply the major axis by 2. + spv::Id& ma2 = id_vector_temp_[xe::bit_count(used_result_components & + ((1 << 2) - 1))]; + ma2 = builder_->createBinOp(spv::OpFMul, type_float_, + builder_->makeFloatConstant(2.0f), ma2); + builder_->addDecoration(ma2, spv::DecorationNoContraction); + } if (used_result_component_count == 1) { // Only one component - not composite. 
return id_vector_temp_[0]; From 06b47d1df04bd5808c19c2cbd7bd83712f6f5d82 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 1 Nov 2020 20:42:59 +0300 Subject: [PATCH 043/123] [SPIR-V] Scalar addition and multiplication --- src/xenia/gpu/spirv_shader_translator.cc | 28 ++ src/xenia/gpu/spirv_shader_translator.h | 22 ++ src/xenia/gpu/spirv_shader_translator_alu.cc | 339 +++++++++++++++---- 3 files changed, 329 insertions(+), 60 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 48b5b6e63..edaad6344 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -235,6 +235,9 @@ void SpirvShaderTranslator::StartTranslation() { var_main_address_relative_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_int4_, "xe_var_address_relative", const_int4_0_); + var_main_previous_scalar_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_float_, + "xe_var_previous_scalar", const_float_0_); uint32_t register_array_size = register_count(); if (register_array_size) { id_vector_temp_.clear(); @@ -1128,6 +1131,31 @@ spv::Id SpirvShaderTranslator::GetUnmodifiedOperandComponents( operand_storage, id_vector_temp_util_); } +void SpirvShaderTranslator::GetOperandScalarXY( + spv::Id operand_storage, const InstructionOperand& original_operand, + spv::Id& a_out, spv::Id& b_out, bool invert_negate, bool force_absolute) { + spv::Id a = GetOperandComponents(operand_storage, original_operand, 0b0001, + invert_negate, force_absolute); + a_out = a; + b_out = original_operand.GetComponent(0) != original_operand.GetComponent(1) + ? 
GetOperandComponents(operand_storage, original_operand, 0b0010, + invert_negate, force_absolute) + : a; +} + +spv::Id SpirvShaderTranslator::GetAbsoluteOperand( + spv::Id operand_storage, const InstructionOperand& original_operand) { + if (original_operand.is_absolute_value && !original_operand.is_negated) { + return operand_storage; + } + EnsureBuildPointAvailable(); + id_vector_temp_util_.clear(); + id_vector_temp_util_.push_back(operand_storage); + return builder_->createBuiltinCall(builder_->getTypeId(operand_storage), + ext_inst_glsl_std_450_, GLSLstd450FAbs, + id_vector_temp_util_); +} + void SpirvShaderTranslator::StoreResult(const InstructionResult& result, spv::Id value) { uint32_t used_write_mask = result.GetUsedWriteMask(); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 395733b62..cdf812791 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -143,18 +143,38 @@ class SpirvShaderTranslator : public ShaderTranslator { components), original_operand, invert_negate, force_absolute); } + // If components are identical, the same Id will be written to both outputs. + void GetOperandScalarXY(spv::Id operand_storage, + const InstructionOperand& original_operand, + spv::Id& a_out, spv::Id& b_out, + bool invert_negate = false, + bool force_absolute = false); + // Gets the absolute value of the loaded operand if it's not absolute already. + spv::Id GetAbsoluteOperand(spv::Id operand_storage, + const InstructionOperand& original_operand); // The type of the value must be a float vector consisting of // xe::bit_count(result.GetUsedResultComponents()) elements, or (to replicate // a scalar into all used components) float, or the value can be spv::NoResult // if there's no result to store (like constants only). 
void StoreResult(const InstructionResult& result, spv::Id value); + // For Shader Model 3 multiplication (+-0 or denormal * anything = +0), + // replaces the value with +0 if the minimum of the two operands is 0. This + // must be called with absolute values of operands - use GetAbsoluteOperand! + spv::Id ZeroIfAnyOperandIsZero(spv::Id value, spv::Id operand_0_abs, + spv::Id operand_1_abs); // Return type is a xe::bit_count(result.GetUsedResultComponents())-component // float vector or a single float, depending on whether it's a reduction // instruction (check getTypeId of the result), or returns spv::NoResult if // nothing to store. spv::Id ProcessVectorAluOperation(const ParsedAluInstruction& instr, bool& predicate_written); + // Returns a float value to write to the previous scalar register and to the + // destination. If the return value is ps itself (in the retain_prev case), + // returns spv::NoResult (handled as a special case, so if it's retain_prev, + // but don't need to write to anywhere, no OpLoad(ps) will be done). + spv::Id ProcessScalarAluOperation(const ParsedAluInstruction& instr, + bool& predicate_written); Features features_; @@ -249,6 +269,8 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id var_main_address_relative_; // int. spv::Id var_main_address_absolute_; + // float. + spv::Id var_main_previous_scalar_; // float4[register_count()]. spv::Id var_main_registers_; // VS only - float3 (special exports). 
diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index 6f4d4397e..0aaf46473 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -9,6 +9,7 @@ #include "xenia/gpu/spirv_shader_translator.h" +#include #include #include "third_party/glslang/SPIRV/GLSL.std.450.h" @@ -18,6 +19,28 @@ namespace xe { namespace gpu { +spv::Id SpirvShaderTranslator::ZeroIfAnyOperandIsZero(spv::Id value, + spv::Id operand_0_abs, + spv::Id operand_1_abs) { + EnsureBuildPointAvailable(); + int num_components = builder_->getNumComponents(value); + assert_true(builder_->getNumComponents(operand_0_abs) == num_components); + assert_true(builder_->getNumComponents(operand_1_abs) == num_components); + id_vector_temp_util_.clear(); + id_vector_temp_util_.reserve(2); + id_vector_temp_util_.push_back(operand_0_abs); + id_vector_temp_util_.push_back(operand_1_abs); + return builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp( + spv::OpFOrdEqual, type_bool_vectors_[num_components - 1], + builder_->createBuiltinCall(type_float_vectors_[num_components - 1], + ext_inst_glsl_std_450_, GLSLstd450NMin, + id_vector_temp_util_), + const_float_vectors_0_[num_components - 1]), + const_float_vectors_0_[num_components - 1], value); +} + void SpirvShaderTranslator::ProcessAluInstruction( const ParsedAluInstruction& instr) { if (instr.IsNop()) { @@ -42,11 +65,25 @@ void SpirvShaderTranslator::ProcessAluInstruction( bool predicate_written_vector = false; spv::Id vector_result = ProcessVectorAluOperation(instr, predicate_written_vector); - // TODO(Triang3l): Process the ALU scalar operation. 
+ bool predicate_written_scalar = false; + spv::Id scalar_result = + ProcessScalarAluOperation(instr, predicate_written_scalar); + + if (scalar_result != spv::NoResult) { + builder_->createStore(scalar_result, var_main_previous_scalar_); + } else { + // Special retain_prev case - load ps only if needed and don't store the + // same value back to ps. + if (instr.scalar_result.GetUsedWriteMask()) { + scalar_result = + builder_->createLoad(var_main_previous_scalar_, spv::NoPrecision); + } + } StoreResult(instr.vector_and_constant_result, vector_result); + StoreResult(instr.scalar_result, scalar_result); - if (predicate_written_vector) { + if (predicate_written_vector || predicate_written_scalar) { cf_exec_predicate_written_ = true; CloseInstructionPredication(); } @@ -186,15 +223,8 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( // Check if the different components in any of the operands are zero, // even if the other is NaN - if min(|a|, |b|) is 0. for (uint32_t i = 0; i < 2; ++i) { - if (instr.vector_operands[i].is_absolute_value && - !instr.vector_operands[i].is_negated) { - continue; - } - id_vector_temp_.clear(); - id_vector_temp_.push_back(different_operands[i]); - different_operands[i] = builder_->createBuiltinCall( - different_type, ext_inst_glsl_std_450_, GLSLstd450FAbs, - id_vector_temp_); + different_operands[i] = GetAbsoluteOperand(different_operands[i], + instr.vector_operands[i]); } id_vector_temp_.clear(); id_vector_temp_.reserve(2); @@ -465,32 +495,12 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( builder_->addDecoration(product, spv::DecorationNoContraction); if (different & (1 << i)) { // Shader Model 3: +0 or denormal * anything = +-0. - // Check if the different components in any of the operands are zero, - // even if the other is NaN - if min(|a|, |b|) is 0, if yes, replace - // the result with zero. 
- for (uint32_t j = 0; j < 2; ++j) { - if (instr.vector_operands[j].is_absolute_value && - !instr.vector_operands[j].is_negated) { - continue; - } - id_vector_temp_.clear(); - id_vector_temp_.push_back(operand_components[j]); - operand_components[j] = - builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, - GLSLstd450FAbs, id_vector_temp_); - } - id_vector_temp_.clear(); - id_vector_temp_.reserve(2); - id_vector_temp_.push_back(operand_components[0]); - id_vector_temp_.push_back(operand_components[1]); - product = builder_->createTriOp( - spv::OpSelect, type_float_, - builder_->createBinOp(spv::OpFOrdEqual, type_bool_, - builder_->createBuiltinCall( - type_float_, ext_inst_glsl_std_450_, - GLSLstd450NMin, id_vector_temp_), - const_float_0_), - const_float_0_, product); + product = ZeroIfAnyOperandIsZero( + product, + GetAbsoluteOperand(operand_components[0], + instr.vector_operands[0]), + GetAbsoluteOperand(operand_components[1], + instr.vector_operands[1])); } if (!i) { result = product; @@ -888,29 +898,11 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( if (!(instr.vector_operands[0].GetIdenticalComponents( instr.vector_operands[1]) & 0b0010)) { - for (uint32_t i = 0; i < 2; ++i) { - if (instr.vector_operands[i].is_absolute_value && - !instr.vector_operands[i].is_negated) { - continue; - } - id_vector_temp_.clear(); - id_vector_temp_.push_back(operands_y[i]); - operands_y[i] = - builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, - GLSLstd450FAbs, id_vector_temp_); - } - id_vector_temp_.clear(); - id_vector_temp_.reserve(2); - id_vector_temp_.push_back(operands_y[0]); - id_vector_temp_.push_back(operands_y[1]); - result_y = builder_->createTriOp( - spv::OpSelect, type_float_, - builder_->createBinOp(spv::OpFOrdEqual, type_bool_, - builder_->createBuiltinCall( - type_float_, ext_inst_glsl_std_450_, - GLSLstd450NMin, id_vector_temp_), - const_float_0_), - const_float_0_, result_y); + // Shader Model 3: +0 or denormal * anything 
= +-0. + result_y = ZeroIfAnyOperandIsZero( + result_y, + GetAbsoluteOperand(operands_y[0], instr.vector_operands[0]), + GetAbsoluteOperand(operands_y[1], instr.vector_operands[1])); } } id_vector_temp_.clear(); @@ -953,5 +945,232 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( return spv::NoResult; } +spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( + const ParsedAluInstruction& instr, bool& predicate_written) { + predicate_written = false; + + spv::Id operand_storage[2] = {}; + for (uint32_t i = 0; i < instr.scalar_operand_count; ++i) { + operand_storage[i] = LoadOperandStorage(instr.scalar_operands[i]); + } + + // In case the paired vector instruction (if processed first) terminates the + // block (like via OpKill). + EnsureBuildPointAvailable(); + + // Lookup table for variants of instructions with similar structure. + static const unsigned int kOps[] = { + static_cast(spv::OpFAdd), // kAdds + static_cast(spv::OpFAdd), // kAddsPrev + static_cast(spv::OpNop), // kMuls + static_cast(spv::OpNop), // kMulsPrev + static_cast(spv::OpNop), // kMulsPrev2 + static_cast(spv::OpNop), // kMaxs + static_cast(spv::OpNop), // kMins + static_cast(spv::OpNop), // kSeqs + static_cast(spv::OpNop), // kSgts + static_cast(spv::OpNop), // kSges + static_cast(spv::OpNop), // kSnes + static_cast(spv::OpNop), // kFrcs + static_cast(spv::OpNop), // kTruncs + static_cast(spv::OpNop), // kFloors + static_cast(spv::OpNop), // kExp + static_cast(spv::OpNop), // kLogc + static_cast(spv::OpNop), // kLog + static_cast(spv::OpNop), // kRcpc + static_cast(spv::OpNop), // kRcpf + static_cast(spv::OpNop), // kRcp + static_cast(spv::OpNop), // kRsqc + static_cast(spv::OpNop), // kRsqf + static_cast(spv::OpNop), // kRsq + static_cast(spv::OpNop), // kMaxAs + static_cast(spv::OpNop), // kMaxAsf + static_cast(spv::OpFSub), // kSubs + static_cast(spv::OpFSub), // kSubsPrev + static_cast(spv::OpNop), // kSetpEq + static_cast(spv::OpNop), // kSetpNe + static_cast(spv::OpNop), // 
kSetpGt + static_cast(spv::OpNop), // kSetpGe + static_cast(spv::OpNop), // kSetpInv + static_cast(spv::OpNop), // kSetpPop + static_cast(spv::OpNop), // kSetpClr + static_cast(spv::OpNop), // kSetpRstr + static_cast(spv::OpNop), // kKillsEq + static_cast(spv::OpNop), // kKillsGt + static_cast(spv::OpNop), // kKillsGe + static_cast(spv::OpNop), // kKillsNe + static_cast(spv::OpNop), // kKillsOne + static_cast(spv::OpNop), // kSqrt + static_cast(spv::OpNop), // Invalid + static_cast(spv::OpNop), // kMulsc0 + static_cast(spv::OpNop), // kMulsc1 + static_cast(spv::OpNop), // kAddsc0 + static_cast(spv::OpNop), // kAddsc1 + static_cast(spv::OpNop), // kSubsc0 + static_cast(spv::OpNop), // kSubsc1 + static_cast(spv::OpNop), // kSin + static_cast(spv::OpNop), // kCos + static_cast(spv::OpNop), // kRetainPrev + }; + + switch (instr.scalar_opcode) { + case ucode::AluScalarOpcode::kAdds: + case ucode::AluScalarOpcode::kSubs: { + spv::Id a, b; + GetOperandScalarXY(operand_storage[0], instr.scalar_operands[0], a, b); + spv::Id result = builder_->createBinOp( + spv::Op(kOps[size_t(instr.scalar_opcode)]), type_float_, a, b); + builder_->addDecoration(result, spv::DecorationNoContraction); + return result; + } + case ucode::AluScalarOpcode::kAddsPrev: + case ucode::AluScalarOpcode::kSubsPrev: { + spv::Id result = builder_->createBinOp( + spv::Op(kOps[size_t(instr.scalar_opcode)]), type_float_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001), + builder_->createLoad(var_main_previous_scalar_, spv::NoPrecision)); + builder_->addDecoration(result, spv::DecorationNoContraction); + return result; + } + case ucode::AluScalarOpcode::kMuls: { + spv::Id a, b; + GetOperandScalarXY(operand_storage[0], instr.scalar_operands[0], a, b); + spv::Id result = builder_->createBinOp(spv::OpFMul, type_float_, a, b); + builder_->addDecoration(result, spv::DecorationNoContraction); + if (a != b) { + // Shader Model 3: +0 or denormal * anything = +-0. 
+ result = ZeroIfAnyOperandIsZero( + result, GetAbsoluteOperand(a, instr.vector_operands[0]), + GetAbsoluteOperand(b, instr.vector_operands[0])); + } + return result; + } + case ucode::AluScalarOpcode::kMulsPrev: { + spv::Id a = GetOperandComponents(operand_storage[0], + instr.scalar_operands[0], 0b0001); + spv::Id ps = + builder_->createLoad(var_main_previous_scalar_, spv::NoPrecision); + spv::Id result = builder_->createBinOp(spv::OpFMul, type_float_, a, ps); + builder_->addDecoration(result, spv::DecorationNoContraction); + // Shader Model 3: +0 or denormal * anything = +-0. + id_vector_temp_.clear(); + id_vector_temp_.push_back(ps); + return ZeroIfAnyOperandIsZero( + result, GetAbsoluteOperand(a, instr.scalar_operands[0]), + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450FAbs, id_vector_temp_)); + } + case ucode::AluScalarOpcode::kMulsPrev2: { + // Check if need to select the src0.a * ps case. + // Selection merge must be the penultimate instruction in the block, check + // the condition before it. + spv::Id ps = + builder_->createLoad(var_main_previous_scalar_, spv::NoPrecision); + // ps != -FLT_MAX. + spv::Id const_float_max_neg = builder_->makeFloatConstant(-FLT_MAX); + spv::Id condition = builder_->createBinOp( + spv::OpFUnordNotEqual, type_bool_, ps, const_float_max_neg); + // isfinite(ps), or |ps| <= FLT_MAX, or -|ps| >= -FLT_MAX, since -FLT_MAX + // is already loaded to an SGPR, this is also false if it's NaN. 
+ id_vector_temp_.clear(); + id_vector_temp_.push_back(ps); + spv::Id ps_abs = builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_); + spv::Id ps_abs_neg = + builder_->createUnaryOp(spv::OpFNegate, type_float_, ps_abs); + builder_->addDecoration(ps_abs_neg, spv::DecorationNoContraction); + condition = builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, condition, + builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_, + ps_abs_neg, const_float_max_neg)); + // isfinite(src0.b), or -|src0.b| >= -FLT_MAX for the same reason. + spv::Id b = GetOperandComponents(operand_storage[0], + instr.scalar_operands[0], 0b0010); + spv::Id b_abs_neg = b; + if (!instr.scalar_operands[0].is_absolute_value) { + id_vector_temp_.clear(); + id_vector_temp_.push_back(b_abs_neg); + b_abs_neg = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450FAbs, id_vector_temp_); + } + if (!instr.scalar_operands[0].is_absolute_value || + !instr.scalar_operands[0].is_negated) { + b_abs_neg = + builder_->createUnaryOp(spv::OpFNegate, type_float_, b_abs_neg); + builder_->addDecoration(b_abs_neg, spv::DecorationNoContraction); + } + condition = builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, condition, + builder_->createBinOp(spv::OpFOrdGreaterThanEqual, type_bool_, + b_abs_neg, const_float_max_neg)); + // src0.b > 0 (need !(src0.b <= 0), but src0.b has already been checked + // for NaN). 
+ condition = builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, condition, + builder_->createBinOp(spv::OpFOrdGreaterThan, type_bool_, b, + const_float_0_)); + spv::Block& multiply_block = builder_->makeNewBlock(); + spv::Block& merge_block = builder_->makeNewBlock(); + { + std::unique_ptr selection_merge_op = + std::make_unique(spv::OpSelectionMerge); + selection_merge_op->addIdOperand(merge_block.getId()); + selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); + builder_->getBuildPoint()->addInstruction( + std::move(selection_merge_op)); + } + { + std::unique_ptr branch_conditional_op = + std::make_unique(spv::OpBranchConditional); + branch_conditional_op->addIdOperand(condition); + branch_conditional_op->addIdOperand(multiply_block.getId()); + branch_conditional_op->addIdOperand(merge_block.getId()); + // More likely to multiply that to return -FLT_MAX. + branch_conditional_op->addImmediateOperand(2); + branch_conditional_op->addImmediateOperand(1); + builder_->getBuildPoint()->addInstruction( + std::move(branch_conditional_op)); + } + spv::Block& head_block = *builder_->getBuildPoint(); + multiply_block.addPredecessor(&head_block); + merge_block.addPredecessor(&head_block); + // Multiplication case. + builder_->setBuildPoint(&multiply_block); + spv::Id a = instr.scalar_operands[0].GetComponent(0) != + instr.scalar_operands[0].GetComponent(1) + ? GetOperandComponents(operand_storage[0], + instr.scalar_operands[0], 0b0001) + : b; + spv::Id product = builder_->createBinOp(spv::OpFMul, type_float_, a, ps); + builder_->addDecoration(product, spv::DecorationNoContraction); + // Shader Model 3: +0 or denormal * anything = +-0. + product = ZeroIfAnyOperandIsZero( + product, GetAbsoluteOperand(a, instr.scalar_operands[0]), ps_abs); + builder_->createBranch(&merge_block); + // Merge case - choose between the product and -FLT_MAX. 
+ builder_->setBuildPoint(&merge_block); + { + std::unique_ptr phi_op = + std::make_unique(builder_->getUniqueId(), + type_float_, spv::OpPhi); + phi_op->addIdOperand(product); + phi_op->addIdOperand(multiply_block.getId()); + phi_op->addIdOperand(const_float_max_neg); + phi_op->addIdOperand(head_block.getId()); + spv::Id phi_result = phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + return phi_result; + } + } + // TODO(Triang3l): Implement the rest of instructions. + } + + /* assert_unhandled_case(instr.vector_opcode); + EmitTranslationError("Unknown ALU scalar operation"); */ + return spv::NoResult; +} + } // namespace gpu } // namespace xe From d466ebbbe19bd0185a304ec155f0859bb55c78ab Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 1 Nov 2020 21:12:09 +0300 Subject: [PATCH 044/123] [SPIR-V] Scalar ALU comparison and simple unary --- src/xenia/gpu/spirv_shader_translator_alu.cc | 151 ++++++++++++------- 1 file changed, 99 insertions(+), 52 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index 0aaf46473..819ca15ed 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -960,57 +960,57 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( // Lookup table for variants of instructions with similar structure. 
static const unsigned int kOps[] = { - static_cast(spv::OpFAdd), // kAdds - static_cast(spv::OpFAdd), // kAddsPrev - static_cast(spv::OpNop), // kMuls - static_cast(spv::OpNop), // kMulsPrev - static_cast(spv::OpNop), // kMulsPrev2 - static_cast(spv::OpNop), // kMaxs - static_cast(spv::OpNop), // kMins - static_cast(spv::OpNop), // kSeqs - static_cast(spv::OpNop), // kSgts - static_cast(spv::OpNop), // kSges - static_cast(spv::OpNop), // kSnes - static_cast(spv::OpNop), // kFrcs - static_cast(spv::OpNop), // kTruncs - static_cast(spv::OpNop), // kFloors - static_cast(spv::OpNop), // kExp - static_cast(spv::OpNop), // kLogc - static_cast(spv::OpNop), // kLog - static_cast(spv::OpNop), // kRcpc - static_cast(spv::OpNop), // kRcpf - static_cast(spv::OpNop), // kRcp - static_cast(spv::OpNop), // kRsqc - static_cast(spv::OpNop), // kRsqf - static_cast(spv::OpNop), // kRsq - static_cast(spv::OpNop), // kMaxAs - static_cast(spv::OpNop), // kMaxAsf - static_cast(spv::OpFSub), // kSubs - static_cast(spv::OpFSub), // kSubsPrev - static_cast(spv::OpNop), // kSetpEq - static_cast(spv::OpNop), // kSetpNe - static_cast(spv::OpNop), // kSetpGt - static_cast(spv::OpNop), // kSetpGe - static_cast(spv::OpNop), // kSetpInv - static_cast(spv::OpNop), // kSetpPop - static_cast(spv::OpNop), // kSetpClr - static_cast(spv::OpNop), // kSetpRstr - static_cast(spv::OpNop), // kKillsEq - static_cast(spv::OpNop), // kKillsGt - static_cast(spv::OpNop), // kKillsGe - static_cast(spv::OpNop), // kKillsNe - static_cast(spv::OpNop), // kKillsOne - static_cast(spv::OpNop), // kSqrt - static_cast(spv::OpNop), // Invalid - static_cast(spv::OpNop), // kMulsc0 - static_cast(spv::OpNop), // kMulsc1 - static_cast(spv::OpNop), // kAddsc0 - static_cast(spv::OpNop), // kAddsc1 - static_cast(spv::OpNop), // kSubsc0 - static_cast(spv::OpNop), // kSubsc1 - static_cast(spv::OpNop), // kSin - static_cast(spv::OpNop), // kCos - static_cast(spv::OpNop), // kRetainPrev + static_cast(spv::OpFAdd), // kAdds + 
static_cast(spv::OpFAdd), // kAddsPrev + static_cast(spv::OpNop), // kMuls + static_cast(spv::OpNop), // kMulsPrev + static_cast(spv::OpNop), // kMulsPrev2 + static_cast(spv::OpFOrdGreaterThanEqual), // kMaxs + static_cast(spv::OpFOrdLessThan), // kMins + static_cast(spv::OpFOrdEqual), // kSeqs + static_cast(spv::OpFOrdGreaterThan), // kSgts + static_cast(spv::OpFOrdGreaterThanEqual), // kSges + static_cast(spv::OpFUnordNotEqual), // kSnes + static_cast(GLSLstd450Fract), // kFrcs + static_cast(GLSLstd450Trunc), // kTruncs + static_cast(GLSLstd450Floor), // kFloors + static_cast(GLSLstd450Exp2), // kExp + static_cast(spv::OpNop), // kLogc + static_cast(GLSLstd450Log2), // kLog + static_cast(spv::OpNop), // kRcpc + static_cast(spv::OpNop), // kRcpf + static_cast(spv::OpNop), // kRcp + static_cast(spv::OpNop), // kRsqc + static_cast(spv::OpNop), // kRsqf + static_cast(GLSLstd450InverseSqrt), // kRsq + static_cast(spv::OpNop), // kMaxAs + static_cast(spv::OpNop), // kMaxAsf + static_cast(spv::OpFSub), // kSubs + static_cast(spv::OpFSub), // kSubsPrev + static_cast(spv::OpNop), // kSetpEq + static_cast(spv::OpNop), // kSetpNe + static_cast(spv::OpNop), // kSetpGt + static_cast(spv::OpNop), // kSetpGe + static_cast(spv::OpNop), // kSetpInv + static_cast(spv::OpNop), // kSetpPop + static_cast(spv::OpNop), // kSetpClr + static_cast(spv::OpNop), // kSetpRstr + static_cast(spv::OpNop), // kKillsEq + static_cast(spv::OpNop), // kKillsGt + static_cast(spv::OpNop), // kKillsGe + static_cast(spv::OpNop), // kKillsNe + static_cast(spv::OpNop), // kKillsOne + static_cast(GLSLstd450Sqrt), // kSqrt + static_cast(spv::OpNop), // Invalid + static_cast(spv::OpNop), // kMulsc0 + static_cast(spv::OpNop), // kMulsc1 + static_cast(spv::OpNop), // kAddsc0 + static_cast(spv::OpNop), // kAddsc1 + static_cast(spv::OpNop), // kSubsc0 + static_cast(spv::OpNop), // kSubsc1 + static_cast(GLSLstd450Sin), // kSin + static_cast(GLSLstd450Cos), // kCos + static_cast(spv::OpNop), // kRetainPrev }; 
switch (instr.scalar_opcode) { @@ -1164,10 +1164,57 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( return phi_result; } } + + case ucode::AluScalarOpcode::kMaxs: + case ucode::AluScalarOpcode::kMins: { + spv::Id a, b; + GetOperandScalarXY(operand_storage[0], instr.scalar_operands[0], a, b); + if (a == b) { + // max is commonly used as mov. + return a; + } + // Shader Model 3 NaN behavior (a op b ? a : b, not SPIR-V FMax/FMin which + // are undefined for NaN or NMax/NMin which return the non-NaN operand). + return builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::Op(kOps[size_t(instr.scalar_opcode)]), + type_bool_, a, b), + a, b); + } + + case ucode::AluScalarOpcode::kSeqs: + case ucode::AluScalarOpcode::kSgts: + case ucode::AluScalarOpcode::kSges: + case ucode::AluScalarOpcode::kSnes: + return builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp( + spv::Op(kOps[size_t(instr.scalar_opcode)]), type_bool_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001), + const_float_0_), + const_float_1_, const_float_0_); + + case ucode::AluScalarOpcode::kFrcs: + case ucode::AluScalarOpcode::kTruncs: + case ucode::AluScalarOpcode::kFloors: + case ucode::AluScalarOpcode::kExp: + case ucode::AluScalarOpcode::kLog: + case ucode::AluScalarOpcode::kRsq: + case ucode::AluScalarOpcode::kSqrt: + case ucode::AluScalarOpcode::kSin: + case ucode::AluScalarOpcode::kCos: + id_vector_temp_.clear(); + id_vector_temp_.push_back(GetOperandComponents( + operand_storage[0], instr.scalar_operands[0], 0b0001)); + return builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, + GLSLstd450(kOps[size_t(instr.scalar_opcode)]), id_vector_temp_); + // TODO(Triang3l): Implement the rest of instructions. 
} - /* assert_unhandled_case(instr.vector_opcode); + /* assert_unhandled_case(instr.scalar_opcode); EmitTranslationError("Unknown ALU scalar operation"); */ return spv::NoResult; } From 0f6aff6f74cddc8e5b53ebabe388fb87aae4ebb7 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 1 Nov 2020 21:48:14 +0300 Subject: [PATCH 045/123] [SPIR-V] Unary math functions --- src/xenia/gpu/spirv_shader_translator_alu.cc | 100 +++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index 819ca15ed..ed054f70a 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -10,6 +10,8 @@ #include "xenia/gpu/spirv_shader_translator.h" #include +#include +#include #include #include "third_party/glslang/SPIRV/GLSL.std.450.h" @@ -1210,6 +1212,104 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( return builder_->createBuiltinCall( type_float_, ext_inst_glsl_std_450_, GLSLstd450(kOps[size_t(instr.scalar_opcode)]), id_vector_temp_); + case ucode::AluScalarOpcode::kLogc: { + id_vector_temp_.clear(); + id_vector_temp_.push_back(GetOperandComponents( + operand_storage[0], instr.scalar_operands[0], 0b0001)); + spv::Id result = builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450Log2, id_vector_temp_); + return builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, + builder_->makeFloatConstant(-INFINITY)), + builder_->makeFloatConstant(-FLT_MAX), result); + } + case ucode::AluScalarOpcode::kRcpc: { + spv::Id result = builder_->createBinOp( + spv::OpFDiv, type_float_, const_float_1_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001)); + builder_->addDecoration(result, spv::DecorationNoContraction); + result = builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, 
+ builder_->makeFloatConstant(-INFINITY)), + builder_->makeFloatConstant(-FLT_MAX), result); + return builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, + builder_->makeFloatConstant(INFINITY)), + builder_->makeFloatConstant(FLT_MAX), result); + } + case ucode::AluScalarOpcode::kRcpf: { + spv::Id result = builder_->createBinOp( + spv::OpFDiv, type_float_, const_float_1_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001)); + builder_->addDecoration(result, spv::DecorationNoContraction); + result = builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, + builder_->makeFloatConstant(INFINITY)), + const_float_0_, result); + // Can't create -0.0f with makeFloatConstant due to float comparison + // internally, cast to bit pattern. + result = builder_->createTriOp( + spv::OpSelect, type_uint_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, + builder_->makeFloatConstant(-INFINITY)), + builder_->makeUintConstant(uint32_t(INT32_MIN)), + builder_->createUnaryOp(spv::OpBitcast, type_uint_, result)); + return builder_->createUnaryOp(spv::OpBitcast, type_float_, result); + } + case ucode::AluScalarOpcode::kRcp: { + spv::Id result = builder_->createBinOp( + spv::OpFDiv, type_float_, const_float_1_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001)); + builder_->addDecoration(result, spv::DecorationNoContraction); + return result; + } + case ucode::AluScalarOpcode::kRsqc: { + id_vector_temp_.clear(); + id_vector_temp_.push_back(GetOperandComponents( + operand_storage[0], instr.scalar_operands[0], 0b0001)); + spv::Id result = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450InverseSqrt, id_vector_temp_); + result = builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, + 
builder_->makeFloatConstant(-INFINITY)), + builder_->makeFloatConstant(-FLT_MAX), result); + return builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, + builder_->makeFloatConstant(INFINITY)), + builder_->makeFloatConstant(FLT_MAX), result); + } + case ucode::AluScalarOpcode::kRsqf: { + id_vector_temp_.clear(); + id_vector_temp_.push_back(GetOperandComponents( + operand_storage[0], instr.scalar_operands[0], 0b0001)); + spv::Id result = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450InverseSqrt, id_vector_temp_); + result = builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, + builder_->makeFloatConstant(INFINITY)), + const_float_0_, result); + // Can't create -0.0f with makeFloatConstant due to float comparison + // internally, cast to bit pattern. + result = builder_->createTriOp( + spv::OpSelect, type_uint_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, result, + builder_->makeFloatConstant(-INFINITY)), + builder_->makeUintConstant(uint32_t(INT32_MIN)), + builder_->createUnaryOp(spv::OpBitcast, type_uint_, result)); + return builder_->createUnaryOp(spv::OpBitcast, type_float_, result); + } // TODO(Triang3l): Implement the rest of instructions. 
} From 7512560416342b1f6ff7c34e6afae276b4220b09 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 1 Nov 2020 22:17:12 +0300 Subject: [PATCH 046/123] [SPIR-V] SALU address and predicate --- src/xenia/gpu/spirv_shader_translator_alu.cc | 107 +++++++++++++++++-- 1 file changed, 98 insertions(+), 9 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index ed054f70a..87b132853 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -858,10 +858,10 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( builder_->createConditionalBranch(condition, &kill_block, &merge_block); builder_->setBuildPoint(&kill_block); // TODO(Triang3l): Demote to helper invocation to keep derivatives if - // needed (and return const_float4_1_ if killed in this case). + // needed (and return 1 if killed in this case). builder_->createNoResultOp(spv::OpKill); builder_->setBuildPoint(&merge_block); - return const_float4_0_; + return const_float_0_; } case ucode::AluVectorOpcode::kDst: { @@ -985,14 +985,14 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( static_cast(spv::OpNop), // kRsqc static_cast(spv::OpNop), // kRsqf static_cast(GLSLstd450InverseSqrt), // kRsq - static_cast(spv::OpNop), // kMaxAs - static_cast(spv::OpNop), // kMaxAsf + static_cast(spv::OpFOrdGreaterThanEqual), // kMaxAs + static_cast(spv::OpFOrdGreaterThanEqual), // kMaxAsf static_cast(spv::OpFSub), // kSubs static_cast(spv::OpFSub), // kSubsPrev - static_cast(spv::OpNop), // kSetpEq - static_cast(spv::OpNop), // kSetpNe - static_cast(spv::OpNop), // kSetpGt - static_cast(spv::OpNop), // kSetpGe + static_cast(spv::OpFOrdEqual), // kSetpEq + static_cast(spv::OpFUnordNotEqual), // kSetpNe + static_cast(spv::OpFOrdGreaterThan), // kSetpGt + static_cast(spv::OpFOrdGreaterThanEqual), // kSetpGe static_cast(spv::OpNop), // kSetpInv static_cast(spv::OpNop), // kSetpPop static_cast(spv::OpNop), // 
kSetpClr @@ -1168,9 +1168,40 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( } case ucode::AluScalarOpcode::kMaxs: - case ucode::AluScalarOpcode::kMins: { + case ucode::AluScalarOpcode::kMins: + case ucode::AluScalarOpcode::kMaxAs: + case ucode::AluScalarOpcode::kMaxAsf: { spv::Id a, b; GetOperandScalarXY(operand_storage[0], instr.scalar_operands[0], a, b); + if (instr.scalar_opcode == ucode::AluScalarOpcode::kMaxAs || + instr.scalar_opcode == ucode::AluScalarOpcode::kMaxAsf) { + // maxas: a0 = (int)clamp(floor(src0.a + 0.5), -256.0, 255.0) + // maxasf: a0 = (int)clamp(floor(src0.a), -256.0, 255.0) + spv::Id maxa_address; + if (instr.scalar_opcode == ucode::AluScalarOpcode::kMaxAs) { + maxa_address = builder_->createBinOp( + spv::OpFAdd, type_float_, a, builder_->makeFloatConstant(0.5f)); + builder_->addDecoration(maxa_address, spv::DecorationNoContraction); + } else { + maxa_address = a; + } + id_vector_temp_.clear(); + id_vector_temp_.push_back(maxa_address); + maxa_address = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450Floor, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back(maxa_address); + id_vector_temp_.push_back(builder_->makeFloatConstant(-256.0f)); + id_vector_temp_.push_back(builder_->makeFloatConstant(255.0f)); + builder_->createStore( + builder_->createUnaryOp( + spv::OpConvertFToS, type_int_, + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450NClamp, id_vector_temp_)), + var_main_address_absolute_); + } if (a == b) { // max is commonly used as mov. 
return a; @@ -1311,6 +1342,64 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( return builder_->createUnaryOp(spv::OpBitcast, type_float_, result); } + case ucode::AluScalarOpcode::kSetpEq: + case ucode::AluScalarOpcode::kSetpNe: + case ucode::AluScalarOpcode::kSetpGt: + case ucode::AluScalarOpcode::kSetpGe: { + spv::Id predicate = builder_->createBinOp( + spv::Op(kOps[size_t(instr.scalar_opcode)]), type_bool_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001), + const_float_0_); + builder_->createStore(predicate, var_main_predicate_); + predicate_written = true; + return builder_->createTriOp(spv::OpSelect, type_float_, predicate, + const_float_0_, const_float_1_); + } + case ucode::AluScalarOpcode::kSetpInv: { + spv::Id a = GetOperandComponents(operand_storage[0], + instr.scalar_operands[0], 0b0001); + spv::Id predicate = builder_->createBinOp(spv::OpFOrdEqual, type_bool_, a, + const_float_1_); + builder_->createStore(predicate, var_main_predicate_); + predicate_written = true; + return builder_->createTriOp( + spv::OpSelect, type_float_, predicate, const_float_0_, + builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp(spv::OpFOrdEqual, type_bool_, a, + const_float_0_), + const_float_1_, a)); + } + case ucode::AluScalarOpcode::kSetpPop: { + spv::Id a_minus_1 = builder_->createBinOp( + spv::OpFSub, type_float_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001), + const_float_1_); + builder_->addDecoration(a_minus_1, spv::DecorationNoContraction); + spv::Id predicate = builder_->createBinOp( + spv::OpFOrdLessThanEqual, type_bool_, a_minus_1, const_float_0_); + builder_->createStore(predicate, var_main_predicate_); + predicate_written = true; + return builder_->createTriOp(spv::OpSelect, type_float_, predicate, + const_float_0_, a_minus_1); + } + case ucode::AluScalarOpcode::kSetpClr: + builder_->createStore(builder_->makeBoolConstant(false), + var_main_predicate_); + 
return builder_->makeFloatConstant(FLT_MAX); + case ucode::AluScalarOpcode::kSetpRstr: { + spv::Id a = GetOperandComponents(operand_storage[0], + instr.scalar_operands[0], 0b0001); + spv::Id predicate = builder_->createBinOp(spv::OpFOrdEqual, type_bool_, a, + const_float_0_); + builder_->createStore(predicate, var_main_predicate_); + predicate_written = true; + return builder_->createTriOp(spv::OpSelect, type_float_, predicate, + const_float_0_, a); + } + // TODO(Triang3l): Implement the rest of instructions. } From cb35aaf13b8fb443b4c346cce2cff8b79b1dc0f8 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 1 Nov 2020 22:22:34 +0300 Subject: [PATCH 047/123] [SPIR-V] Scalar kill instructions --- src/xenia/gpu/spirv_shader_translator_alu.cc | 47 +++++++++++++++++--- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index 87b132853..83681a621 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -997,11 +997,11 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( static_cast(spv::OpNop), // kSetpPop static_cast(spv::OpNop), // kSetpClr static_cast(spv::OpNop), // kSetpRstr - static_cast(spv::OpNop), // kKillsEq - static_cast(spv::OpNop), // kKillsGt - static_cast(spv::OpNop), // kKillsGe - static_cast(spv::OpNop), // kKillsNe - static_cast(spv::OpNop), // kKillsOne + static_cast(spv::OpFOrdEqual), // kKillsEq + static_cast(spv::OpFOrdGreaterThan), // kKillsGt + static_cast(spv::OpFOrdGreaterThanEqual), // kKillsGe + static_cast(spv::OpFUnordNotEqual), // kKillsNe + static_cast(spv::OpFOrdEqual), // kKillsOne static_cast(GLSLstd450Sqrt), // kSqrt static_cast(spv::OpNop), // Invalid static_cast(spv::OpNop), // kMulsc0 @@ -1043,8 +1043,8 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( if (a != b) { // Shader Model 3: +0 or denormal * anything = +-0. 
result = ZeroIfAnyOperandIsZero( - result, GetAbsoluteOperand(a, instr.vector_operands[0]), - GetAbsoluteOperand(b, instr.vector_operands[0])); + result, GetAbsoluteOperand(a, instr.scalar_operands[0]), + GetAbsoluteOperand(b, instr.scalar_operands[0])); } return result; } @@ -1400,6 +1400,39 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( const_float_0_, a); } + case ucode::AluScalarOpcode::kKillsEq: + case ucode::AluScalarOpcode::kKillsGt: + case ucode::AluScalarOpcode::kKillsGe: + case ucode::AluScalarOpcode::kKillsNe: + case ucode::AluScalarOpcode::kKillsOne: { + // Selection merge must be the penultimate instruction in the block, check + // the condition before it. + spv::Id condition = builder_->createBinOp( + spv::Op(kOps[size_t(instr.scalar_opcode)]), type_bool_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001), + instr.scalar_opcode == ucode::AluScalarOpcode::kKillsOne + ? const_float_1_ + : const_float_0_); + spv::Block& kill_block = builder_->makeNewBlock(); + spv::Block& merge_block = builder_->makeNewBlock(); + { + std::unique_ptr selection_merge_op = + std::make_unique(spv::OpSelectionMerge); + selection_merge_op->addIdOperand(merge_block.getId()); + selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); + builder_->getBuildPoint()->addInstruction( + std::move(selection_merge_op)); + } + builder_->createConditionalBranch(condition, &kill_block, &merge_block); + builder_->setBuildPoint(&kill_block); + // TODO(Triang3l): Demote to helper invocation to keep derivatives if + // needed (and return 1 if killed in this case). + builder_->createNoResultOp(spv::OpKill); + builder_->setBuildPoint(&merge_block); + return const_float_0_; + } + // TODO(Triang3l): Implement the rest of instructions. 
} From de8b0a85ac338bc375dca8842f1e9796fa8403c7 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 1 Nov 2020 22:30:05 +0300 Subject: [PATCH 048/123] [SPIR-V] Remaining scalar ALU instructions --- src/xenia/gpu/spirv_shader_translator_alu.cc | 50 +++++++++++++++++--- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index 83681a621..1cf5ddd1d 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -1006,10 +1006,10 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( static_cast(spv::OpNop), // Invalid static_cast(spv::OpNop), // kMulsc0 static_cast(spv::OpNop), // kMulsc1 - static_cast(spv::OpNop), // kAddsc0 - static_cast(spv::OpNop), // kAddsc1 - static_cast(spv::OpNop), // kSubsc0 - static_cast(spv::OpNop), // kSubsc1 + static_cast(spv::OpFAdd), // kAddsc0 + static_cast(spv::OpFAdd), // kAddsc1 + static_cast(spv::OpFSub), // kSubsc0 + static_cast(spv::OpFSub), // kSubsc1 static_cast(GLSLstd450Sin), // kSin static_cast(GLSLstd450Cos), // kCos static_cast(spv::OpNop), // kRetainPrev @@ -1433,11 +1433,47 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( return const_float_0_; } - // TODO(Triang3l): Implement the rest of instructions. + case ucode::AluScalarOpcode::kMulsc0: + case ucode::AluScalarOpcode::kMulsc1: { + spv::Id operand_0 = GetOperandComponents( + operand_storage[0], instr.scalar_operands[0], 0b0001); + spv::Id operand_1 = GetOperandComponents( + operand_storage[1], instr.scalar_operands[1], 0b0001); + spv::Id result = + builder_->createBinOp(spv::OpFMul, type_float_, operand_0, operand_1); + builder_->addDecoration(result, spv::DecorationNoContraction); + if (!(instr.scalar_operands[0].GetIdenticalComponents( + instr.scalar_operands[1]) & + 0b0001)) { + // Shader Model 3: +0 or denormal * anything = +-0. 
+ result = ZeroIfAnyOperandIsZero( + result, GetAbsoluteOperand(operand_0, instr.scalar_operands[0]), + GetAbsoluteOperand(operand_1, instr.scalar_operands[1])); + } + return result; + } + case ucode::AluScalarOpcode::kAddsc0: + case ucode::AluScalarOpcode::kAddsc1: + case ucode::AluScalarOpcode::kSubsc0: + case ucode::AluScalarOpcode::kSubsc1: { + spv::Id result = builder_->createBinOp( + spv::Op(kOps[size_t(instr.scalar_opcode)]), type_float_, + GetOperandComponents(operand_storage[0], instr.scalar_operands[0], + 0b0001), + GetOperandComponents(operand_storage[1], instr.scalar_operands[1], + 0b0001)); + builder_->addDecoration(result, spv::DecorationNoContraction); + return result; + } + + case ucode::AluScalarOpcode::kRetainPrev: + // Special case in ProcessAluInstruction - loading ps only if writing to + // anywhere. + return spv::NoResult; } - /* assert_unhandled_case(instr.scalar_opcode); - EmitTranslationError("Unknown ALU scalar operation"); */ + assert_unhandled_case(instr.scalar_opcode); + EmitTranslationError("Unknown ALU scalar operation"); return spv::NoResult; } From 6030a4cf72a3aba13e2ff482da3e40dd893ef83e Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 2 Nov 2020 12:17:47 +0300 Subject: [PATCH 049/123] [SPIR-V] Add missing EnsureBuildPointAvailable in ALU --- src/xenia/gpu/spirv_shader_translator_alu.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index 1cf5ddd1d..7b6f54e5a 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -71,17 +71,19 @@ void SpirvShaderTranslator::ProcessAluInstruction( bool predicate_written_scalar = false; spv::Id scalar_result = ProcessScalarAluOperation(instr, predicate_written_scalar); - if (scalar_result != spv::NoResult) { + EnsureBuildPointAvailable(); builder_->createStore(scalar_result, var_main_previous_scalar_); } else { // Special 
retain_prev case - load ps only if needed and don't store the // same value back to ps. if (instr.scalar_result.GetUsedWriteMask()) { + EnsureBuildPointAvailable(); scalar_result = builder_->createLoad(var_main_previous_scalar_, spv::NoPrecision); } } + StoreResult(instr.vector_and_constant_result, vector_result); StoreResult(instr.scalar_result, scalar_result); From 8665fa751765701643255964f6bdb9158b661013 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 3 Nov 2020 23:31:52 +0300 Subject: [PATCH 050/123] [SPIR-V] Vertex index writing to r0.x --- src/xenia/gpu/spirv_shader_translator.cc | 245 ++++++++++++++++--- src/xenia/gpu/spirv_shader_translator.h | 33 +++ src/xenia/gpu/spirv_shader_translator_alu.cc | 45 +--- src/xenia/ui/vulkan/vulkan_provider.cc | 6 + 4 files changed, 252 insertions(+), 77 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index edaad6344..e85dee4eb 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -10,12 +10,14 @@ #include "xenia/gpu/spirv_shader_translator.h" #include +#include #include #include #include #include "third_party/glslang/SPIRV/GLSL.std.450.h" #include "xenia/base/assert.h" +#include "xenia/base/math.h" namespace xe { namespace gpu { @@ -140,6 +142,45 @@ void SpirvShaderTranslator::StartTranslation() { const_float2_0_1_ = builder_->makeCompositeConstant(type_float2_, id_vector_temp_); + // Common uniform buffer - system constants. 
+ struct SystemConstant { + const char* name; + size_t offset; + spv::Id type; + }; + const SystemConstant system_constants[] = { + {"vertex_index_endian", offsetof(SystemConstants, vertex_index_endian), + type_uint_}, + {"vertex_base_index", offsetof(SystemConstants, vertex_base_index), + type_int_}, + }; + id_vector_temp_.clear(); + id_vector_temp_.reserve(xe::countof(system_constants)); + for (size_t i = 0; i < xe::countof(system_constants); ++i) { + id_vector_temp_.push_back(system_constants[i].type); + } + spv::Id type_system_constants = + builder_->makeStructType(id_vector_temp_, "XeSystemConstants"); + for (size_t i = 0; i < xe::countof(system_constants); ++i) { + const SystemConstant& system_constant = system_constants[i]; + builder_->addMemberName(type_system_constants, static_cast(i), + system_constant.name); + builder_->addMemberDecoration( + type_system_constants, static_cast(i), + spv::DecorationOffset, int(system_constant.offset)); + } + builder_->addDecoration(type_system_constants, spv::DecorationBlock); + uniform_system_constants_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassUniform, type_system_constants, + "xe_uniform_system_constants"); + builder_->addDecoration(uniform_system_constants_, + spv::DecorationDescriptorSet, + kDescriptorSetSystemConstants); + builder_->addDecoration(uniform_system_constants_, spv::DecorationBinding, 0); + if (features_.spirv_version >= spv::Spv_1_4) { + main_interface_.push_back(uniform_system_constants_); + } + // Common uniform buffer - float constants. 
uint32_t float_constant_count = constant_register_map().float_count; if (float_constant_count) { @@ -307,15 +348,8 @@ void SpirvShaderTranslator::StartTranslation() { main_switch_header_ = builder_->getBuildPoint(); main_switch_merge_ = new spv::Block(builder_->getUniqueId(), *function_main_); - { - std::unique_ptr main_switch_selection_merge_op = - std::make_unique(spv::OpSelectionMerge); - main_switch_selection_merge_op->addIdOperand(main_switch_merge_->getId()); - main_switch_selection_merge_op->addImmediateOperand( - spv::SelectionControlDontFlattenMask); - builder_->getBuildPoint()->addInstruction( - std::move(main_switch_selection_merge_op)); - } + SpirvCreateSelectionMerge(main_switch_merge_->getId(), + spv::SelectionControlDontFlattenMask); main_switch_op_ = std::make_unique(spv::OpSwitch); main_switch_op_->addIdOperand(main_loop_pc_current); main_switch_op_->addIdOperand(main_switch_merge_->getId()); @@ -564,13 +598,7 @@ void SpirvShaderTranslator::ProcessLoopStartInstruction( spv::OpIEqual, type_bool_, loop_count_new, const_uint_0_); spv::Block& skip_block = builder_->makeNewBlock(); spv::Block& body_block = builder_->makeNewBlock(); - { - std::unique_ptr selection_merge_op = - std::make_unique(spv::OpSelectionMerge); - selection_merge_op->addIdOperand(body_block.getId()); - selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); - head_block.addInstruction(std::move(selection_merge_op)); - } + SpirvCreateSelectionMerge(body_block.getId()); { std::unique_ptr branch_conditional_op = std::make_unique(spv::OpBranchConditional); @@ -632,13 +660,7 @@ void SpirvShaderTranslator::ProcessLoopEndInstruction( spv::Block& body_block = *builder_->getBuildPoint(); spv::Block& continue_block = builder_->makeNewBlock(); spv::Block& break_block = builder_->makeNewBlock(); - { - std::unique_ptr selection_merge_op = - std::make_unique(spv::OpSelectionMerge); - selection_merge_op->addIdOperand(break_block.getId()); - 
selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); - body_block.addInstruction(std::move(selection_merge_op)); - } + SpirvCreateSelectionMerge(break_block.getId()); { std::unique_ptr branch_conditional_op = std::make_unique(spv::OpBranchConditional); @@ -841,6 +863,53 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { var_main_point_size_edge_flag_kill_vertex_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_float3_, "xe_var_point_size_edge_flag_kill_vertex"); + + // Load the vertex index or the tessellation parameters. + if (register_count()) { + // TODO(Triang3l): Barycentric coordinates and patch index. + if (IsSpirvVertexShader()) { + // TODO(Triang3l): Fetch the vertex index from the shared memory when + // fullDrawIndexUint32 isn't available and the index is 32-bit and needs + // endian swap. + // TODO(Triang3l): Close line loop primitive. + // Load the unswapped index as uint for swapping. + spv::Id vertex_index = builder_->createUnaryOp( + spv::OpBitcast, type_uint_, + builder_->createLoad(input_vertex_index_, spv::NoPrecision)); + // Endian-swap the index and convert to int. + id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(kSystemConstantIndexVertexIndexEndian)); + spv::Id vertex_index_endian = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision); + vertex_index = builder_->createUnaryOp( + spv::OpBitcast, type_int_, + EndianSwap32Uint(vertex_index, vertex_index_endian)); + // Add the base to the index. 
+ id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(kSystemConstantIndexVertexBaseIndex)); + vertex_index = builder_->createBinOp( + spv::OpIAdd, type_int_, vertex_index, + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision)); + // Write the index to r0.x as float. + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(const_int_0_); + builder_->createStore( + builder_->createUnaryOp(spv::OpConvertSToF, type_float_, + vertex_index), + builder_->createAccessChain(spv::StorageClassFunction, + var_main_registers_, id_vector_temp_)); + } + } } void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() {} @@ -918,13 +987,7 @@ void SpirvShaderTranslator::UpdateExecConditionals( spv::Function& function = builder_->getBuildPoint()->getParent(); cf_exec_conditional_merge_ = new spv::Block(builder_->getUniqueId(), function); - { - std::unique_ptr selection_merge_op = - std::make_unique(spv::OpSelectionMerge); - selection_merge_op->addIdOperand(cf_exec_conditional_merge_->getId()); - selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); - builder_->getBuildPoint()->addInstruction(std::move(selection_merge_op)); - } + SpirvCreateSelectionMerge(cf_exec_conditional_merge_->getId()); spv::Block& inner_block = builder_->makeNewBlock(); builder_->createConditionalBranch( condition_id, condition ? 
&inner_block : cf_exec_conditional_merge_, @@ -963,13 +1026,7 @@ void SpirvShaderTranslator::UpdateInstructionPredication(bool predicated, builder_->createLoad(var_main_predicate_, spv::NoPrecision); spv::Block& predicated_block = builder_->makeNewBlock(); cf_instruction_predicate_merge_ = &builder_->makeNewBlock(); - { - std::unique_ptr selection_merge_op = - std::make_unique(spv::OpSelectionMerge); - selection_merge_op->addIdOperand(cf_instruction_predicate_merge_->getId()); - selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); - builder_->getBuildPoint()->addInstruction(std::move(selection_merge_op)); - } + SpirvCreateSelectionMerge(cf_instruction_predicate_merge_->getId()); builder_->createConditionalBranch( predicate_id, condition ? &predicated_block : cf_instruction_predicate_merge_, @@ -1426,5 +1483,119 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, builder_->createStore(value_to_store, target_pointer); } +spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) { + spv::Id type = builder_->getTypeId(value); + spv::Id const_uint_8_scalar = builder_->makeUintConstant(8); + spv::Id const_uint_00ff00ff_scalar = builder_->makeUintConstant(0x00FF00FF); + spv::Id const_uint_16_scalar = builder_->makeUintConstant(16); + spv::Id const_uint_8_typed, const_uint_00ff00ff_typed, const_uint_16_typed; + int num_components = builder_->getNumTypeComponents(type); + if (num_components > 1) { + id_vector_temp_.reserve(num_components); + id_vector_temp_.clear(); + id_vector_temp_.insert(id_vector_temp_.cend(), num_components, + const_uint_8_scalar); + const_uint_8_typed = builder_->makeCompositeConstant(type, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.insert(id_vector_temp_.cend(), num_components, + const_uint_00ff00ff_scalar); + const_uint_00ff00ff_typed = + builder_->makeCompositeConstant(type, id_vector_temp_); + id_vector_temp_.clear(); + 
id_vector_temp_.insert(id_vector_temp_.cend(), num_components, + const_uint_16_scalar); + const_uint_16_typed = + builder_->makeCompositeConstant(type, id_vector_temp_); + } else { + const_uint_8_typed = const_uint_8_scalar; + const_uint_00ff00ff_typed = const_uint_00ff00ff_scalar; + const_uint_16_typed = const_uint_16_scalar; + } + + // 8-in-16 or one half of 8-in-32 (doing 8-in-16 swap). + spv::Id is_8in16 = builder_->createBinOp( + spv::OpIEqual, type_bool_, endian, + builder_->makeUintConstant( + static_cast(xenos::Endian::k8in16))); + spv::Id is_8in32 = builder_->createBinOp( + spv::OpIEqual, type_bool_, endian, + builder_->makeUintConstant( + static_cast(xenos::Endian::k8in32))); + spv::Id is_8in16_or_8in32 = + builder_->createBinOp(spv::OpLogicalAnd, type_bool_, is_8in16, is_8in32); + spv::Block& block_pre_8in16 = *builder_->getBuildPoint(); + assert_false(block_pre_8in16.isTerminated()); + spv::Block& block_8in16 = builder_->makeNewBlock(); + spv::Block& block_8in16_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_8in16_merge.getId()); + builder_->createConditionalBranch(is_8in16_or_8in32, &block_8in16, + &block_8in16_merge); + builder_->setBuildPoint(&block_8in16); + spv::Id swapped_8in16 = builder_->createBinOp( + spv::OpBitwiseOr, type, + builder_->createBinOp( + spv::OpBitwiseAnd, type, + builder_->createBinOp(spv::OpShiftRightLogical, type, value, + const_uint_8_typed), + const_uint_00ff00ff_typed), + builder_->createBinOp( + spv::OpShiftLeftLogical, type, + builder_->createBinOp(spv::OpBitwiseAnd, type, value, + const_uint_00ff00ff_typed), + const_uint_8_typed)); + builder_->createBranch(&block_8in16_merge); + builder_->setBuildPoint(&block_8in16_merge); + { + std::unique_ptr phi_op = + std::make_unique(builder_->getUniqueId(), type, + spv::OpPhi); + phi_op->addIdOperand(swapped_8in16); + phi_op->addIdOperand(block_8in16.getId()); + phi_op->addIdOperand(value); + phi_op->addIdOperand(block_pre_8in16.getId()); + value = 
phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + } + + // 16-in-32 or another half of 8-in-32 (doing 16-in-32 swap). + spv::Id is_16in32 = builder_->createBinOp( + spv::OpIEqual, type_bool_, endian, + builder_->makeUintConstant( + static_cast(xenos::Endian::k16in32))); + spv::Id is_8in32_or_16in32 = + builder_->createBinOp(spv::OpLogicalAnd, type_bool_, is_8in32, is_16in32); + spv::Block& block_pre_16in32 = *builder_->getBuildPoint(); + spv::Block& block_16in32 = builder_->makeNewBlock(); + spv::Block& block_16in32_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_16in32_merge.getId()); + builder_->createConditionalBranch(is_8in32_or_16in32, &block_16in32, + &block_16in32_merge); + builder_->setBuildPoint(&block_16in32); + id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + id_vector_temp_.push_back(builder_->createBinOp( + spv::OpShiftRightLogical, type, value, const_uint_16_typed)); + id_vector_temp_.push_back(value); + id_vector_temp_.insert(id_vector_temp_.cend(), 2, + builder_->makeIntConstant(16)); + spv::Id swapped_16in32 = + builder_->createOp(spv::OpBitFieldInsert, type, id_vector_temp_); + builder_->createBranch(&block_16in32_merge); + builder_->setBuildPoint(&block_16in32_merge); + { + std::unique_ptr phi_op = + std::make_unique(builder_->getUniqueId(), type, + spv::OpPhi); + phi_op->addIdOperand(swapped_16in32); + phi_op->addIdOperand(block_16in32.getId()); + phi_op->addIdOperand(value); + phi_op->addIdOperand(block_pre_16in32.getId()); + value = phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + } + + return value; +} + } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index cdf812791..42a211ffb 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -17,6 +17,7 @@ #include "third_party/glslang/SPIRV/SpvBuilder.h" #include 
"xenia/gpu/shader_translator.h" +#include "xenia/gpu/xenos.h" #include "xenia/ui/vulkan/vulkan_provider.h" namespace xe { @@ -24,6 +25,19 @@ namespace gpu { class SpirvShaderTranslator : public ShaderTranslator { public: + // IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED: + // - SystemConstantIndex enum. + // - Structure members in BeginTranslation. + struct SystemConstants { + xenos::Endian vertex_index_endian; + int32_t vertex_base_index; + }; + + // The minimum limit for maxPerStageDescriptorStorageBuffers is 4, and for + // maxStorageBufferRange it's 128 MB. These are the values of those limits on + // Arm Mali as of November 2020. Xenia needs 512 MB shared memory to be bound, + // therefore SSBOs must only be used for shared memory - all other storage + // resources must be images or texel buffers. enum DescriptorSet : uint32_t { // In order of update frequency. // Very frequently changed, especially for UI draws, and for models drawn in @@ -78,6 +92,17 @@ class SpirvShaderTranslator : public ShaderTranslator { void ProcessAluInstruction(const ParsedAluInstruction& instr) override; private: + // Builder helpers. + void SpirvCreateSelectionMerge( + spv::Id merge_block_id, spv::SelectionControlMask selection_control_mask = + spv::SelectionControlMaskNone) { + std::unique_ptr selection_merge_op = + std::make_unique(spv::OpSelectionMerge); + selection_merge_op->addIdOperand(merge_block_id); + selection_merge_op->addImmediateOperand(selection_control_mask); + builder_->getBuildPoint()->addInstruction(std::move(selection_merge_op)); + } + // TODO(Triang3l): Depth-only pixel shader. bool IsSpirvVertexOrTessEvalShader() const { return is_vertex_shader(); } bool IsSpirvVertexShader() const { @@ -176,6 +201,9 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id ProcessScalarAluOperation(const ParsedAluInstruction& instr, bool& predicate_written); + // Perform endian swap of a uint scalar or vector. 
+ spv::Id EndianSwap32Uint(spv::Id value, spv::Id endian); + Features features_; std::unique_ptr builder_; @@ -242,6 +270,11 @@ class SpirvShaderTranslator : public ShaderTranslator { // components. spv::Id const_float2_0_1_; + enum SystemConstantIndex : unsigned int { + kSystemConstantIndexVertexIndexEndian, + kSystemConstantIndexVertexBaseIndex, + }; + spv::Id uniform_system_constants_; spv::Id uniform_float_constants_; spv::Id uniform_bool_loop_constants_; diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index 7b6f54e5a..bdf4e7f9e 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -580,14 +580,7 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( spv::Block& ma_yx_block = builder_->makeNewBlock(); spv::Block* ma_merge_block = new spv::Block(builder_->getUniqueId(), function); - { - std::unique_ptr selection_merge_op = - std::make_unique(spv::OpSelectionMerge); - selection_merge_op->addIdOperand(ma_merge_block->getId()); - selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); - builder_->getBuildPoint()->addInstruction( - std::move(selection_merge_op)); - } + SpirvCreateSelectionMerge(ma_merge_block->getId()); builder_->createConditionalBranch(ma_z_condition, &ma_z_block, &ma_yx_block); @@ -627,14 +620,7 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( spv::Block& ma_y_block = builder_->makeNewBlock(); spv::Block& ma_x_block = builder_->makeNewBlock(); spv::Block& ma_yx_merge_block = builder_->makeNewBlock(); - { - std::unique_ptr selection_merge_op = - std::make_unique(spv::OpSelectionMerge); - selection_merge_op->addIdOperand(ma_yx_merge_block.getId()); - selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); - builder_->getBuildPoint()->addInstruction( - std::move(selection_merge_op)); - } + SpirvCreateSelectionMerge(ma_yx_merge_block.getId()); 
builder_->createConditionalBranch(ma_y_condition, &ma_y_block, &ma_x_block); @@ -849,14 +835,7 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( 0b1111))); spv::Block& kill_block = builder_->makeNewBlock(); spv::Block& merge_block = builder_->makeNewBlock(); - { - std::unique_ptr selection_merge_op = - std::make_unique(spv::OpSelectionMerge); - selection_merge_op->addIdOperand(merge_block.getId()); - selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); - builder_->getBuildPoint()->addInstruction( - std::move(selection_merge_op)); - } + SpirvCreateSelectionMerge(merge_block.getId()); builder_->createConditionalBranch(condition, &kill_block, &merge_block); builder_->setBuildPoint(&kill_block); // TODO(Triang3l): Demote to helper invocation to keep derivatives if @@ -1117,14 +1096,7 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( const_float_0_)); spv::Block& multiply_block = builder_->makeNewBlock(); spv::Block& merge_block = builder_->makeNewBlock(); - { - std::unique_ptr selection_merge_op = - std::make_unique(spv::OpSelectionMerge); - selection_merge_op->addIdOperand(merge_block.getId()); - selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); - builder_->getBuildPoint()->addInstruction( - std::move(selection_merge_op)); - } + SpirvCreateSelectionMerge(merge_block.getId()); { std::unique_ptr branch_conditional_op = std::make_unique(spv::OpBranchConditional); @@ -1418,14 +1390,7 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation( : const_float_0_); spv::Block& kill_block = builder_->makeNewBlock(); spv::Block& merge_block = builder_->makeNewBlock(); - { - std::unique_ptr selection_merge_op = - std::make_unique(spv::OpSelectionMerge); - selection_merge_op->addIdOperand(merge_block.getId()); - selection_merge_op->addImmediateOperand(spv::SelectionControlMaskNone); - builder_->getBuildPoint()->addInstruction( - std::move(selection_merge_op)); - } + SpirvCreateSelectionMerge(merge_block.getId()); 
builder_->createConditionalBranch(condition, &kill_block, &merge_block); builder_->setBuildPoint(&kill_block); // TODO(Triang3l): Demote to helper invocation to keep derivatives if diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index 672b17162..6f0e1e707 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -287,6 +287,12 @@ bool VulkanProvider::Initialize() { // Get physical device features and check if the needed ones are supported. ifn_.vkGetPhysicalDeviceFeatures(physical_device_current, &device_features_); + // Passing indices directly from guest memory, where they are big-endian; a + // workaround using fetch from shared memory for 32-bit indices that need + // swapping isn't implemented yet. Not supported only Qualcomm Adreno 4xx. + if (!device_features_.fullDrawIndexUint32) { + continue; + } // TODO(Triang3l): Make geometry shaders optional by providing compute // shader fallback (though that would require vertex shader stores). if (!device_features_.geometryShader) { From 533bdf21147eb12c56b3eee0c3ca1c323337c2f7 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Wed, 4 Nov 2020 18:57:08 +0300 Subject: [PATCH 051/123] [SPIR-V] Shared memory SSBOs --- src/xenia/gpu/spirv_shader_translator.cc | 128 ++++++++++++++++++++++- src/xenia/gpu/spirv_shader_translator.h | 20 ++++ 2 files changed, 147 insertions(+), 1 deletion(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index e85dee4eb..90984b985 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -24,13 +25,16 @@ namespace gpu { SpirvShaderTranslator::Features::Features(bool all) : spirv_version(all ? spv::Spv_1_5 : spv::Spv_1_0), + max_storage_buffer_range(all ? 
UINT32_MAX : (128 * 1024 * 1024)), clip_distance(all), cull_distance(all), float_controls(all) {} SpirvShaderTranslator::Features::Features( const ui::vulkan::VulkanProvider& provider) - : clip_distance(provider.device_features().shaderClipDistance), + : max_storage_buffer_range( + provider.device_properties().limits.maxStorageBufferRange), + clip_distance(provider.device_features().shaderClipDistance), cull_distance(provider.device_features().shaderCullDistance) { uint32_t device_version = provider.device_properties().apiVersion; const ui::vulkan::VulkanProvider::DeviceExtensions& device_extensions = @@ -250,6 +254,50 @@ void SpirvShaderTranslator::StartTranslation() { main_interface_.push_back(uniform_bool_loop_constants_); } + // Common storage buffers - shared memory uint[], each 128 MB or larger, + // depending on what's possible on the device. glslang generates everything, + // including all the types, for each storage buffer separately. + uint32_t shared_memory_binding_count = + 1 << GetSharedMemoryStorageBufferCountLog2(); + char shared_memory_struct_name[] = "XeSharedMemory0"; + char shared_memory_buffer_name[] = "xe_shared_memory_0"; + for (uint32_t i = 0; i < shared_memory_binding_count; ++i) { + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeRuntimeArray(type_uint_)); + // Storage buffers have std430 packing, no padding to 4-component vectors. + builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, + sizeof(uint32_t)); + shared_memory_struct_name[xe::countof(shared_memory_struct_name) - 2] = + '0' + i; + spv::Id type_shared_memory = + builder_->makeStructType(id_vector_temp_, shared_memory_struct_name); + builder_->addMemberName(type_shared_memory, 0, "memory"); + // TODO(Triang3l): Make writable when memexport is implemented. 
+ builder_->addMemberDecoration(type_shared_memory, 0, + spv::DecorationNonWritable); + builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationOffset, + 0); + builder_->addDecoration(type_shared_memory, + features_.spirv_version >= spv::Spv_1_3 + ? spv::DecorationBlock + : spv::DecorationBufferBlock); + shared_memory_buffer_name[xe::countof(shared_memory_buffer_name) - 2] = + '0' + i; + spv::Id buffer_shared_memory = builder_->createVariable( + spv::NoPrecision, + features_.spirv_version >= spv::Spv_1_3 ? spv::StorageClassStorageBuffer + : spv::StorageClassUniform, + type_shared_memory, shared_memory_buffer_name); + buffers_shared_memory_[i] = buffer_shared_memory; + builder_->addDecoration(buffer_shared_memory, spv::DecorationDescriptorSet, + int(kDescriptorSetSharedMemoryAndEdram)); + builder_->addDecoration(buffer_shared_memory, spv::DecorationBinding, + int(i)); + if (features_.spirv_version >= spv::Spv_1_4) { + main_interface_.push_back(buffer_shared_memory); + } + } + if (IsSpirvVertexOrTessEvalShader()) { StartVertexOrTessEvalShaderBeforeMain(); } @@ -1597,5 +1645,83 @@ spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) { return value; } +spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory( + spv::Id address_dwords_int) { + spv::Block& head_block = *builder_->getBuildPoint(); + assert_false(head_block.isTerminated()); + + spv::StorageClass storage_class = features_.spirv_version >= spv::Spv_1_3 + ? spv::StorageClassStorageBuffer + : spv::StorageClassUniform; + uint32_t buffer_count_log2 = GetSharedMemoryStorageBufferCountLog2(); + if (!buffer_count_log2) { + // Single binding - load directly. + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + // The only SSBO struct member. 
+ id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(address_dwords_int); + return builder_->createLoad( + builder_->createAccessChain(storage_class, buffers_shared_memory_[0], + id_vector_temp_), + spv::NoPrecision); + } + + // The memory is split into multiple bindings - check which binding to load + // from. 29 is log2(512 MB), but addressing in dwords (4 B). + uint32_t binding_address_bits = (29 - 2) - buffer_count_log2; + spv::Id binding_index = builder_->createBinOp( + spv::OpShiftRightLogical, type_uint_, + builder_->createUnaryOp(spv::OpBitcast, type_uint_, address_dwords_int), + builder_->makeUintConstant(binding_address_bits)); + spv::Id binding_address = builder_->createBinOp( + spv::OpBitwiseAnd, type_int_, address_dwords_int, + builder_->makeIntConstant( + int((uint32_t(1) << binding_address_bits) - 1))); + uint32_t buffer_count = 1 << buffer_count_log2; + spv::Block* switch_case_blocks[512 / 128]; + for (uint32_t i = 0; i < buffer_count; ++i) { + switch_case_blocks[i] = &builder_->makeNewBlock(); + } + spv::Block& switch_merge_block = builder_->makeNewBlock(); + spv::Id value_phi_result = builder_->getUniqueId(); + std::unique_ptr value_phi_op = + std::make_unique(value_phi_result, type_uint_, + spv::OpPhi); + SpirvCreateSelectionMerge(switch_merge_block.getId(), + spv::SelectionControlDontFlattenMask); + { + std::unique_ptr switch_op = + std::make_unique(spv::OpSwitch); + switch_op->addIdOperand(binding_index); + // Highest binding index is the default case. + switch_op->addIdOperand(switch_case_blocks[buffer_count - 1]->getId()); + switch_case_blocks[buffer_count - 1]->addPredecessor(&head_block); + for (uint32_t i = 0; i < buffer_count - 1; ++i) { + switch_op->addImmediateOperand(int(i)); + switch_op->addIdOperand(switch_case_blocks[i]->getId()); + switch_case_blocks[i]->addPredecessor(&head_block); + } + builder_->getBuildPoint()->addInstruction(std::move(switch_op)); + } + // Set up the access chain indices. 
+ id_vector_temp_.clear(); + // The only SSBO struct member. + id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(binding_address); + for (uint32_t i = 0; i < buffer_count; ++i) { + builder_->setBuildPoint(switch_case_blocks[i]); + value_phi_op->addIdOperand(builder_->createLoad( + builder_->createAccessChain(storage_class, buffers_shared_memory_[i], + id_vector_temp_), + spv::NoPrecision)); + value_phi_op->addIdOperand(switch_case_blocks[i]->getId()); + builder_->createBranch(&switch_merge_block); + } + builder_->setBuildPoint(&switch_merge_block); + builder_->getBuildPoint()->addInstruction(std::move(value_phi_op)); + return value_phi_result; +} + } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 42a211ffb..0cb265fc7 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -66,12 +66,28 @@ class SpirvShaderTranslator : public ShaderTranslator { explicit Features(const ui::vulkan::VulkanProvider& provider); explicit Features(bool all = false); unsigned int spirv_version; + uint32_t max_storage_buffer_range; bool clip_distance; bool cull_distance; bool float_controls; }; SpirvShaderTranslator(const Features& features); + static constexpr uint32_t GetSharedMemoryStorageBufferCountLog2( + uint32_t max_storage_buffer_range) { + if (max_storage_buffer_range >= 512 * 1024 * 1024) { + return 0; + } + if (max_storage_buffer_range >= 256 * 1024 * 1024) { + return 1; + } + return 2; + } + uint32_t GetSharedMemoryStorageBufferCountLog2() const { + return GetSharedMemoryStorageBufferCountLog2( + features_.max_storage_buffer_range); + } + protected: void Reset() override; @@ -204,6 +220,8 @@ class SpirvShaderTranslator : public ShaderTranslator { // Perform endian swap of a uint scalar or vector. 
spv::Id EndianSwap32Uint(spv::Id value, spv::Id endian); + spv::Id LoadUint32FromSharedMemory(spv::Id address_dwords_int); + Features features_; std::unique_ptr builder_; @@ -278,6 +296,8 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id uniform_float_constants_; spv::Id uniform_bool_loop_constants_; + spv::Id buffers_shared_memory_[512 / 128]; + // VS as VS only - int. spv::Id input_vertex_index_; // VS as TES only - int. From a82b85a8ac19ed3156c2f3a73a3aa39758711ab8 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Wed, 4 Nov 2020 23:56:12 +0300 Subject: [PATCH 052/123] [SPIR-V] vfetch: 32_* and 32_*_FLOAT formats --- src/xenia/gpu/dxbc_shader_translator_fetch.cc | 4 +- src/xenia/gpu/spirv_shader_translator.cc | 31 +- src/xenia/gpu/spirv_shader_translator.h | 26 +- .../gpu/spirv_shader_translator_fetch.cc | 283 ++++++++++++++++++ 4 files changed, 336 insertions(+), 8 deletions(-) create mode 100644 src/xenia/gpu/spirv_shader_translator_fetch.cc diff --git a/src/xenia/gpu/dxbc_shader_translator_fetch.cc b/src/xenia/gpu/dxbc_shader_translator_fetch.cc index 0a86f7ff6..6bea4109f 100644 --- a/src/xenia/gpu/dxbc_shader_translator_fetch.cc +++ b/src/xenia/gpu/dxbc_shader_translator_fetch.cc @@ -99,8 +99,8 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( DxbcOpAnd(address_dest, fetch_constant_src.SelectFromSwizzled(0), DxbcSrc::LU(~uint32_t(3))); } - // Add the word offset from the instruction, plus the offset of the first - // needed word within the element. + // Add the word offset from the instruction (signed), plus the offset of the + // first needed word within the element. 
uint32_t first_word_index; xe::bit_scan_forward(needed_words, &first_word_index); int32_t first_word_buffer_offset = diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 90984b985..3fbc29220 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -97,8 +97,11 @@ void SpirvShaderTranslator::StartTranslation() { type_bool3_ = builder_->makeVectorType(type_bool_, 3); type_bool4_ = builder_->makeVectorType(type_bool_, 4); type_int_ = builder_->makeIntType(32); + type_int2_ = builder_->makeVectorType(type_int_, 2); + type_int3_ = builder_->makeVectorType(type_int_, 3); type_int4_ = builder_->makeVectorType(type_int_, 4); type_uint_ = builder_->makeUintType(32); + type_uint2_ = builder_->makeVectorType(type_uint_, 2); type_uint3_ = builder_->makeVectorType(type_uint_, 3); type_uint4_ = builder_->makeVectorType(type_uint_, 4); type_float_ = builder_->makeFloatType(32); @@ -254,6 +257,31 @@ void SpirvShaderTranslator::StartTranslation() { main_interface_.push_back(uniform_bool_loop_constants_); } + // Common uniform buffer - fetch constants (32 x 6 uints packed in std140 as + // 4-component vectors). 
+ id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeArrayType( + type_uint4_, builder_->makeUintConstant(32 * 6 / 4), + sizeof(uint32_t) * 4)); + builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, + sizeof(uint32_t) * 4); + spv::Id type_fetch_constants = + builder_->makeStructType(id_vector_temp_, "XeFetchConstants"); + builder_->addMemberName(type_fetch_constants, 0, "fetch_constants"); + builder_->addMemberDecoration(type_fetch_constants, 0, spv::DecorationOffset, + 0); + builder_->addDecoration(type_fetch_constants, spv::DecorationBlock); + uniform_fetch_constants_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassUniform, type_fetch_constants, + "xe_uniform_fetch_constants"); + builder_->addDecoration(uniform_fetch_constants_, + spv::DecorationDescriptorSet, + int(kDescriptorSetFetchConstants)); + builder_->addDecoration(uniform_fetch_constants_, spv::DecorationBinding, 0); + if (features_.spirv_version >= spv::Spv_1_4) { + main_interface_.push_back(uniform_fetch_constants_); + } + // Common storage buffers - shared memory uint[], each 128 MB or larger, // depending on what's possible on the device. glslang generates everything, // including all the types, for each storage buffer separately. @@ -271,7 +299,7 @@ void SpirvShaderTranslator::StartTranslation() { '0' + i; spv::Id type_shared_memory = builder_->makeStructType(id_vector_temp_, shared_memory_struct_name); - builder_->addMemberName(type_shared_memory, 0, "memory"); + builder_->addMemberName(type_shared_memory, 0, "shared_memory"); // TODO(Triang3l): Make writable when memexport is implemented. builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationNonWritable); @@ -1706,6 +1734,7 @@ spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory( } // Set up the access chain indices. id_vector_temp_.clear(); + id_vector_temp_.reserve(2); // The only SSBO struct member. 
id_vector_temp_.push_back(const_int_0_); id_vector_temp_.push_back(binding_address); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 0cb265fc7..e98df7d2e 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -105,6 +105,8 @@ class SpirvShaderTranslator : public ShaderTranslator { const ParsedLoopEndInstruction& instr) override; void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override; + void ProcessVertexFetchInstruction( + const ParsedVertexFetchInstruction& instr) override; void ProcessAluInstruction(const ParsedAluInstruction& instr) override; private: @@ -247,11 +249,24 @@ class SpirvShaderTranslator : public ShaderTranslator { // Index = component count - 1. spv::Id type_bool_vectors_[4]; }; - spv::Id type_int_; - spv::Id type_int4_; - spv::Id type_uint_; - spv::Id type_uint3_; - spv::Id type_uint4_; + union { + struct { + spv::Id type_int_; + spv::Id type_int2_; + spv::Id type_int3_; + spv::Id type_int4_; + }; + spv::Id type_int_vectors_[4]; + }; + union { + struct { + spv::Id type_uint_; + spv::Id type_uint2_; + spv::Id type_uint3_; + spv::Id type_uint4_; + }; + spv::Id type_uint_vectors_[4]; + }; union { struct { spv::Id type_float_; @@ -295,6 +310,7 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id uniform_system_constants_; spv::Id uniform_float_constants_; spv::Id uniform_bool_loop_constants_; + spv::Id uniform_fetch_constants_; spv::Id buffers_shared_memory_[512 / 128]; diff --git a/src/xenia/gpu/spirv_shader_translator_fetch.cc b/src/xenia/gpu/spirv_shader_translator_fetch.cc new file mode 100644 index 000000000..a885c2dea --- /dev/null +++ b/src/xenia/gpu/spirv_shader_translator_fetch.cc @@ -0,0 +1,283 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + 
* Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/spirv_shader_translator.h" + +#include +#include +#include + +#include "third_party/glslang/SPIRV/GLSL.std.450.h" +#include "xenia/base/math.h" + +namespace xe { +namespace gpu { + +void SpirvShaderTranslator::ProcessVertexFetchInstruction( + const ParsedVertexFetchInstruction& instr) { + UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition); + + uint32_t used_result_components = instr.result.GetUsedResultComponents(); + uint32_t needed_words = xenos::GetVertexFormatNeededWords( + instr.attributes.data_format, used_result_components); + if (!needed_words) { + // Nothing to load - just constant 0/1 writes, or the swizzle includes only + // components that don't exist in the format (writing zero instead of them). + // Unpacking assumes at least some word is needed. + StoreResult(instr.result, spv::NoResult); + return; + } + + EnsureBuildPointAvailable(); + + // Get the base address in dwords from the bits 2:31 of the first fetch + // constant word. + uint32_t fetch_constant_word_0_index = instr.operands[1].storage_index << 1; + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + // The only element of the fetch constant buffer. + id_vector_temp_.push_back(const_int_0_); + // Vector index. + id_vector_temp_.push_back( + builder_->makeIntConstant(int(fetch_constant_word_0_index >> 2))); + // Component index. 
+ id_vector_temp_.push_back( + builder_->makeIntConstant(int(fetch_constant_word_0_index & 3))); + spv::Id fetch_constant_word_0 = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_fetch_constants_, id_vector_temp_), + spv::NoPrecision); + // TODO(Triang3l): Verify the fetch constant type (that it's a vertex fetch, + // not a texture fetch) here instead of dropping draws with invalid vertex + // fetch constants on the CPU when proper bound checks are added - vfetch may + // be conditional, so fetch constants may also be used conditionally. + spv::Id address = builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createBinOp(spv::OpShiftRightLogical, type_uint_, + fetch_constant_word_0, + builder_->makeUintConstant(2))); + if (instr.attributes.stride) { + // Convert the index to an integer by flooring or by rounding to the nearest + // (as floor(index + 0.5) because rounding to the nearest even makes no + // sense for addressing, both 1.5 and 2.5 would be 2). 
+ // http://web.archive.org/web/20100302145413/http://msdn.microsoft.com:80/en-us/library/bb313960.aspx + spv::Id index = GetOperandComponents(LoadOperandStorage(instr.operands[0]), + instr.operands[0], 0b0001); + if (instr.attributes.is_index_rounded) { + index = builder_->createBinOp(spv::OpFAdd, type_float_, index, + builder_->makeFloatConstant(0.5f)); + builder_->addDecoration(index, spv::DecorationNoContraction); + } + id_vector_temp_.clear(); + id_vector_temp_.push_back(index); + index = builder_->createUnaryOp( + spv::OpConvertFToS, type_int_, + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450Floor, id_vector_temp_)); + if (instr.attributes.stride > 1) { + index = builder_->createBinOp( + spv::OpIMul, type_int_, index, + builder_->makeIntConstant(int(instr.attributes.stride))); + } + address = builder_->createBinOp(spv::OpIAdd, type_int_, address, index); + } + // Add the word offset from the instruction (signed), plus the offset of the + // first needed word within the element. + uint32_t first_word_index; + xe::bit_scan_forward(needed_words, &first_word_index); + int32_t first_word_buffer_offset = + instr.attributes.offset + int32_t(first_word_index); + if (first_word_buffer_offset) { + // Add the constant word offset. + address = builder_->createBinOp( + spv::OpIAdd, type_int_, address, + builder_->makeIntConstant(int(first_word_buffer_offset))); + } + + // Load the needed words. + unsigned int word_composite_indices[4] = {}; + spv::Id word_composite_construct[4]; + uint32_t word_count = 0; + uint32_t words_remaining = needed_words; + uint32_t word_index; + while (xe::bit_scan_forward(words_remaining, &word_index)) { + words_remaining &= ~(1 << word_index); + spv::Id word_address = address; + // Add the word offset from the instruction (signed), plus the offset of the + // word within the element. 
+ int32_t word_offset = instr.attributes.offset + word_index; + if (word_offset) { + word_address = + builder_->createBinOp(spv::OpIAdd, type_int_, word_address, + builder_->makeIntConstant(int(word_offset))); + } + word_composite_indices[word_index] = word_count; + // FIXME(Triang3l): Bound checking is not done here, but haven't encountered + // any games relying on out-of-bounds access. On Adreno 200 on Android (LG + // P705), however, words (not full elements) out of glBufferData bounds + // contain 0. + word_composite_construct[word_count++] = + LoadUint32FromSharedMemory(word_address); + } + spv::Id words; + if (word_count > 1) { + // Copying from the array to id_vector_temp_ now, not in the loop above, + // because of the LoadUint32FromSharedMemory call (potentially using + // id_vector_temp_ internally). + id_vector_temp_.clear(); + id_vector_temp_.reserve(word_count); + id_vector_temp_.insert(id_vector_temp_.cend(), word_composite_construct, + word_composite_construct + word_count); + words = builder_->createCompositeConstruct( + type_uint_vectors_[word_count - 1], id_vector_temp_); + } else { + words = word_composite_construct[0]; + } + + // Endian swap the words, getting the endianness from bits 0:1 of the second + // fetch constant word. + uint32_t fetch_constant_word_1_index = fetch_constant_word_0_index + 1; + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + // The only element of the fetch constant buffer. + id_vector_temp_.push_back(const_int_0_); + // Vector index. + id_vector_temp_.push_back( + builder_->makeIntConstant(int(fetch_constant_word_1_index >> 2))); + // Component index. 
+ id_vector_temp_.push_back( + builder_->makeIntConstant(int(fetch_constant_word_1_index & 3))); + spv::Id fetch_constant_word_1 = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_fetch_constants_, id_vector_temp_), + spv::NoPrecision); + words = EndianSwap32Uint( + words, builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, + fetch_constant_word_1, + builder_->makeUintConstant(0b11))); + + spv::Id result = spv::NoResult; + + // Convert the format. + uint32_t used_format_components = + used_result_components & ((1 << xenos::GetVertexFormatComponentCount( + instr.attributes.data_format)) - + 1); + // If needed_words is not zero (checked in the beginning), this must not be + // zero too. For simplicity, it's assumed that something will be unpacked + // here. + assert_not_zero(used_format_components); + uint32_t used_format_component_count = xe::bit_count(used_format_components); + spv::Id result_type = type_float_vectors_[used_format_component_count - 1]; + switch (instr.attributes.data_format) { + // TODO(Triang3l): All format conversion. 
+ + case xenos::VertexFormat::k_32: + case xenos::VertexFormat::k_32_32: + case xenos::VertexFormat::k_32_32_32_32: + assert_true(used_format_components == needed_words); + if (instr.attributes.is_signed) { + result = builder_->createUnaryOp( + spv::OpBitcast, type_int_vectors_[used_format_component_count - 1], + words); + result = + builder_->createUnaryOp(spv::OpConvertSToF, result_type, result); + } else { + result = + builder_->createUnaryOp(spv::OpConvertUToF, result_type, words); + } + if (!instr.attributes.is_integer) { + if (instr.attributes.is_signed) { + switch (instr.attributes.signed_rf_mode) { + case xenos::SignedRepeatingFractionMode::kZeroClampMinusOne: + result = builder_->createBinOp( + spv::OpVectorTimesScalar, result_type, result, + builder_->makeFloatConstant(1.0f / 2147483647.0f)); + builder_->addDecoration(result, spv::DecorationNoContraction); + // No need to clamp to -1 if signed - 1/(2^31-1) is rounded to + // 1/(2^31) as float32. + break; + case xenos::SignedRepeatingFractionMode::kNoZero: { + result = builder_->createBinOp( + spv::OpVectorTimesScalar, result_type, result, + builder_->makeFloatConstant(1.0f / 2147483647.5f)); + builder_->addDecoration(result, spv::DecorationNoContraction); + spv::Id const_no_zero = + builder_->makeFloatConstant(0.5f / 2147483647.5f); + if (used_format_component_count > 1) { + id_vector_temp_.clear(); + id_vector_temp_.reserve(used_format_component_count); + id_vector_temp_.insert(id_vector_temp_.cend(), + used_format_component_count, + const_no_zero); + const_no_zero = builder_->makeCompositeConstant( + result_type, id_vector_temp_); + } + result = builder_->createBinOp(spv::OpFAdd, result_type, result, + const_no_zero); + builder_->addDecoration(result, spv::DecorationNoContraction); + } break; + default: + assert_unhandled_case(instr.attributes.signed_rf_mode); + } + } else { + result = builder_->createBinOp( + spv::OpVectorTimesScalar, result_type, result, + builder_->makeFloatConstant(1.0f / 
4294967295.0f)); + builder_->addDecoration(result, spv::DecorationNoContraction); + } + } + break; + + case xenos::VertexFormat::k_32_FLOAT: + case xenos::VertexFormat::k_32_32_FLOAT: + case xenos::VertexFormat::k_32_32_32_32_FLOAT: + case xenos::VertexFormat::k_32_32_32_FLOAT: + assert_true(used_format_components == needed_words); + result = builder_->createUnaryOp( + spv::OpBitcast, type_float_vectors_[word_count - 1], words); + break; + } + + if (result != spv::NoResult) { + // Apply the exponent bias. + if (instr.attributes.exp_adjust) { + result = builder_->createBinOp(spv::OpVectorTimesScalar, + builder_->getTypeId(result), result, + builder_->makeFloatConstant(std::ldexp( + 1.0f, instr.attributes.exp_adjust))); + builder_->addDecoration(result, spv::DecorationNoContraction); + } + + // If any components not present in the format were requested, pad the + // resulting vector with zeros. + uint32_t used_missing_components = + used_result_components & ~used_format_components; + if (used_missing_components) { + // Bypassing the assertion in spv::Builder::createCompositeConstruct - can + // construct vectors by concatenating vectors, not just from individual + // scalars. 
+ std::unique_ptr composite_construct_op = + std::make_unique( + builder_->getUniqueId(), + type_float_vectors_[xe::bit_count(used_result_components) - 1], + spv::OpCompositeConstruct); + composite_construct_op->addIdOperand(result); + composite_construct_op->addIdOperand( + const_float_vectors_0_[xe::bit_count(used_missing_components) - 1]); + result = composite_construct_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(composite_construct_op)); + } + } + StoreResult(instr.result, result); +} + +} // namespace gpu +} // namespace xe From 45d3dc4806e5ae3aa6e0e3fafe8b8489c6f7f164 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 5 Nov 2020 21:49:36 +0300 Subject: [PATCH 053/123] [SPIR-V] vfetch: 16-bit float --- .../gpu/spirv_shader_translator_fetch.cc | 57 ++++++++++++++++++- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator_fetch.cc b/src/xenia/gpu/spirv_shader_translator_fetch.cc index a885c2dea..b0e4ff70d 100644 --- a/src/xenia/gpu/spirv_shader_translator_fetch.cc +++ b/src/xenia/gpu/spirv_shader_translator_fetch.cc @@ -178,6 +178,57 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( switch (instr.attributes.data_format) { // TODO(Triang3l): All format conversion. + case xenos::VertexFormat::k_16_16_FLOAT: + case xenos::VertexFormat::k_16_16_16_16_FLOAT: { + // FIXME(Triang3l): This converts from GLSL float16 with NaNs instead of + // Xbox 360 float16 with extended range. However, haven't encountered + // games relying on that yet. 
+ spv::Id word_needed_component_values[2] = {}; + for (uint32_t i = 0; i < 2; ++i) { + uint32_t word_needed_components = + (used_format_components >> (i * 2)) & 0b11; + if (!word_needed_components) { + continue; + } + spv::Id word; + if (word_count > 1) { + word = builder_->createCompositeExtract(words, type_uint_, + word_composite_indices[i]); + } else { + word = words; + } + id_vector_temp_.clear(); + id_vector_temp_.push_back(word); + word = builder_->createBuiltinCall(type_float2_, ext_inst_glsl_std_450_, + GLSLstd450UnpackHalf2x16, + id_vector_temp_); + if (word_needed_components != 0b11) { + // If only one of two components is needed, extract it. + word = builder_->createCompositeExtract( + word, type_float_, (word_needed_components & 0b01) ? 0 : 1); + } + word_needed_component_values[i] = word; + } + if (word_needed_component_values[1] == spv::NoResult) { + result = word_needed_component_values[0]; + } else if (word_needed_component_values[0] == spv::NoResult) { + result = word_needed_component_values[1]; + } else { + // Bypassing the assertion in spv::Builder::createCompositeConstruct as + // of November 5, 2020 - can construct vectors by concatenating vectors, + // not just from individual scalars. 
+ std::unique_ptr composite_construct_op = + std::make_unique(builder_->getUniqueId(), + result_type, + spv::OpCompositeConstruct); + composite_construct_op->addIdOperand(word_needed_component_values[0]); + composite_construct_op->addIdOperand(word_needed_component_values[1]); + result = composite_construct_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(composite_construct_op)); + } + } break; + case xenos::VertexFormat::k_32: case xenos::VertexFormat::k_32_32: case xenos::VertexFormat::k_32_32_32_32: @@ -260,9 +311,9 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( uint32_t used_missing_components = used_result_components & ~used_format_components; if (used_missing_components) { - // Bypassing the assertion in spv::Builder::createCompositeConstruct - can - // construct vectors by concatenating vectors, not just from individual - // scalars. + // Bypassing the assertion in spv::Builder::createCompositeConstruct as of + // November 5, 2020 - can construct vectors by concatenating vectors, not + // just from individual scalars. 
std::unique_ptr composite_construct_op = std::make_unique( builder_->getUniqueId(), From d926e37f95856baaa58ff077cb8b58a24f8eda8f Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 5 Nov 2020 23:49:39 +0300 Subject: [PATCH 054/123] [SPIR-V] vfetch: packed formats --- .../gpu/spirv_shader_translator_fetch.cc | 222 ++++++++++++++++-- 1 file changed, 204 insertions(+), 18 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator_fetch.cc b/src/xenia/gpu/spirv_shader_translator_fetch.cc index b0e4ff70d..51a4d720f 100644 --- a/src/xenia/gpu/spirv_shader_translator_fetch.cc +++ b/src/xenia/gpu/spirv_shader_translator_fetch.cc @@ -9,6 +9,7 @@ #include "xenia/gpu/spirv_shader_translator.h" +#include #include #include #include @@ -87,22 +88,10 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( } address = builder_->createBinOp(spv::OpIAdd, type_int_, address, index); } - // Add the word offset from the instruction (signed), plus the offset of the - // first needed word within the element. - uint32_t first_word_index; - xe::bit_scan_forward(needed_words, &first_word_index); - int32_t first_word_buffer_offset = - instr.attributes.offset + int32_t(first_word_index); - if (first_word_buffer_offset) { - // Add the constant word offset. - address = builder_->createBinOp( - spv::OpIAdd, type_int_, address, - builder_->makeIntConstant(int(first_word_buffer_offset))); - } // Load the needed words. unsigned int word_composite_indices[4] = {}; - spv::Id word_composite_construct[4]; + spv::Id word_composite_constituents[4]; uint32_t word_count = 0; uint32_t words_remaining = needed_words; uint32_t word_index; @@ -122,7 +111,7 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( // any games relying on out-of-bounds access. On Adreno 200 on Android (LG // P705), however, words (not full elements) out of glBufferData bounds // contain 0. 
- word_composite_construct[word_count++] = + word_composite_constituents[word_count++] = LoadUint32FromSharedMemory(word_address); } spv::Id words; @@ -132,12 +121,12 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( // id_vector_temp_ internally). id_vector_temp_.clear(); id_vector_temp_.reserve(word_count); - id_vector_temp_.insert(id_vector_temp_.cend(), word_composite_construct, - word_composite_construct + word_count); + id_vector_temp_.insert(id_vector_temp_.cend(), word_composite_constituents, + word_composite_constituents + word_count); words = builder_->createCompositeConstruct( type_uint_vectors_[word_count - 1], id_vector_temp_); } else { - words = word_composite_construct[0]; + words = word_composite_constituents[0]; } // Endian swap the words, getting the endianness from bits 0:1 of the second @@ -175,8 +164,52 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( assert_not_zero(used_format_components); uint32_t used_format_component_count = xe::bit_count(used_format_components); spv::Id result_type = type_float_vectors_[used_format_component_count - 1]; + bool format_is_packed = false; + int packed_widths[4] = {}, packed_offsets[4] = {}; + uint32_t packed_words[4] = {}; switch (instr.attributes.data_format) { - // TODO(Triang3l): All format conversion. 
+ case xenos::VertexFormat::k_8_8_8_8: + format_is_packed = true; + packed_widths[0] = packed_widths[1] = packed_widths[2] = + packed_widths[3] = 8; + packed_offsets[1] = 8; + packed_offsets[2] = 16; + packed_offsets[3] = 24; + break; + case xenos::VertexFormat::k_2_10_10_10: + format_is_packed = true; + packed_widths[0] = packed_widths[1] = packed_widths[2] = 10; + packed_widths[3] = 2; + packed_offsets[1] = 10; + packed_offsets[2] = 20; + packed_offsets[3] = 30; + break; + case xenos::VertexFormat::k_10_11_11: + format_is_packed = true; + packed_widths[0] = packed_widths[1] = 11; + packed_widths[2] = 10; + packed_offsets[1] = 11; + packed_offsets[2] = 22; + break; + case xenos::VertexFormat::k_11_11_10: + format_is_packed = true; + packed_widths[0] = 10; + packed_widths[1] = packed_widths[2] = 11; + packed_offsets[1] = 10; + packed_offsets[2] = 21; + break; + case xenos::VertexFormat::k_16_16: + format_is_packed = true; + packed_widths[0] = packed_widths[1] = 16; + packed_offsets[1] = 16; + break; + case xenos::VertexFormat::k_16_16_16_16: + format_is_packed = true; + packed_widths[0] = packed_widths[1] = packed_widths[2] = + packed_widths[3] = 16; + packed_offsets[1] = packed_offsets[3] = 16; + packed_words[2] = packed_words[3] = 1; + break; case xenos::VertexFormat::k_16_16_FLOAT: case xenos::VertexFormat::k_16_16_16_16_FLOAT: { @@ -294,6 +327,159 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( result = builder_->createUnaryOp( spv::OpBitcast, type_float_vectors_[word_count - 1], words); break; + + default: + assert_unhandled_case(instr.attributes.data_format); + } + + if (format_is_packed) { + assert_true(result == spv::NoResult); + // Extract the components from the words as individual ints or uints. + if (instr.attributes.is_signed) { + // Sign-extending extraction - in GLSL the sign-extending overload accepts + // int. 
+ words = builder_->createUnaryOp(spv::OpBitcast, + type_int_vectors_[word_count - 1], words); + } + int extracted_widths[4] = {}; + spv::Id extracted_components[4] = {}; + uint32_t extracted_component_count = 0; + unsigned int extraction_word_current_index = UINT_MAX; + // Default is `words` itself if 1 word loaded. + spv::Id extraction_word_current = words; + for (uint32_t i = 0; i < 4; ++i) { + if (!(used_format_components & (1 << i))) { + continue; + } + if (word_count > 1) { + unsigned int extraction_word_new_index = + word_composite_indices[packed_words[i]]; + if (extraction_word_current_index != extraction_word_new_index) { + extraction_word_current_index = extraction_word_new_index; + extraction_word_current = builder_->createCompositeExtract( + words, instr.attributes.is_signed ? type_int_ : type_uint_, + extraction_word_new_index); + } + } + int extraction_width = packed_widths[i]; + assert_not_zero(extraction_width); + extracted_widths[extracted_component_count] = extraction_width; + extracted_components[extracted_component_count] = builder_->createTriOp( + instr.attributes.is_signed ? spv::OpBitFieldSExtract + : spv::OpBitFieldUExtract, + instr.attributes.is_signed ? type_int_ : type_uint_, + extraction_word_current, builder_->makeIntConstant(packed_offsets[i]), + builder_->makeIntConstant(extraction_width)); + ++extracted_component_count; + } + // Combine extracted components into a vector. + assert_true(extracted_component_count == used_format_component_count); + if (used_format_component_count > 1) { + id_vector_temp_.clear(); + id_vector_temp_.reserve(used_format_component_count); + id_vector_temp_.insert( + id_vector_temp_.cend(), extracted_components, + extracted_components + used_format_component_count); + result = builder_->createCompositeConstruct( + instr.attributes.is_signed + ? 
type_int_vectors_[used_format_component_count - 1]
+              : type_uint_vectors_[used_format_component_count - 1],
+          id_vector_temp_);
+    } else {
+      result = extracted_components[0];
+    }
+    // Convert to floating-point.
+    result = builder_->createUnaryOp(
+        instr.attributes.is_signed ? spv::OpConvertSToF : spv::OpConvertUToF,
+        result_type, result);
+    // Normalize.
+    if (!instr.attributes.is_integer) {
+      float packed_scales[4];
+      bool packed_scales_same = true;
+      for (uint32_t i = 0; i < used_format_component_count; ++i) {
+        int extracted_width = extracted_widths[i];
+        // The signed case would result in 1.0 / 0.0 for 1-bit components, but
+        // there are no Xenos formats with them.
+        assert_true(extracted_width >= 2);
+        packed_scales_same &= extracted_width == extracted_widths[0];
+        float packed_scale_inv;
+        if (instr.attributes.is_signed) {
+          packed_scale_inv = float((uint32_t(1) << (extracted_width - 1)) - 1);
+          if (instr.attributes.signed_rf_mode ==
+              xenos::SignedRepeatingFractionMode::kNoZero) {
+            packed_scale_inv += 0.5f;
+          }
+        } else {
+          packed_scale_inv = float((uint32_t(1) << extracted_width) - 1);
+        }
+        packed_scales[i] = 1.0f / packed_scale_inv;
+      }
+      spv::Id const_packed_scale =
+          builder_->makeFloatConstant(packed_scales[0]);
+      spv::Op packed_scale_mul_op;
+      if (used_format_component_count > 1) {
+        if (packed_scales_same) {
+          packed_scale_mul_op = spv::OpVectorTimesScalar;
+        } else {
+          packed_scale_mul_op = spv::OpFMul;
+          id_vector_temp_.clear();
+          id_vector_temp_.reserve(used_format_component_count);
+          id_vector_temp_.push_back(const_packed_scale);
+          for (uint32_t i = 1; i < used_format_component_count; ++i) {
+            id_vector_temp_.push_back(
+                builder_->makeFloatConstant(packed_scales[i]));
+          }
+          const_packed_scale =
+              builder_->makeCompositeConstant(result_type, id_vector_temp_);
+        }
+      } else {
+        packed_scale_mul_op = spv::OpFMul;
+      }
+      result = builder_->createBinOp(packed_scale_mul_op, result_type, result,
+                                     const_packed_scale);
+      
builder_->addDecoration(result, spv::DecorationNoContraction); + if (instr.attributes.is_signed) { + switch (instr.attributes.signed_rf_mode) { + case xenos::SignedRepeatingFractionMode::kZeroClampMinusOne: { + // Treat both -(2^(n-1)) and -(2^(n-1)-1) as -1. Using regular FMax, + // not NMax, because the number is known not to be NaN. + spv::Id const_minus_1 = builder_->makeFloatConstant(-1.0f); + if (used_format_component_count > 1) { + id_vector_temp_.clear(); + id_vector_temp_.reserve(used_format_component_count); + id_vector_temp_.insert(id_vector_temp_.cend(), + used_format_component_count, + const_minus_1); + const_minus_1 = + builder_->makeCompositeConstant(result_type, id_vector_temp_); + } + id_vector_temp_.clear(); + id_vector_temp_.push_back(result); + id_vector_temp_.push_back(const_minus_1); + result = + builder_->createBuiltinCall(result_type, ext_inst_glsl_std_450_, + GLSLstd450FMax, id_vector_temp_); + } break; + case xenos::SignedRepeatingFractionMode::kNoZero: + id_vector_temp_.clear(); + id_vector_temp_.reserve(used_format_component_count); + for (uint32_t i = 0; i < used_format_component_count; ++i) { + id_vector_temp_.push_back( + builder_->makeFloatConstant(0.5f * packed_scales[i])); + } + result = + builder_->createBinOp(spv::OpFAdd, result_type, result, + used_format_component_count > 1 + ? 
builder_->makeCompositeConstant(
+                                                result_type, id_vector_temp_)
+                                          : id_vector_temp_[0]);
+            builder_->addDecoration(result, spv::DecorationNoContraction);
+            break;
+          default:
+            assert_unhandled_case(instr.attributes.signed_rf_mode);
+        }
+      }
+    }
   }
 
   if (result != spv::NoResult) {
From d7341f9873b5c47d93f695cd7736b0c6ca0ecb50 Mon Sep 17 00:00:00 2001
From: Triang3l <triang3l@yandex.ru>
Date: Sat, 7 Nov 2020 14:03:31 +0300
Subject: [PATCH 055/123] [Vulkan] Internal descriptor set layouts

---
 src/xenia/gpu/spirv_shader_translator.cc   | 93 +++++++++----------
 src/xenia/gpu/spirv_shader_translator.h    |  2 +-
 .../gpu/vulkan/vulkan_command_processor.cc | 88 +++++++++++++-----
 .../gpu/vulkan/vulkan_command_processor.h  | 12 ++-
 4 files changed, 118 insertions(+), 77 deletions(-)

diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc
index 3fbc29220..dd7cac5ba 100644
--- a/src/xenia/gpu/spirv_shader_translator.cc
+++ b/src/xenia/gpu/spirv_shader_translator.cc
@@ -283,47 +283,41 @@ void SpirvShaderTranslator::StartTranslation() {
   }
 
   // Common storage buffers - shared memory uint[], each 128 MB or larger,
-  // depending on what's possible on the device. glslang generates everything,
-  // including all the types, for each storage buffer separately.
-  uint32_t shared_memory_binding_count =
+  // depending on what's possible on the device.
+  id_vector_temp_.clear();
+  id_vector_temp_.push_back(builder_->makeRuntimeArray(type_uint_));
+  // Storage buffers have std430 packing, no padding to 4-component vectors.
+  builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride,
+                          sizeof(uint32_t));
+  spv::Id type_shared_memory =
+      builder_->makeStructType(id_vector_temp_, "XeSharedMemory");
+  builder_->addMemberName(type_shared_memory, 0, "shared_memory");
+  // TODO(Triang3l): Make writable when memexport is implemented.
+ builder_->addMemberDecoration(type_shared_memory, 0, + spv::DecorationNonWritable); + builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationOffset, + 0); + builder_->addDecoration(type_shared_memory, + features_.spirv_version >= spv::Spv_1_3 + ? spv::DecorationBlock + : spv::DecorationBufferBlock); + unsigned int shared_memory_binding_count = 1 << GetSharedMemoryStorageBufferCountLog2(); - char shared_memory_struct_name[] = "XeSharedMemory0"; - char shared_memory_buffer_name[] = "xe_shared_memory_0"; - for (uint32_t i = 0; i < shared_memory_binding_count; ++i) { - id_vector_temp_.clear(); - id_vector_temp_.push_back(builder_->makeRuntimeArray(type_uint_)); - // Storage buffers have std430 packing, no padding to 4-component vectors. - builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, - sizeof(uint32_t)); - shared_memory_struct_name[xe::countof(shared_memory_struct_name) - 2] = - '0' + i; - spv::Id type_shared_memory = - builder_->makeStructType(id_vector_temp_, shared_memory_struct_name); - builder_->addMemberName(type_shared_memory, 0, "shared_memory"); - // TODO(Triang3l): Make writable when memexport is implemented. - builder_->addMemberDecoration(type_shared_memory, 0, - spv::DecorationNonWritable); - builder_->addMemberDecoration(type_shared_memory, 0, spv::DecorationOffset, - 0); - builder_->addDecoration(type_shared_memory, - features_.spirv_version >= spv::Spv_1_3 - ? spv::DecorationBlock - : spv::DecorationBufferBlock); - shared_memory_buffer_name[xe::countof(shared_memory_buffer_name) - 2] = - '0' + i; - spv::Id buffer_shared_memory = builder_->createVariable( - spv::NoPrecision, - features_.spirv_version >= spv::Spv_1_3 ? 
spv::StorageClassStorageBuffer - : spv::StorageClassUniform, - type_shared_memory, shared_memory_buffer_name); - buffers_shared_memory_[i] = buffer_shared_memory; - builder_->addDecoration(buffer_shared_memory, spv::DecorationDescriptorSet, - int(kDescriptorSetSharedMemoryAndEdram)); - builder_->addDecoration(buffer_shared_memory, spv::DecorationBinding, - int(i)); - if (features_.spirv_version >= spv::Spv_1_4) { - main_interface_.push_back(buffer_shared_memory); - } + if (shared_memory_binding_count > 1) { + type_shared_memory = builder_->makeArrayType( + type_shared_memory, + builder_->makeUintConstant(shared_memory_binding_count), 0); + } + buffers_shared_memory_ = builder_->createVariable( + spv::NoPrecision, + features_.spirv_version >= spv::Spv_1_3 ? spv::StorageClassStorageBuffer + : spv::StorageClassUniform, + type_shared_memory, "xe_shared_memory"); + builder_->addDecoration(buffers_shared_memory_, spv::DecorationDescriptorSet, + int(kDescriptorSetSharedMemoryAndEdram)); + builder_->addDecoration(buffers_shared_memory_, spv::DecorationBinding, 0); + if (features_.spirv_version >= spv::Spv_1_4) { + main_interface_.push_back(buffers_shared_memory_); } if (IsSpirvVertexOrTessEvalShader()) { @@ -1690,13 +1684,14 @@ spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory( id_vector_temp_.push_back(const_int_0_); id_vector_temp_.push_back(address_dwords_int); return builder_->createLoad( - builder_->createAccessChain(storage_class, buffers_shared_memory_[0], + builder_->createAccessChain(storage_class, buffers_shared_memory_, id_vector_temp_), spv::NoPrecision); } // The memory is split into multiple bindings - check which binding to load - // from. 29 is log2(512 MB), but addressing in dwords (4 B). + // from. 29 is log2(512 MB), but addressing in dwords (4 B). Not indexing the + // array with the variable itself because it needs VK_EXT_descriptor_indexing. 
uint32_t binding_address_bits = (29 - 2) - buffer_count_log2; spv::Id binding_index = builder_->createBinOp( spv::OpShiftRightLogical, type_uint_, @@ -1732,16 +1727,16 @@ spv::Id SpirvShaderTranslator::LoadUint32FromSharedMemory( } builder_->getBuildPoint()->addInstruction(std::move(switch_op)); } - // Set up the access chain indices. - id_vector_temp_.clear(); - id_vector_temp_.reserve(2); - // The only SSBO struct member. - id_vector_temp_.push_back(const_int_0_); - id_vector_temp_.push_back(binding_address); for (uint32_t i = 0; i < buffer_count; ++i) { builder_->setBuildPoint(switch_case_blocks[i]); + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back(builder_->makeIntConstant(int(i))); + // The only SSBO struct member. + id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(binding_address); value_phi_op->addIdOperand(builder_->createLoad( - builder_->createAccessChain(storage_class, buffers_shared_memory_[i], + builder_->createAccessChain(storage_class, buffers_shared_memory_, id_vector_temp_), spv::NoPrecision)); value_phi_op->addIdOperand(switch_case_blocks[i]->getId()); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index e98df7d2e..bb74d5ab9 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -312,7 +312,7 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id uniform_bool_loop_constants_; spv::Id uniform_fetch_constants_; - spv::Id buffers_shared_memory_[512 / 128]; + spv::Id buffers_shared_memory_; // VS as VS only - int. 
spv::Id input_vertex_index_; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index aa351bf81..4a7f1e5af 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -70,47 +70,81 @@ bool VulkanCommandProcessor::SetupContext() { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; descriptor_set_layout_binding_uniform_buffer.descriptorCount = 1; descriptor_set_layout_binding_uniform_buffer.stageFlags = - shader_stages_guest_vertex; + shader_stages_guest_vertex | VK_SHADER_STAGE_FRAGMENT_BIT; descriptor_set_layout_binding_uniform_buffer.pImmutableSamplers = nullptr; descriptor_set_layout_create_info.bindingCount = 1; descriptor_set_layout_create_info.pBindings = &descriptor_set_layout_binding_uniform_buffer; if (dfn.vkCreateDescriptorSetLayout( device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layout_uniform_buffer_guest_vertex_) != VK_SUCCESS) { + &descriptor_set_layout_ub_fetch_bool_loop_constants_) != VK_SUCCESS) { XELOGE( - "Failed to create a Vulkan descriptor set layout for an uniform buffer " - "accessible by guest vertex shaders"); + "Failed to create a Vulkan descriptor set layout for the fetch, bool " + "and loop constants uniform buffer"); + return false; + } + descriptor_set_layout_binding_uniform_buffer.stageFlags = + shader_stages_guest_vertex; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layout_ub_float_constants_vertex_) != VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for the vertex shader " + "float constants uniform buffer"); return false; } descriptor_set_layout_binding_uniform_buffer.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; if (dfn.vkCreateDescriptorSetLayout( device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layout_uniform_buffer_guest_pixel_) != VK_SUCCESS) { + 
&descriptor_set_layout_ub_float_constants_pixel_) != VK_SUCCESS) { XELOGE( - "Failed to create a Vulkan descriptor set layout for an uniform buffer " - "accessible by guest pixel shaders"); - return false; - } - descriptor_set_layout_binding_uniform_buffer.stageFlags = - VK_SHADER_STAGE_FRAGMENT_BIT; - if (dfn.vkCreateDescriptorSetLayout( - device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layout_uniform_buffer_guest_pixel_) != VK_SUCCESS) { - XELOGE( - "Failed to create a Vulkan descriptor set layout for an uniform buffer " - "accessible by guest pixel shaders"); + "Failed to create a Vulkan descriptor set layout for the pixel shader " + "float constants uniform buffer"); return false; } descriptor_set_layout_binding_uniform_buffer.stageFlags = shader_stages_guest_vertex | VK_SHADER_STAGE_FRAGMENT_BIT; + if (provider.device_features().tessellationShader) { + descriptor_set_layout_binding_uniform_buffer.stageFlags |= + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; + } if (dfn.vkCreateDescriptorSetLayout( device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layout_uniform_buffer_guest_) != VK_SUCCESS) { + &descriptor_set_layout_ub_system_constants_) != VK_SUCCESS) { XELOGE( - "Failed to create a Vulkan descriptor set layout for an uniform buffer " - "accessible by guest shaders"); + "Failed to create a Vulkan descriptor set layout for the system " + "constants uniform buffer"); + return false; + } + uint32_t shared_memory_binding_count_log2 = + SpirvShaderTranslator::GetSharedMemoryStorageBufferCountLog2( + provider.device_properties().limits.maxStorageBufferRange); + uint32_t shared_memory_binding_count = uint32_t(1) + << shared_memory_binding_count_log2; + VkDescriptorSetLayoutBinding + descriptor_set_layout_binding_shared_memory_and_edram[1]; + descriptor_set_layout_binding_shared_memory_and_edram[0].binding = 0; + descriptor_set_layout_binding_shared_memory_and_edram[0].descriptorType = + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + 
descriptor_set_layout_binding_shared_memory_and_edram[0].descriptorCount = + shared_memory_binding_count; + // TODO(Triang3l): When fullDrawIndexUint32 fallback is added, force host + // vertex shader access to the shared memory for the tessellation vertex + // shader (to retrieve tessellation factors). + descriptor_set_layout_binding_shared_memory_and_edram[0].stageFlags = + shader_stages_guest_vertex | VK_SHADER_STAGE_FRAGMENT_BIT; + descriptor_set_layout_binding_shared_memory_and_edram[0].pImmutableSamplers = + nullptr; + // TODO(Triang3l): EDRAM binding for the fragment shader interlocks case. + descriptor_set_layout_create_info.pBindings = + descriptor_set_layout_binding_shared_memory_and_edram; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layout_shared_memory_and_edram_) != VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for the shared memory " + "and the EDRAM"); return false; } @@ -148,13 +182,19 @@ void VulkanCommandProcessor::ShutdownContext() { ui::vulkan::util::DestroyAndNullHandle( dfn.vkDestroyDescriptorSetLayout, device, - descriptor_set_layout_uniform_buffer_guest_); + descriptor_set_layout_shared_memory_and_edram_); ui::vulkan::util::DestroyAndNullHandle( dfn.vkDestroyDescriptorSetLayout, device, - descriptor_set_layout_uniform_buffer_guest_pixel_); + descriptor_set_layout_ub_system_constants_); ui::vulkan::util::DestroyAndNullHandle( dfn.vkDestroyDescriptorSetLayout, device, - descriptor_set_layout_uniform_buffer_guest_vertex_); + descriptor_set_layout_ub_float_constants_pixel_); + ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyDescriptorSetLayout, device, + descriptor_set_layout_ub_float_constants_vertex_); + ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyDescriptorSetLayout, device, + descriptor_set_layout_ub_fetch_bool_loop_constants_); ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, device, 
descriptor_set_layout_empty_); @@ -794,6 +834,8 @@ VkShaderStageFlags VulkanCommandProcessor::GetGuestVertexShaderStageFlags() if (provider.device_features().tessellationShader) { stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; } + // TODO(Triang3l): Vertex to compute translation for rectangle and possibly + // point emulation. return stages; } diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index b00cab90a..607016731 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -162,13 +162,17 @@ class VulkanCommandProcessor : public CommandProcessor { std::vector sparse_buffer_bind_infos_temp_; VkPipelineStageFlags sparse_bind_wait_stage_mask_ = 0; - // Common descriptor set layouts, usable by anything that may need them. + // Descriptor set layouts used by different shaders. VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE; - VkDescriptorSetLayout descriptor_set_layout_uniform_buffer_guest_vertex_ = + VkDescriptorSetLayout descriptor_set_layout_ub_fetch_bool_loop_constants_ = VK_NULL_HANDLE; - VkDescriptorSetLayout descriptor_set_layout_uniform_buffer_guest_pixel_ = + VkDescriptorSetLayout descriptor_set_layout_ub_float_constants_vertex_ = VK_NULL_HANDLE; - VkDescriptorSetLayout descriptor_set_layout_uniform_buffer_guest_ = + VkDescriptorSetLayout descriptor_set_layout_ub_float_constants_pixel_ = + VK_NULL_HANDLE; + VkDescriptorSetLayout descriptor_set_layout_ub_system_constants_ = + VK_NULL_HANDLE; + VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ = VK_NULL_HANDLE; union TextureDescriptorSetLayoutKey { From afe304b328b0cfe81ecb449b07122bd81014ed84 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 7 Nov 2020 20:43:45 +0300 Subject: [PATCH 056/123] [Vulkan] Shared memory descriptor set --- src/xenia/gpu/shared_memory.h | 6 +- .../gpu/vulkan/vulkan_command_processor.cc | 92 +++++++++++++++++-- 
.../gpu/vulkan/vulkan_command_processor.h | 3 + 3 files changed, 88 insertions(+), 13 deletions(-) diff --git a/src/xenia/gpu/shared_memory.h b/src/xenia/gpu/shared_memory.h index 496836a38..98719b670 100644 --- a/src/xenia/gpu/shared_memory.h +++ b/src/xenia/gpu/shared_memory.h @@ -25,6 +25,9 @@ namespace gpu { // system page size granularity. class SharedMemory { public: + static constexpr uint32_t kBufferSizeLog2 = 29; + static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2; + virtual ~SharedMemory(); // Call in the implementation-specific ClearCache. virtual void ClearCache(); @@ -98,9 +101,6 @@ class SharedMemory { // destructor. void ShutdownCommon(); - static constexpr uint32_t kBufferSizeLog2 = 29; - static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2; - // Sparse allocations are 4 MB, so not too many of them are allocated, but // also not to waste too much memory for padding (with 16 MB there's too // much). diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 4a7f1e5af..a042facaf 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -123,22 +123,23 @@ bool VulkanCommandProcessor::SetupContext() { uint32_t shared_memory_binding_count = uint32_t(1) << shared_memory_binding_count_log2; VkDescriptorSetLayoutBinding - descriptor_set_layout_binding_shared_memory_and_edram[1]; - descriptor_set_layout_binding_shared_memory_and_edram[0].binding = 0; - descriptor_set_layout_binding_shared_memory_and_edram[0].descriptorType = + descriptor_set_layout_bindings_shared_memory_and_edram[1]; + descriptor_set_layout_bindings_shared_memory_and_edram[0].binding = 0; + descriptor_set_layout_bindings_shared_memory_and_edram[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - descriptor_set_layout_binding_shared_memory_and_edram[0].descriptorCount = + descriptor_set_layout_bindings_shared_memory_and_edram[0].descriptorCount = 
shared_memory_binding_count; // TODO(Triang3l): When fullDrawIndexUint32 fallback is added, force host // vertex shader access to the shared memory for the tessellation vertex // shader (to retrieve tessellation factors). - descriptor_set_layout_binding_shared_memory_and_edram[0].stageFlags = + descriptor_set_layout_bindings_shared_memory_and_edram[0].stageFlags = shader_stages_guest_vertex | VK_SHADER_STAGE_FRAGMENT_BIT; - descriptor_set_layout_binding_shared_memory_and_edram[0].pImmutableSamplers = + descriptor_set_layout_bindings_shared_memory_and_edram[0].pImmutableSamplers = nullptr; - // TODO(Triang3l): EDRAM binding for the fragment shader interlocks case. + // TODO(Triang3l): EDRAM storage image binding for the fragment shader + // interlocks case. descriptor_set_layout_create_info.pBindings = - descriptor_set_layout_binding_shared_memory_and_edram; + descriptor_set_layout_bindings_shared_memory_and_edram; if (dfn.vkCreateDescriptorSetLayout( device, &descriptor_set_layout_create_info, nullptr, &descriptor_set_layout_shared_memory_and_edram_) != VK_SUCCESS) { @@ -155,19 +156,90 @@ bool VulkanCommandProcessor::SetupContext() { return false; } + // Shared memory and EDRAM common bindings. + VkDescriptorPoolSize descriptor_pool_sizes[1]; + descriptor_pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + descriptor_pool_sizes[0].descriptorCount = shared_memory_binding_count; + // TODO(Triang3l): EDRAM storage image binding for the fragment shader + // interlocks case. 
+ VkDescriptorPoolCreateInfo descriptor_pool_create_info; + descriptor_pool_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + descriptor_pool_create_info.pNext = nullptr; + descriptor_pool_create_info.flags = 0; + descriptor_pool_create_info.maxSets = 1; + descriptor_pool_create_info.poolSizeCount = 1; + descriptor_pool_create_info.pPoolSizes = descriptor_pool_sizes; + if (dfn.vkCreateDescriptorPool(device, &descriptor_pool_create_info, nullptr, + &shared_memory_and_edram_descriptor_pool_) != + VK_SUCCESS) { + XELOGE( + "Failed to create the Vulkan descriptor pool for shared memory and " + "EDRAM"); + return false; + } + VkDescriptorSetAllocateInfo descriptor_set_allocate_info; + descriptor_set_allocate_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + descriptor_set_allocate_info.pNext = nullptr; + descriptor_set_allocate_info.descriptorPool = + shared_memory_and_edram_descriptor_pool_; + descriptor_set_allocate_info.descriptorSetCount = 1; + descriptor_set_allocate_info.pSetLayouts = + &descriptor_set_layout_shared_memory_and_edram_; + if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, + &shared_memory_and_edram_descriptor_set_) != + VK_SUCCESS) { + XELOGE( + "Failed to allocate the Vulkan descriptor set for shared memory and " + "EDRAM"); + return false; + } + VkDescriptorBufferInfo + shared_memory_descriptor_buffers_info[SharedMemory::kBufferSize / + (128 << 20)]; + uint32_t shared_memory_binding_range = + SharedMemory::kBufferSize >> shared_memory_binding_count_log2; + for (uint32_t i = 0; i < shared_memory_binding_count; ++i) { + VkDescriptorBufferInfo& shared_memory_descriptor_buffer_info = + shared_memory_descriptor_buffers_info[i]; + shared_memory_descriptor_buffer_info.buffer = shared_memory_->buffer(); + shared_memory_descriptor_buffer_info.offset = + shared_memory_binding_range * i; + shared_memory_descriptor_buffer_info.range = shared_memory_binding_range; + } + VkWriteDescriptorSet 
write_descriptor_sets[1]; + write_descriptor_sets[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_descriptor_sets[0].pNext = nullptr; + write_descriptor_sets[0].dstSet = shared_memory_and_edram_descriptor_set_; + write_descriptor_sets[0].dstBinding = 0; + write_descriptor_sets[0].dstArrayElement = 0; + write_descriptor_sets[0].descriptorCount = shared_memory_binding_count; + write_descriptor_sets[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + write_descriptor_sets[0].pImageInfo = nullptr; + write_descriptor_sets[0].pBufferInfo = shared_memory_descriptor_buffers_info; + write_descriptor_sets[0].pTexelBufferView = nullptr; + // TODO(Triang3l): EDRAM storage image binding for the fragment shader + // interlocks case. + dfn.vkUpdateDescriptorSets(device, 1, write_descriptor_sets, 0, nullptr); + return true; } void VulkanCommandProcessor::ShutdownContext() { AwaitAllQueueOperationsCompletion(); - shared_memory_.reset(); - const ui::vulkan::VulkanProvider& provider = GetVulkanContext().GetVulkanProvider(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyDescriptorPool, device, + shared_memory_and_edram_descriptor_pool_); + + shared_memory_.reset(); + for (const auto& pipeline_layout_pair : pipeline_layouts_) { dfn.vkDestroyPipelineLayout( device, pipeline_layout_pair.second.pipeline_layout, nullptr); diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 607016731..983599d5d 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -200,6 +200,9 @@ class VulkanCommandProcessor : public CommandProcessor { std::unordered_map pipeline_layouts_; std::unique_ptr shared_memory_; + + VkDescriptorPool shared_memory_and_edram_descriptor_pool_ = VK_NULL_HANDLE; + VkDescriptorSet shared_memory_and_edram_descriptor_set_; }; } 
// namespace vulkan From 93f6a00201cdc9198d902c1779b603c559f8177d Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 7 Nov 2020 23:18:28 +0300 Subject: [PATCH 057/123] [Vulkan] Transient descriptor pool + other cleanup for future drawing --- .../gpu/vulkan/vulkan_command_processor.cc | 51 ++++-- .../gpu/vulkan/vulkan_command_processor.h | 13 +- src/xenia/ui/graphics_upload_buffer_pool.cc | 4 +- .../ui/vulkan/transient_descriptor_pool.cc | 160 ++++++++++++++++++ .../ui/vulkan/transient_descriptor_pool.h | 61 +++++++ src/xenia/ui/vulkan/vulkan_provider.cc | 1 + src/xenia/ui/vulkan/vulkan_provider.h | 1 + 7 files changed, 272 insertions(+), 19 deletions(-) create mode 100644 src/xenia/ui/vulkan/transient_descriptor_pool.cc create mode 100644 src/xenia/ui/vulkan/transient_descriptor_pool.h diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index a042facaf..7b895f48c 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -49,6 +49,12 @@ bool VulkanCommandProcessor::SetupContext() { const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + // No specific reason for 32768, just the "too much" amount from Direct3D 12 + // PIX warnings. 
+ transient_descriptor_pool_uniform_buffers_ = + std::make_unique( + provider, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 32768, 32768); + VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info; descriptor_set_layout_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; @@ -77,7 +83,7 @@ bool VulkanCommandProcessor::SetupContext() { &descriptor_set_layout_binding_uniform_buffer; if (dfn.vkCreateDescriptorSetLayout( device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layout_ub_fetch_bool_loop_constants_) != VK_SUCCESS) { + &descriptor_set_layout_fetch_bool_loop_constants_) != VK_SUCCESS) { XELOGE( "Failed to create a Vulkan descriptor set layout for the fetch, bool " "and loop constants uniform buffer"); @@ -87,7 +93,7 @@ bool VulkanCommandProcessor::SetupContext() { shader_stages_guest_vertex; if (dfn.vkCreateDescriptorSetLayout( device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layout_ub_float_constants_vertex_) != VK_SUCCESS) { + &descriptor_set_layout_float_constants_vertex_) != VK_SUCCESS) { XELOGE( "Failed to create a Vulkan descriptor set layout for the vertex shader " "float constants uniform buffer"); @@ -97,7 +103,7 @@ bool VulkanCommandProcessor::SetupContext() { VK_SHADER_STAGE_FRAGMENT_BIT; if (dfn.vkCreateDescriptorSetLayout( device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layout_ub_float_constants_pixel_) != VK_SUCCESS) { + &descriptor_set_layout_float_constants_pixel_) != VK_SUCCESS) { XELOGE( "Failed to create a Vulkan descriptor set layout for the pixel shader " "float constants uniform buffer"); @@ -111,7 +117,7 @@ bool VulkanCommandProcessor::SetupContext() { } if (dfn.vkCreateDescriptorSetLayout( device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layout_ub_system_constants_) != VK_SUCCESS) { + &descriptor_set_layout_system_constants_) != VK_SUCCESS) { XELOGE( "Failed to create a Vulkan descriptor set layout for the system " "constants uniform 
buffer"); @@ -257,19 +263,21 @@ void VulkanCommandProcessor::ShutdownContext() { descriptor_set_layout_shared_memory_and_edram_); ui::vulkan::util::DestroyAndNullHandle( dfn.vkDestroyDescriptorSetLayout, device, - descriptor_set_layout_ub_system_constants_); + descriptor_set_layout_system_constants_); ui::vulkan::util::DestroyAndNullHandle( dfn.vkDestroyDescriptorSetLayout, device, - descriptor_set_layout_ub_float_constants_pixel_); + descriptor_set_layout_float_constants_pixel_); ui::vulkan::util::DestroyAndNullHandle( dfn.vkDestroyDescriptorSetLayout, device, - descriptor_set_layout_ub_float_constants_vertex_); + descriptor_set_layout_float_constants_vertex_); ui::vulkan::util::DestroyAndNullHandle( dfn.vkDestroyDescriptorSetLayout, device, - descriptor_set_layout_ub_fetch_bool_loop_constants_); + descriptor_set_layout_fetch_bool_loop_constants_); ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, device, descriptor_set_layout_empty_); + transient_descriptor_pool_uniform_buffers_.reset(); + sparse_bind_wait_stage_mask_ = 0; sparse_buffer_binds_.clear(); sparse_memory_binds_.clear(); @@ -454,15 +462,26 @@ bool VulkanCommandProcessor::GetPipelineLayout( VkDescriptorSetLayout descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetCount]; - // Fill any unused set layouts with empty layouts. - // TODO(Triang3l): Remove this. 
- for (size_t i = 0; i < xe::countof(descriptor_set_layouts); ++i) { - descriptor_set_layouts[i] = descriptor_set_layout_empty_; - } + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetFetchConstants] = + descriptor_set_layout_fetch_bool_loop_constants_; + descriptor_set_layouts + [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] = + descriptor_set_layout_float_constants_vertex_; + descriptor_set_layouts + [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] = + descriptor_set_layout_float_constants_pixel_; descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesPixel] = descriptor_set_layout_textures_pixel; descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesVertex] = descriptor_set_layout_textures_vertex; + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSystemConstants] = + descriptor_set_layout_system_constants_; + descriptor_set_layouts + [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] = + descriptor_set_layout_fetch_bool_loop_constants_; + descriptor_set_layouts + [SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] = + descriptor_set_layout_shared_memory_and_edram_; VkPipelineLayoutCreateInfo pipeline_layout_create_info; pipeline_layout_create_info.sType = @@ -640,6 +659,9 @@ void VulkanCommandProcessor::CheckSubmissionFence(uint64_t await_submission) { command_buffers_submitted_.pop_front(); } + // Reclaim descriptor pools. 
+ transient_descriptor_pool_uniform_buffers_->Reclaim(submission_completed_); + shared_memory_->CompletedSubmissionUpdated(); } @@ -888,6 +910,9 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { if (cache_clear_requested_ && AwaitAllQueueOperationsCompletion()) { cache_clear_requested_ = false; + transient_descriptor_pool_uniform_buffers_->ClearCache(); + + assert_true(command_buffers_submitted_.empty()); for (const CommandBuffer& command_buffer : command_buffers_writable_) { dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr); } diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 983599d5d..90df3f39b 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -22,6 +23,7 @@ #include "xenia/gpu/vulkan/vulkan_shared_memory.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/kernel_state.h" +#include "xenia/ui/vulkan/transient_descriptor_pool.h" #include "xenia/ui/vulkan/vulkan_context.h" namespace xe { @@ -162,15 +164,18 @@ class VulkanCommandProcessor : public CommandProcessor { std::vector sparse_buffer_bind_infos_temp_; VkPipelineStageFlags sparse_bind_wait_stage_mask_ = 0; + std::unique_ptr + transient_descriptor_pool_uniform_buffers_; + // Descriptor set layouts used by different shaders. 
VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE; - VkDescriptorSetLayout descriptor_set_layout_ub_fetch_bool_loop_constants_ = + VkDescriptorSetLayout descriptor_set_layout_fetch_bool_loop_constants_ = VK_NULL_HANDLE; - VkDescriptorSetLayout descriptor_set_layout_ub_float_constants_vertex_ = + VkDescriptorSetLayout descriptor_set_layout_float_constants_vertex_ = VK_NULL_HANDLE; - VkDescriptorSetLayout descriptor_set_layout_ub_float_constants_pixel_ = + VkDescriptorSetLayout descriptor_set_layout_float_constants_pixel_ = VK_NULL_HANDLE; - VkDescriptorSetLayout descriptor_set_layout_ub_system_constants_ = + VkDescriptorSetLayout descriptor_set_layout_system_constants_ = VK_NULL_HANDLE; VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ = VK_NULL_HANDLE; diff --git a/src/xenia/ui/graphics_upload_buffer_pool.cc b/src/xenia/ui/graphics_upload_buffer_pool.cc index 2a780b0c9..5eb04fba3 100644 --- a/src/xenia/ui/graphics_upload_buffer_pool.cc +++ b/src/xenia/ui/graphics_upload_buffer_pool.cc @@ -71,7 +71,7 @@ void GraphicsUploadBufferPool::FlushWrites() { GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request( uint64_t submission_index, size_t size, size_t alignment, size_t& offset_out) { - assert_not_zero(alignment); + alignment = std::max(alignment, size_t(1)); assert_true(xe::is_pow2(alignment)); size = xe::align(size, alignment); assert_true(size <= page_size_); @@ -126,7 +126,7 @@ GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request( GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::RequestPartial( uint64_t submission_index, size_t size, size_t alignment, size_t& offset_out, size_t& size_out) { - assert_not_zero(alignment); + alignment = std::max(alignment, size_t(1)); assert_true(xe::is_pow2(alignment)); size = xe::align(size, alignment); size = std::min(size, page_size_); diff --git a/src/xenia/ui/vulkan/transient_descriptor_pool.cc b/src/xenia/ui/vulkan/transient_descriptor_pool.cc new file mode 
100644 index 000000000..af2c0f424 --- /dev/null +++ b/src/xenia/ui/vulkan/transient_descriptor_pool.cc @@ -0,0 +1,160 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/ui/vulkan/transient_descriptor_pool.h" + +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" + +namespace xe { +namespace ui { +namespace vulkan { + +TransientDescriptorPool::TransientDescriptorPool( + const VulkanProvider& provider, VkDescriptorType descriptor_type, + uint32_t page_descriptor_set_count, uint32_t page_descriptor_count) + : provider_(provider), + descriptor_type_(descriptor_type), + page_descriptor_set_count_(page_descriptor_set_count), + page_descriptor_count_(page_descriptor_count) { + assert_not_zero(page_descriptor_set_count); + assert_true(page_descriptor_set_count <= page_descriptor_count); +} + +TransientDescriptorPool::~TransientDescriptorPool() { ClearCache(); } + +void TransientDescriptorPool::Reclaim(uint64_t completed_submission_index) { + const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + while (!pages_submitted_.empty()) { + const auto& descriptor_pool_pair = pages_submitted_.front(); + if (descriptor_pool_pair.second > completed_submission_index) { + break; + } + dfn.vkResetDescriptorPool(device, descriptor_pool_pair.first, 0); + pages_writable_.push_back(descriptor_pool_pair.first); + pages_submitted_.pop_front(); + } +} + +void TransientDescriptorPool::ClearCache() { + const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + for (const auto& 
descriptor_pool_pair : pages_submitted_) { + dfn.vkDestroyDescriptorPool(device, descriptor_pool_pair.first, nullptr); + } + pages_submitted_.clear(); + page_current_descriptors_used_ = 0; + page_current_descriptor_sets_used_ = 0; + page_current_last_submission_ = 0; + for (VkDescriptorPool descriptor_pool : pages_writable_) { + dfn.vkDestroyDescriptorPool(device, descriptor_pool, nullptr); + } + pages_writable_.clear(); +} + +VkDescriptorSet TransientDescriptorPool::Request( + uint64_t submission_index, VkDescriptorSetLayout layout, + uint32_t layout_descriptor_count) { + assert_true(submission_index >= page_current_last_submission_); + assert_not_zero(layout_descriptor_count); + assert_true(layout_descriptor_count <= page_descriptor_count_); + + const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + + VkDescriptorSetAllocateInfo descriptor_set_allocate_info; + descriptor_set_allocate_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + descriptor_set_allocate_info.pNext = nullptr; + descriptor_set_allocate_info.descriptorSetCount = 1; + descriptor_set_allocate_info.pSetLayouts = &layout; + VkDescriptorSet descriptor_set; + + // Try to allocate as normal. + if (!pages_writable_.empty()) { + if (page_current_descriptor_sets_used_ < page_descriptor_set_count_ && + page_current_descriptors_used_ + layout_descriptor_count <= + page_descriptor_count_) { + descriptor_set_allocate_info.descriptorPool = pages_writable_.front(); + switch (dfn.vkAllocateDescriptorSets( + device, &descriptor_set_allocate_info, &descriptor_set)) { + case VK_SUCCESS: + page_current_last_submission_ = submission_index; + ++page_current_descriptor_sets_used_; + page_current_descriptors_used_ += layout_descriptor_count; + return descriptor_set; + case VK_ERROR_FRAGMENTED_POOL: + case VK_ERROR_OUT_OF_POOL_MEMORY: + // Need to create a new pool. 
+ break; + default: + XELOGE( + "Failed to allocate a transient Vulkan descriptor set with {} " + "descriptors of type {}", + layout_descriptor_count, uint32_t(descriptor_type_)); + return VK_NULL_HANDLE; + } + } + + // Overflow - go to the next pool. + pages_submitted_.emplace_back(pages_writable_.front(), + page_current_last_submission_); + pages_writable_.front() = pages_writable_.back(); + pages_writable_.pop_back(); + page_current_descriptor_sets_used_ = 0; + page_current_descriptors_used_ = 0; + } + + if (pages_writable_.empty()) { + VkDescriptorPoolSize descriptor_pool_size; + descriptor_pool_size.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor_pool_size.descriptorCount = page_descriptor_count_; + VkDescriptorPoolCreateInfo descriptor_pool_create_info; + descriptor_pool_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + descriptor_pool_create_info.pNext = nullptr; + descriptor_pool_create_info.flags = 0; + descriptor_pool_create_info.maxSets = page_descriptor_set_count_; + descriptor_pool_create_info.poolSizeCount = 1; + descriptor_pool_create_info.pPoolSizes = &descriptor_pool_size; + VkDescriptorPool descriptor_pool; + if (dfn.vkCreateDescriptorPool(device, &descriptor_pool_create_info, + nullptr, &descriptor_pool) != VK_SUCCESS) { + XELOGE( + "Failed to create a transient Vulkan descriptor pool for {} sets of " + "up to {} descriptors of type {}", + page_descriptor_set_count_, page_descriptor_count_, + uint32_t(descriptor_type_)); + return VK_NULL_HANDLE; + } + pages_writable_.push_back(descriptor_pool); + } + + // Try to allocate after handling overflow. 
+ descriptor_set_allocate_info.descriptorPool = pages_writable_.front(); + if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, + &descriptor_set) != VK_SUCCESS) { + XELOGE( + "Failed to allocate a transient Vulkan descriptor set with {} " + "descriptors of type {}", + layout_descriptor_count, uint32_t(descriptor_type_)); + return VK_NULL_HANDLE; + } + page_current_last_submission_ = submission_index; + ++page_current_descriptor_sets_used_; + page_current_descriptors_used_ += layout_descriptor_count; + return descriptor_set; +} + +} // namespace vulkan +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/vulkan/transient_descriptor_pool.h b/src/xenia/ui/vulkan/transient_descriptor_pool.h new file mode 100644 index 000000000..07760aff0 --- /dev/null +++ b/src/xenia/ui/vulkan/transient_descriptor_pool.h @@ -0,0 +1,61 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_UI_VULKAN_TRANSIENT_DESCRIPTOR_POOL_H_ +#define XENIA_UI_VULKAN_TRANSIENT_DESCRIPTOR_POOL_H_ + +#include +#include +#include +#include + +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace ui { +namespace vulkan { + +// A pool of descriptor pools for single-submission use. For simplicity of +// tracking when overflow happens, only allocating descriptors for sets +// containing descriptors of a single type. 
+class TransientDescriptorPool { + public: + TransientDescriptorPool(const VulkanProvider& provider, + VkDescriptorType descriptor_type, + uint32_t page_descriptor_set_count, + uint32_t page_descriptor_count); + ~TransientDescriptorPool(); + + void Reclaim(uint64_t completed_submission_index); + void ClearCache(); + + // Returns the allocated set, or VK_NULL_HANDLE if failed to allocate. + VkDescriptorSet Request(uint64_t submission_index, + VkDescriptorSetLayout layout, + uint32_t layout_descriptor_count); + + private: + const VulkanProvider& provider_; + + VkDescriptorType descriptor_type_; + uint32_t page_descriptor_set_count_; + uint32_t page_descriptor_count_; + + std::vector pages_writable_; + uint64_t page_current_last_submission_ = 0; + uint32_t page_current_descriptor_sets_used_ = 0; + uint32_t page_current_descriptors_used_ = 0; + std::deque> pages_submitted_; +}; + +} // namespace vulkan +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_VULKAN_TRANSIENT_DESCRIPTOR_POOL_H_ diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index 6f0e1e707..ba43f43d0 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -615,6 +615,7 @@ bool VulkanProvider::Initialize() { XE_VULKAN_LOAD_DFN(vkGetSwapchainImagesKHR); XE_VULKAN_LOAD_DFN(vkMapMemory); XE_VULKAN_LOAD_DFN(vkResetCommandPool); + XE_VULKAN_LOAD_DFN(vkResetDescriptorPool); XE_VULKAN_LOAD_DFN(vkResetFences); XE_VULKAN_LOAD_DFN(vkQueueBindSparse); XE_VULKAN_LOAD_DFN(vkQueuePresentKHR); diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h index 1345dea61..31753472c 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.h +++ b/src/xenia/ui/vulkan/vulkan_provider.h @@ -193,6 +193,7 @@ class VulkanProvider : public GraphicsProvider { PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR; PFN_vkMapMemory vkMapMemory; PFN_vkResetCommandPool vkResetCommandPool; + PFN_vkResetDescriptorPool 
vkResetDescriptorPool; PFN_vkResetFences vkResetFences; PFN_vkQueueBindSparse vkQueueBindSparse; PFN_vkQueuePresentKHR vkQueuePresentKHR; From 65c8d2b28e0c3182efc7f1ccb5b5fcebf59a97eb Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 14 Nov 2020 14:16:04 +0300 Subject: [PATCH 058/123] [Vulkan] Basic draw call architecture + [D3D12] Some cleanup --- .../gpu/d3d12/d3d12_command_processor.cc | 47 +- src/xenia/gpu/d3d12/pipeline_cache.cc | 25 +- src/xenia/gpu/d3d12/pipeline_cache.h | 5 +- src/xenia/gpu/spirv_shader_translator.cc | 58 +- src/xenia/gpu/spirv_shader_translator.h | 56 +- .../gpu/vulkan/deferred_command_buffer.cc | 125 ++- .../gpu/vulkan/deferred_command_buffer.h | 193 ++++- .../gpu/vulkan/vulkan_command_processor.cc | 795 +++++++++++++++++- .../gpu/vulkan/vulkan_command_processor.h | 157 +++- src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 443 ++++++++++ src/xenia/gpu/vulkan/vulkan_pipeline_cache.h | 183 ++++ .../gpu/vulkan/vulkan_render_target_cache.cc | 136 +++ .../gpu/vulkan/vulkan_render_target_cache.h | 95 +++ src/xenia/gpu/vulkan/vulkan_shader.cc | 48 ++ src/xenia/gpu/vulkan/vulkan_shader.h | 39 + src/xenia/gpu/vulkan/vulkan_shared_memory.cc | 5 +- .../ui/vulkan/transient_descriptor_pool.cc | 2 + 17 files changed, 2235 insertions(+), 177 deletions(-) create mode 100644 src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc create mode 100644 src/xenia/gpu/vulkan/vulkan_pipeline_cache.h create mode 100644 src/xenia/gpu/vulkan/vulkan_render_target_cache.cc create mode 100644 src/xenia/gpu/vulkan/vulkan_render_target_cache.h create mode 100644 src/xenia/gpu/vulkan/vulkan_shader.cc create mode 100644 src/xenia/gpu/vulkan/vulkan_shader.h diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index f0be8c50e..30c7d2c13 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -2005,14 +2005,15 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType 
primitive_type, } // Must not call anything that can change the descriptor heap from now on! - // Ensure vertex and index buffers are resident and draw. + // Ensure vertex buffers are resident. // TODO(Triang3l): Cache residency for ranges in a way similar to how texture - // validity will be tracked. + // validity is tracked. uint64_t vertex_buffers_resident[2] = {}; - for (const auto& vertex_binding : vertex_shader->vertex_bindings()) { + for (const Shader::VertexBinding& vertex_binding : + vertex_shader->vertex_bindings()) { uint32_t vfetch_index = vertex_binding.fetch_constant; if (vertex_buffers_resident[vfetch_index >> 6] & - (1ull << (vfetch_index & 63))) { + (uint64_t(1) << (vfetch_index & 63))) { continue; } const auto& vfetch_constant = regs.Get( @@ -2045,7 +2046,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, vfetch_constant.address << 2, vfetch_constant.size << 2); return false; } - vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63); + vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1) + << (vfetch_index & 63); } // Gather memexport ranges and ensure the heaps for them are resident, and @@ -2745,12 +2747,12 @@ void D3D12CommandProcessor::ClearCommandAllocatorCache() { } void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { - auto& regs = *register_file_; - #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES + const RegisterFile& regs = *register_file_; + // Window parameters. 
// http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h // See r200UpdateWindow: @@ -2846,14 +2848,14 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { scissor.right = pa_sc_window_scissor_br.br_x; scissor.bottom = pa_sc_window_scissor_br.br_y; if (!pa_sc_window_scissor_tl.window_offset_disable) { - scissor.left = - std::max(scissor.left + pa_sc_window_offset.window_x_offset, LONG(0)); - scissor.top = - std::max(scissor.top + pa_sc_window_offset.window_y_offset, LONG(0)); - scissor.right = - std::max(scissor.right + pa_sc_window_offset.window_x_offset, LONG(0)); - scissor.bottom = - std::max(scissor.bottom + pa_sc_window_offset.window_y_offset, LONG(0)); + scissor.left = std::max( + LONG(scissor.left + pa_sc_window_offset.window_x_offset), LONG(0)); + scissor.top = std::max( + LONG(scissor.top + pa_sc_window_offset.window_y_offset), LONG(0)); + scissor.right = std::max( + LONG(scissor.right + pa_sc_window_offset.window_x_offset), LONG(0)); + scissor.bottom = std::max( + LONG(scissor.bottom + pa_sc_window_offset.window_y_offset), LONG(0)); } scissor.left *= pixel_size_x; scissor.top *= pixel_size_y; @@ -2915,12 +2917,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( uint32_t line_loop_closing_index, xenos::Endian index_endian, uint32_t used_texture_mask, bool early_z, uint32_t color_mask, const RenderTargetCache::PipelineRenderTarget render_targets[4]) { - auto& regs = *register_file_; - #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES + const RegisterFile& regs = *register_file_; auto pa_cl_clip_cntl = regs.Get(); auto pa_cl_vte_cntl = regs.Get(); auto pa_su_point_minmax = regs.Get(); @@ -3103,14 +3104,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( dirty |= system_constants_.line_loop_closing_index != line_loop_closing_index; system_constants_.line_loop_closing_index = 
line_loop_closing_index; - // Vertex index offset. - dirty |= system_constants_.vertex_base_index != vgt_indx_offset; - system_constants_.vertex_base_index = vgt_indx_offset; - // Index or tessellation edge factor buffer endianness. dirty |= system_constants_.vertex_index_endian != index_endian; system_constants_.vertex_index_endian = index_endian; + // Vertex index offset. + dirty |= system_constants_.vertex_base_index != vgt_indx_offset; + system_constants_.vertex_base_index = vgt_indx_offset; + // User clip planes (UCP_ENA_#), when not CLIP_DISABLE. if (!pa_cl_clip_cntl.clip_disable) { for (uint32_t i = 0; i < 6; ++i) { @@ -3574,7 +3575,7 @@ bool D3D12CommandProcessor::UpdateBindings( float_constant_map_vertex.float_bitmap[i]; // If no float constants at all, we can reuse any buffer for them, so not // invalidating. - if (float_constant_map_vertex.float_count != 0) { + if (float_constant_count_vertex) { cbuffer_binding_float_vertex_.up_to_date = false; } } @@ -3589,7 +3590,7 @@ bool D3D12CommandProcessor::UpdateBindings( float_constant_map_pixel.float_bitmap[i]) { current_float_constant_map_pixel_[i] = float_constant_map_pixel.float_bitmap[i]; - if (float_constant_map_pixel.float_count != 0) { + if (float_constant_count_pixel) { cbuffer_binding_float_pixel_.up_to_date = false; } } diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 94b7b4998..a600301bb 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -223,10 +223,10 @@ void PipelineCache::ClearCache(bool shutting_down) { } texture_binding_layout_map_.clear(); texture_binding_layouts_.clear(); - for (auto it : shader_map_) { + for (auto it : shaders_) { delete it.second; } - shader_map_.clear(); + shaders_.clear(); if (reinitialize_shader_storage) { InitializeShaderStorage(shader_storage_root, shader_storage_title_id, @@ -374,8 +374,7 @@ void PipelineCache::InitializeShaderStorage( } size_t ucode_byte_count = 
shader_header.ucode_dword_count * sizeof(uint32_t); - if (shader_map_.find(shader_header.ucode_data_hash) != - shader_map_.end()) { + if (shaders_.find(shader_header.ucode_data_hash) != shaders_.end()) { // Already added - usually shaders aren't added without the intention of // translating them imminently, so don't do additional checks to // actually ensure that translation happens right now (they would cause @@ -402,7 +401,7 @@ void PipelineCache::InitializeShaderStorage( D3D12Shader* shader = new D3D12Shader(shader_header.type, ucode_data_hash, ucode_dwords.data(), shader_header.ucode_dword_count); - shader_map_.insert({ucode_data_hash, shader}); + shaders_.insert({ucode_data_hash, shader}); // Create new threads if the currently existing threads can't keep up with // file reading, but not more than the number of logical processors minus // one. @@ -439,7 +438,7 @@ void PipelineCache::InitializeShaderStorage( } shader_translation_threads.clear(); for (D3D12Shader* shader : shaders_failed_to_translate) { - shader_map_.erase(shader->ucode_data_hash()); + shaders_.erase(shader->ucode_data_hash()); delete shader; } } @@ -576,8 +575,8 @@ void PipelineCache::InitializeShaderStorage( PipelineRuntimeDescription pipeline_runtime_description; auto vertex_shader_it = - shader_map_.find(pipeline_description.vertex_shader_hash); - if (vertex_shader_it == shader_map_.end()) { + shaders_.find(pipeline_description.vertex_shader_hash); + if (vertex_shader_it == shaders_.end()) { continue; } pipeline_runtime_description.vertex_shader = vertex_shader_it->second; @@ -586,8 +585,8 @@ void PipelineCache::InitializeShaderStorage( } if (pipeline_description.pixel_shader_hash) { auto pixel_shader_it = - shader_map_.find(pipeline_description.pixel_shader_hash); - if (pixel_shader_it == shader_map_.end()) { + shaders_.find(pipeline_description.pixel_shader_hash); + if (pixel_shader_it == shaders_.end()) { continue; } pipeline_runtime_description.pixel_shader = pixel_shader_it->second; @@ 
-779,8 +778,8 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type, uint32_t dword_count) { // Hash the input memory and lookup the shader. uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0); - auto it = shader_map_.find(data_hash); - if (it != shader_map_.end()) { + auto it = shaders_.find(data_hash); + if (it != shaders_.end()) { // Shader has been previously loaded. return it->second; } @@ -790,7 +789,7 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type, // again. D3D12Shader* shader = new D3D12Shader(shader_type, data_hash, host_address, dword_count); - shader_map_.insert({data_hash, shader}); + shaders_.insert({data_hash, shader}); return shader; } diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index cdc6ed5f3..7798d3810 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -29,6 +29,7 @@ #include "xenia/gpu/dxbc_shader_translator.h" #include "xenia/gpu/register_file.h" #include "xenia/gpu/xenos.h" +#include "xenia/ui/d3d12/d3d12_api.h" namespace xe { namespace gpu { @@ -255,9 +256,9 @@ class PipelineCache { IDxcUtils* dxc_utils_ = nullptr; IDxcCompiler* dxc_compiler_ = nullptr; - // All loaded shaders mapped by their guest hash key. + // Ucode hash -> shader. std::unordered_map> - shader_map_; + shaders_; struct LayoutUID { size_t uid; diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index dd7cac5ba..f13418355 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -288,7 +288,7 @@ void SpirvShaderTranslator::StartTranslation() { id_vector_temp_.push_back(builder_->makeRuntimeArray(type_uint_)); // Storage buffers have std430 packing, no padding to 4-component vectors. 
builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride, - sizeof(uint32_t) * 4); + sizeof(uint32_t)); spv::Id type_shared_memory = builder_->makeStructType(id_vector_temp_, "XeSharedMemory"); builder_->addMemberName(type_shared_memory, 0, "shared_memory"); @@ -511,7 +511,9 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { ? spv::ExecutionModelTessellationEvaluation : spv::ExecutionModelVertex; } - if (features_.float_controls) { + // TODO(Triang3l): Re-enable float controls when + // VkPhysicalDeviceFloatControlsPropertiesKHR are handled. + /* if (features_.float_controls) { // Flush to zero, similar to the real hardware, also for things like Shader // Model 3 multiplication emulation. builder_->addCapability(spv::CapabilityDenormFlushToZero); @@ -523,7 +525,7 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { builder_->addCapability(spv::CapabilitySignedZeroInfNanPreserve); builder_->addExecutionMode(function_main_, spv::ExecutionModeSignedZeroInfNanPreserve, 32); - } + } */ spv::Instruction* entry_point = builder_->addEntryPoint(execution_model, function_main_, "main"); for (spv::Id interface_id : main_interface_) { @@ -982,7 +984,19 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { } } -void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() {} +void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { + // Write 1 to point size (using a geometry shader or another kind of fallback + // to expand point sprites - point size support is not guaranteed, and the + // size would also be limited, and can't be controlled independently along two + // axes). 
+ id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(kOutputPerVertexMemberPointSize)); + builder_->createStore( + const_float_1_, + builder_->createAccessChain(spv::StorageClassOutput, output_per_vertex_, + id_vector_temp_)); +} void SpirvShaderTranslator::UpdateExecConditionals( ParsedExecInstruction::Type type, uint32_t bool_constant_index, @@ -1054,9 +1068,8 @@ void SpirvShaderTranslator::UpdateExecConditionals( return; } cf_exec_condition_ = condition; - spv::Function& function = builder_->getBuildPoint()->getParent(); - cf_exec_conditional_merge_ = - new spv::Block(builder_->getUniqueId(), function); + cf_exec_conditional_merge_ = new spv::Block( + builder_->getUniqueId(), builder_->getBuildPoint()->getParent()); SpirvCreateSelectionMerge(cf_exec_conditional_merge_->getId()); spv::Block& inner_block = builder_->makeNewBlock(); builder_->createConditionalBranch( @@ -1095,7 +1108,8 @@ void SpirvShaderTranslator::UpdateInstructionPredication(bool predicated, spv::Id predicate_id = builder_->createLoad(var_main_predicate_, spv::NoPrecision); spv::Block& predicated_block = builder_->makeNewBlock(); - cf_instruction_predicate_merge_ = &builder_->makeNewBlock(); + cf_instruction_predicate_merge_ = new spv::Block( + builder_->getUniqueId(), builder_->getBuildPoint()->getParent()); SpirvCreateSelectionMerge(cf_instruction_predicate_merge_->getId()); builder_->createConditionalBranch( predicate_id, @@ -1135,12 +1149,23 @@ void SpirvShaderTranslator::CloseExecConditionals() { } spv::Id SpirvShaderTranslator::GetStorageAddressingIndex( - InstructionStorageAddressingMode addressing_mode, uint32_t storage_index) { + InstructionStorageAddressingMode addressing_mode, uint32_t storage_index, + bool is_float_constant) { EnsureBuildPointAvailable(); spv::Id base_pointer = spv::NoResult; switch (addressing_mode) { - case InstructionStorageAddressingMode::kStatic: - return builder_->makeIntConstant(int(storage_index)); + case 
InstructionStorageAddressingMode::kStatic: { + uint32_t static_storage_index = storage_index; + if (is_float_constant) { + static_storage_index = + constant_register_map().GetPackedFloatConstantIndex(storage_index); + assert_true(static_storage_index != UINT32_MAX); + if (static_storage_index == UINT32_MAX) { + static_storage_index = 0; + } + } + return builder_->makeIntConstant(int(static_storage_index)); + } case InstructionStorageAddressingMode::kAddressAbsolute: base_pointer = var_main_address_absolute_; break; @@ -1153,6 +1178,8 @@ spv::Id SpirvShaderTranslator::GetStorageAddressingIndex( id_vector_temp_util_); break; } + assert_true(!is_float_constant || + constant_register_map().float_dynamic_addressing); assert_true(base_pointer != spv::NoResult); spv::Id index = builder_->createLoad(base_pointer, spv::NoPrecision); if (storage_index) { @@ -1165,8 +1192,9 @@ spv::Id SpirvShaderTranslator::GetStorageAddressingIndex( spv::Id SpirvShaderTranslator::LoadOperandStorage( const InstructionOperand& operand) { - spv::Id index = GetStorageAddressingIndex(operand.storage_addressing_mode, - operand.storage_index); + spv::Id index = GetStorageAddressingIndex( + operand.storage_addressing_mode, operand.storage_index, + operand.storage_source == InstructionStorageSource::kConstantFloat); EnsureBuildPointAvailable(); spv::Id vec4_pointer = spv::NoResult; switch (operand.storage_source) { @@ -1592,7 +1620,7 @@ spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) { builder_->makeUintConstant( static_cast(xenos::Endian::k8in32))); spv::Id is_8in16_or_8in32 = - builder_->createBinOp(spv::OpLogicalAnd, type_bool_, is_8in16, is_8in32); + builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in16, is_8in32); spv::Block& block_pre_8in16 = *builder_->getBuildPoint(); assert_false(block_pre_8in16.isTerminated()); spv::Block& block_8in16 = builder_->makeNewBlock(); @@ -1633,7 +1661,7 @@ spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id 
endian) { builder_->makeUintConstant( static_cast(xenos::Endian::k16in32))); spv::Id is_8in32_or_16in32 = - builder_->createBinOp(spv::OpLogicalAnd, type_bool_, is_8in32, is_16in32); + builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in32, is_16in32); spv::Block& block_pre_16in32 = *builder_->getBuildPoint(); spv::Block& block_16in32 = builder_->makeNewBlock(); spv::Block& block_16in32_merge = builder_->makeNewBlock(); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index bb74d5ab9..978ad8789 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -39,26 +39,49 @@ class SpirvShaderTranslator : public ShaderTranslator { // therefore SSBOs must only be used for shared memory - all other storage // resources must be images or texel buffers. enum DescriptorSet : uint32_t { - // In order of update frequency. - // Very frequently changed, especially for UI draws, and for models drawn in - // multiple parts - contains vertex and texture fetch constants. - kDescriptorSetFetchConstants, + // According to the "Pipeline Layout Compatibility" section of the Vulkan + // specification: + // "Two pipeline layouts are defined to be "compatible for set N" if they + // were created with identically defined descriptor set layouts for sets + // zero through N, and if they were created with identical push constant + // ranges." + // "Place the least frequently changing descriptor sets near the start of + // the pipeline layout, and place the descriptor sets representing the most + // frequently changing resources near the end. When pipelines are switched, + // only the descriptor set bindings that have been invalidated will need to + // be updated and the remainder of the descriptor set bindings will remain + // in place." + // This is partially the reverse of the Direct3D 12's rule of placing the + // most frequently changed descriptor sets in the beginning. 
Here all + // descriptor sets with an immutable layout are placed first, in reverse + // frequency of changing, and sets that may be different for different + // pipeline states last. + + // Always the same descriptor set layouts for all pipeline layouts: + + // Never changed. + kDescriptorSetSharedMemoryAndEdram, + // Pretty rarely used and rarely changed - flow control constants. + kDescriptorSetBoolLoopConstants, + // May stay the same across many draws. + kDescriptorSetSystemConstants, + // Less frequently changed (per-material). + kDescriptorSetFloatConstantsPixel, // Quite frequently changed (for one object drawn multiple times, for // instance - may contain projection matrices). kDescriptorSetFloatConstantsVertex, - // Less frequently changed (per-material). - kDescriptorSetFloatConstantsPixel, - // Per-material, combined images and samplers. - kDescriptorSetTexturesPixel, + // Very frequently changed, especially for UI draws, and for models drawn in + // multiple parts - contains vertex and texture fetch constants. + kDescriptorSetFetchConstants, + + // Mutable part of the pipeline layout: + kDescriptorSetMutableLayoutsStart, + // Rarely used at all, but may be changed at an unpredictable rate when // vertex textures are used, combined images and samplers. - kDescriptorSetTexturesVertex, - // May stay the same across many draws. - kDescriptorSetSystemConstants, - // Pretty rarely used and rarely changed - flow control constants. - kDescriptorSetBoolLoopConstants, - // Never changed. - kDescriptorSetSharedMemoryAndEdram, + kDescriptorSetTexturesVertex = kDescriptorSetMutableLayoutsStart, + // Per-material, combined images and samplers. 
+ kDescriptorSetTexturesPixel, kDescriptorSetCount, }; @@ -162,7 +185,8 @@ class SpirvShaderTranslator : public ShaderTranslator { void CloseExecConditionals(); spv::Id GetStorageAddressingIndex( - InstructionStorageAddressingMode addressing_mode, uint32_t storage_index); + InstructionStorageAddressingMode addressing_mode, uint32_t storage_index, + bool is_float_constant = false); // Loads unswizzled operand without sign modifiers as float4. spv::Id LoadOperandStorage(const InstructionOperand& operand); spv::Id ApplyOperandModifiers(spv::Id operand_value, diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.cc b/src/xenia/gpu/vulkan/deferred_command_buffer.cc index f9c359506..641843fc9 100644 --- a/src/xenia/gpu/vulkan/deferred_command_buffer.cc +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.cc @@ -46,15 +46,65 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { stream_remaining -= kCommandHeaderSizeElements; switch (header.command) { + case Command::kVkBeginRenderPass: { + auto& args = *reinterpret_cast(stream); + size_t offset_bytes = sizeof(ArgsVkBeginRenderPass); + VkRenderPassBeginInfo render_pass_begin_info; + render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + render_pass_begin_info.pNext = nullptr; + render_pass_begin_info.renderPass = args.render_pass; + render_pass_begin_info.framebuffer = args.framebuffer; + render_pass_begin_info.renderArea = args.render_area; + render_pass_begin_info.clearValueCount = args.clear_value_count; + if (render_pass_begin_info.clearValueCount) { + offset_bytes = xe::align(offset_bytes, alignof(VkClearValue)); + render_pass_begin_info.pClearValues = + reinterpret_cast( + reinterpret_cast(stream) + offset_bytes); + offset_bytes += + sizeof(VkClearValue) * render_pass_begin_info.clearValueCount; + } else { + render_pass_begin_info.pClearValues = nullptr; + } + dfn.vkCmdBeginRenderPass(command_buffer, &render_pass_begin_info, + args.contents); + } break; + + case 
Command::kVkBindDescriptorSets: { + auto& args = *reinterpret_cast(stream); + size_t offset_bytes = xe::align(sizeof(ArgsVkBindDescriptorSets), + alignof(VkDescriptorSet)); + const VkDescriptorSet* descriptor_sets = + reinterpret_cast( + reinterpret_cast(stream) + offset_bytes); + offset_bytes += sizeof(VkDescriptorSet) * args.descriptor_set_count; + const uint32_t* dynamic_offsets = nullptr; + if (args.dynamic_offset_count) { + offset_bytes = xe::align(offset_bytes, alignof(uint32_t)); + dynamic_offsets = reinterpret_cast( + reinterpret_cast(stream) + offset_bytes); + offset_bytes += sizeof(uint32_t) * args.dynamic_offset_count; + } + dfn.vkCmdBindDescriptorSets(command_buffer, args.pipeline_bind_point, + args.layout, args.first_set, + args.descriptor_set_count, descriptor_sets, + args.dynamic_offset_count, dynamic_offsets); + } break; + case Command::kVkBindIndexBuffer: { auto& args = *reinterpret_cast(stream); dfn.vkCmdBindIndexBuffer(command_buffer, args.buffer, args.offset, args.index_type); } break; + case Command::kVkBindPipeline: { + auto& args = *reinterpret_cast(stream); + dfn.vkCmdBindPipeline(command_buffer, args.pipeline_bind_point, + args.pipeline); + } break; + case Command::kVkCopyBuffer: { auto& args = *reinterpret_cast(stream); - static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t)); dfn.vkCmdCopyBuffer( command_buffer, args.src_buffer, args.dst_buffer, args.region_count, reinterpret_cast( @@ -62,26 +112,37 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy)))); } break; + case Command::kVkDraw: { + auto& args = *reinterpret_cast(stream); + dfn.vkCmdDraw(command_buffer, args.vertex_count, args.instance_count, + args.first_vertex, args.first_instance); + } break; + + case Command::kVkDrawIndexed: { + auto& args = *reinterpret_cast(stream); + dfn.vkCmdDrawIndexed(command_buffer, args.index_count, + args.instance_count, args.first_index, + args.vertex_offset, 
args.first_instance); + } break; + + case Command::kVkEndRenderPass: + dfn.vkCmdEndRenderPass(command_buffer); + break; + case Command::kVkPipelineBarrier: { auto& args = *reinterpret_cast(stream); size_t barrier_offset_bytes = sizeof(ArgsVkPipelineBarrier); - - const VkMemoryBarrier* memory_barriers; + const VkMemoryBarrier* memory_barriers = nullptr; if (args.memory_barrier_count) { - static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t)); barrier_offset_bytes = xe::align(barrier_offset_bytes, alignof(VkMemoryBarrier)); memory_barriers = reinterpret_cast( reinterpret_cast(stream) + barrier_offset_bytes); barrier_offset_bytes += sizeof(VkMemoryBarrier) * args.memory_barrier_count; - } else { - memory_barriers = nullptr; } - - const VkBufferMemoryBarrier* buffer_memory_barriers; + const VkBufferMemoryBarrier* buffer_memory_barriers = nullptr; if (args.buffer_memory_barrier_count) { - static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t)); barrier_offset_bytes = xe::align(barrier_offset_bytes, alignof(VkBufferMemoryBarrier)); buffer_memory_barriers = @@ -90,23 +151,16 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { barrier_offset_bytes); barrier_offset_bytes += sizeof(VkBufferMemoryBarrier) * args.buffer_memory_barrier_count; - } else { - buffer_memory_barriers = nullptr; } - - const VkImageMemoryBarrier* image_memory_barriers; + const VkImageMemoryBarrier* image_memory_barriers = nullptr; if (args.image_memory_barrier_count) { - static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t)); barrier_offset_bytes = xe::align(barrier_offset_bytes, alignof(VkImageMemoryBarrier)); image_memory_barriers = reinterpret_cast( reinterpret_cast(stream) + barrier_offset_bytes); barrier_offset_bytes += sizeof(VkImageMemoryBarrier) * args.image_memory_barrier_count; - } else { - image_memory_barriers = nullptr; } - dfn.vkCmdPipelineBarrier( command_buffer, args.src_stage_mask, args.dst_stage_mask, args.dependency_flags, 
args.memory_barrier_count, memory_barriers, @@ -114,6 +168,24 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { args.image_memory_barrier_count, image_memory_barriers); } break; + case Command::kVkSetScissor: { + auto& args = *reinterpret_cast(stream); + dfn.vkCmdSetScissor( + command_buffer, args.first_scissor, args.scissor_count, + reinterpret_cast( + reinterpret_cast(stream) + + xe::align(sizeof(ArgsVkSetScissor), alignof(VkRect2D)))); + } break; + + case Command::kVkSetViewport: { + auto& args = *reinterpret_cast(stream); + dfn.vkCmdSetViewport( + command_buffer, args.first_viewport, args.viewport_count, + reinterpret_cast( + reinterpret_cast(stream) + + xe::align(sizeof(ArgsVkSetViewport), alignof(VkViewport)))); + } break; + default: assert_unhandled_case(header.command); break; @@ -133,38 +205,25 @@ void DeferredCommandBuffer::CmdVkPipelineBarrier( uint32_t image_memory_barrier_count, const VkImageMemoryBarrier* image_memory_barriers) { size_t arguments_size = sizeof(ArgsVkPipelineBarrier); - - size_t memory_barriers_offset; + size_t memory_barriers_offset = 0; if (memory_barrier_count) { - static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t)); arguments_size = xe::align(arguments_size, alignof(VkMemoryBarrier)); memory_barriers_offset = arguments_size; arguments_size += sizeof(VkMemoryBarrier) * memory_barrier_count; - } else { - memory_barriers_offset = 0; } - - size_t buffer_memory_barriers_offset; + size_t buffer_memory_barriers_offset = 0; if (buffer_memory_barrier_count) { - static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t)); arguments_size = xe::align(arguments_size, alignof(VkBufferMemoryBarrier)); buffer_memory_barriers_offset = arguments_size; arguments_size += sizeof(VkBufferMemoryBarrier) * buffer_memory_barrier_count; - } else { - buffer_memory_barriers_offset = 0; } - - size_t image_memory_barriers_offset; + size_t image_memory_barriers_offset = 0; if (image_memory_barrier_count) { - 
static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t)); arguments_size = xe::align(arguments_size, alignof(VkImageMemoryBarrier)); image_memory_barriers_offset = arguments_size; arguments_size += sizeof(VkImageMemoryBarrier) * image_memory_barrier_count; - } else { - image_memory_barriers_offset = 0; } - uint8_t* args_ptr = reinterpret_cast( WriteCommand(Command::kVkPipelineBarrier, arguments_size)); auto& args = *reinterpret_cast(args_ptr); diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.h b/src/xenia/gpu/vulkan/deferred_command_buffer.h index 879c92d5a..9ed39557b 100644 --- a/src/xenia/gpu/vulkan/deferred_command_buffer.h +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.h @@ -14,6 +14,7 @@ #include #include +#include "xenia/base/assert.h" #include "xenia/base/math.h" #include "xenia/ui/vulkan/vulkan_provider.h" @@ -31,6 +32,65 @@ class DeferredCommandBuffer { void Reset(); void Execute(VkCommandBuffer command_buffer); + // render_pass_begin->pNext of all barriers must be null. 
+ void CmdVkBeginRenderPass(const VkRenderPassBeginInfo* render_pass_begin, + VkSubpassContents contents) { + assert_null(render_pass_begin->pNext); + size_t arguments_size = sizeof(ArgsVkBeginRenderPass); + uint32_t clear_value_count = render_pass_begin->clearValueCount; + size_t clear_values_offset = 0; + if (clear_value_count) { + arguments_size = xe::align(arguments_size, alignof(VkClearValue)); + clear_values_offset = arguments_size; + arguments_size += sizeof(VkClearValue) * clear_value_count; + } + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkBeginRenderPass, arguments_size)); + auto& args = *reinterpret_cast(args_ptr); + args.render_pass = render_pass_begin->renderPass; + args.framebuffer = render_pass_begin->framebuffer; + args.render_area = render_pass_begin->renderArea; + args.clear_value_count = clear_value_count; + args.contents = contents; + if (clear_value_count) { + std::memcpy(args_ptr + clear_values_offset, + render_pass_begin->pClearValues, + sizeof(VkClearValue) * clear_value_count); + } + } + + void CmdVkBindDescriptorSets(VkPipelineBindPoint pipeline_bind_point, + VkPipelineLayout layout, uint32_t first_set, + uint32_t descriptor_set_count, + const VkDescriptorSet* descriptor_sets, + uint32_t dynamic_offset_count, + const uint32_t* dynamic_offsets) { + size_t arguments_size = + xe::align(sizeof(ArgsVkBindDescriptorSets), alignof(VkDescriptorSet)); + size_t descriptor_sets_offset = arguments_size; + arguments_size += sizeof(VkDescriptorSet) * descriptor_set_count; + size_t dynamic_offsets_offset = 0; + if (dynamic_offset_count) { + arguments_size = xe::align(arguments_size, alignof(uint32_t)); + dynamic_offsets_offset = arguments_size; + arguments_size += sizeof(uint32_t) * dynamic_offset_count; + } + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkBindDescriptorSets, arguments_size)); + auto& args = *reinterpret_cast(args_ptr); + args.pipeline_bind_point = pipeline_bind_point; + args.layout = layout; + 
args.first_set = first_set; + args.descriptor_set_count = descriptor_set_count; + args.dynamic_offset_count = dynamic_offset_count; + std::memcpy(args_ptr + descriptor_sets_offset, descriptor_sets, + sizeof(VkDescriptorSet) * descriptor_set_count); + if (dynamic_offset_count) { + std::memcpy(args_ptr + dynamic_offsets_offset, dynamic_offsets, + sizeof(uint32_t) * dynamic_offset_count); + } + } + void CmdVkBindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType index_type) { auto& args = *reinterpret_cast(WriteCommand( @@ -40,9 +100,16 @@ class DeferredCommandBuffer { args.index_type = index_type; } + void CmdVkBindPipeline(VkPipelineBindPoint pipeline_bind_point, + VkPipeline pipeline) { + auto& args = *reinterpret_cast( + WriteCommand(Command::kVkBindPipeline, sizeof(ArgsVkBindPipeline))); + args.pipeline_bind_point = pipeline_bind_point; + args.pipeline = pipeline; + } + VkBufferCopy* CmdCopyBufferEmplace(VkBuffer src_buffer, VkBuffer dst_buffer, uint32_t region_count) { - static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t)); const size_t header_size = xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy)); uint8_t* args_ptr = reinterpret_cast( @@ -60,6 +127,30 @@ class DeferredCommandBuffer { regions, sizeof(VkBufferCopy) * region_count); } + void CmdVkDraw(uint32_t vertex_count, uint32_t instance_count, + uint32_t first_vertex, uint32_t first_instance) { + auto& args = *reinterpret_cast( + WriteCommand(Command::kVkDraw, sizeof(ArgsVkDraw))); + args.vertex_count = vertex_count; + args.instance_count = instance_count; + args.first_vertex = first_vertex; + args.first_instance = first_instance; + } + + void CmdVkDrawIndexed(uint32_t index_count, uint32_t instance_count, + uint32_t first_index, int32_t vertex_offset, + uint32_t first_instance) { + auto& args = *reinterpret_cast( + WriteCommand(Command::kVkDrawIndexed, sizeof(ArgsVkDrawIndexed))); + args.index_count = index_count; + args.instance_count = instance_count; + args.first_index = 
first_index; + args.vertex_offset = vertex_offset; + args.first_instance = first_instance; + } + + void CmdVkEndRenderPass() { WriteCommand(Command::kVkEndRenderPass, 0); } + // pNext of all barriers must be null. void CmdVkPipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, @@ -71,11 +162,47 @@ class DeferredCommandBuffer { uint32_t image_memory_barrier_count, const VkImageMemoryBarrier* image_memory_barriers); + void CmdVkSetScissor(uint32_t first_scissor, uint32_t scissor_count, + const VkRect2D* scissors) { + const size_t header_size = + xe::align(sizeof(ArgsVkSetScissor), alignof(VkRect2D)); + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkSetScissor, + header_size + sizeof(VkRect2D) * scissor_count)); + auto& args = *reinterpret_cast(args_ptr); + args.first_scissor = first_scissor; + args.scissor_count = scissor_count; + std::memcpy(args_ptr + header_size, scissors, + sizeof(VkRect2D) * scissor_count); + } + + void CmdVkSetViewport(uint32_t first_viewport, uint32_t viewport_count, + const VkViewport* viewports) { + const size_t header_size = + xe::align(sizeof(ArgsVkSetViewport), alignof(VkViewport)); + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkSetViewport, + header_size + sizeof(VkViewport) * viewport_count)); + auto& args = *reinterpret_cast(args_ptr); + args.first_viewport = first_viewport; + args.viewport_count = viewport_count; + std::memcpy(args_ptr + header_size, viewports, + sizeof(VkViewport) * viewport_count); + } + private: enum class Command { + kVkBeginRenderPass, + kVkBindDescriptorSets, kVkBindIndexBuffer, + kVkBindPipeline, kVkCopyBuffer, + kVkDraw, + kVkDrawIndexed, + kVkEndRenderPass, kVkPipelineBarrier, + kVkSetScissor, + kVkSetViewport, }; struct CommandHeader { @@ -85,17 +212,58 @@ class DeferredCommandBuffer { static constexpr size_t kCommandHeaderSizeElements = (sizeof(CommandHeader) + sizeof(uintmax_t) - 1) / sizeof(uintmax_t); + struct ArgsVkBeginRenderPass 
{ + VkRenderPass render_pass; + VkFramebuffer framebuffer; + VkRect2D render_area; + uint32_t clear_value_count; + VkSubpassContents contents; + // Followed by aligned optional VkClearValue[]. + static_assert(alignof(VkClearValue) <= alignof(uintmax_t)); + }; + + struct ArgsVkBindDescriptorSets { + VkPipelineBindPoint pipeline_bind_point; + VkPipelineLayout layout; + uint32_t first_set; + uint32_t descriptor_set_count; + uint32_t dynamic_offset_count; + // Followed by aligned VkDescriptorSet[], optional uint32_t[]. + static_assert(alignof(VkDescriptorSet) <= alignof(uintmax_t)); + }; + struct ArgsVkBindIndexBuffer { VkBuffer buffer; VkDeviceSize offset; VkIndexType index_type; }; + struct ArgsVkBindPipeline { + VkPipelineBindPoint pipeline_bind_point; + VkPipeline pipeline; + }; + struct ArgsVkCopyBuffer { VkBuffer src_buffer; VkBuffer dst_buffer; uint32_t region_count; - // Followed by VkBufferCopy[]. + // Followed by aligned VkBufferCopy[]. + static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t)); + }; + + struct ArgsVkDraw { + uint32_t vertex_count; + uint32_t instance_count; + uint32_t first_vertex; + uint32_t first_instance; + }; + + struct ArgsVkDrawIndexed { + uint32_t index_count; + uint32_t instance_count; + uint32_t first_index; + int32_t vertex_offset; + uint32_t first_instance; }; struct ArgsVkPipelineBarrier { @@ -105,8 +273,25 @@ class DeferredCommandBuffer { uint32_t memory_barrier_count; uint32_t buffer_memory_barrier_count; uint32_t image_memory_barrier_count; - // Followed by aligned VkMemoryBarrier[], VkBufferMemoryBarrier[], - // VkImageMemoryBarrier[]. + // Followed by aligned optional VkMemoryBarrier[], + // optional VkBufferMemoryBarrier[], optional VkImageMemoryBarrier[]. 
+ static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t)); + static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t)); + static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t)); + }; + + struct ArgsVkSetScissor { + uint32_t first_scissor; + uint32_t scissor_count; + // Followed by aligned VkRect2D[]. + static_assert(alignof(VkRect2D) <= alignof(uintmax_t)); + }; + + struct ArgsVkSetViewport { + uint32_t first_viewport; + uint32_t viewport_count; + // Followed by aligned VkViewport[]. + static_assert(alignof(VkViewport) <= alignof(uintmax_t)); }; void* WriteCommand(Command command, size_t arguments_size_bytes); diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 7b895f48c..434ce0d02 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -9,15 +9,24 @@ #include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include #include +#include #include #include "xenia/base/assert.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/base/profiling.h" +#include "xenia/gpu/gpu_flags.h" +#include "xenia/gpu/registers.h" +#include "xenia/gpu/shader.h" #include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/vulkan/vulkan_pipeline_cache.h" +#include "xenia/gpu/vulkan/vulkan_render_target_cache.h" +#include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/vulkan/vulkan_shared_memory.h" +#include "xenia/gpu/xenos.h" #include "xenia/ui/vulkan/vulkan_context.h" #include "xenia/ui/vulkan/vulkan_provider.h" #include "xenia/ui/vulkan/vulkan_util.h" @@ -54,6 +63,16 @@ bool VulkanCommandProcessor::SetupContext() { transient_descriptor_pool_uniform_buffers_ = std::make_unique( provider, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 32768, 32768); + // 16384 is bigger than any single uniform buffer that Xenia needs, but is the + // minimum maxUniformBufferRange, thus the safe minimum amount. 
+ VkDeviceSize uniform_buffer_alignment = std::max( + provider.device_properties().limits.minUniformBufferOffsetAlignment, + VkDeviceSize(1)); + uniform_buffer_pool_ = std::make_unique( + provider, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + xe::align(std::max(ui::GraphicsUploadBufferPool::kDefaultPageSize, + size_t(16384)), + size_t(uniform_buffer_alignment))); VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info; descriptor_set_layout_create_info.sType = @@ -162,6 +181,20 @@ bool VulkanCommandProcessor::SetupContext() { return false; } + render_target_cache_ = + std::make_unique(*this, *register_file_); + if (!render_target_cache_->Initialize()) { + XELOGE("Failed to initialize the render target cache"); + return false; + } + + pipeline_cache_ = std::make_unique( + *this, *register_file_, *render_target_cache_); + if (!pipeline_cache_->Initialize()) { + XELOGE("Failed to initialize the graphics pipeline cache"); + return false; + } + // Shared memory and EDRAM common bindings. VkDescriptorPoolSize descriptor_pool_sizes[1]; descriptor_pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; @@ -229,6 +262,9 @@ bool VulkanCommandProcessor::SetupContext() { // interlocks case. dfn.vkUpdateDescriptorSets(device, 1, write_descriptor_sets, 0, nullptr); + // Just not to expose uninitialized memory. 
+ std::memset(&system_constants_, 0, sizeof(system_constants_)); + return true; } @@ -244,6 +280,10 @@ void VulkanCommandProcessor::ShutdownContext() { dfn.vkDestroyDescriptorPool, device, shared_memory_and_edram_descriptor_pool_); + pipeline_cache_.reset(); + + render_target_cache_.reset(); + shared_memory_.reset(); for (const auto& pipeline_layout_pair : pipeline_layouts_) { @@ -276,6 +316,7 @@ void VulkanCommandProcessor::ShutdownContext() { ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, device, descriptor_set_layout_empty_); + uniform_buffer_pool_.reset(); transient_descriptor_pool_uniform_buffers_.reset(); sparse_bind_wait_stage_mask_ = 0; @@ -325,6 +366,42 @@ void VulkanCommandProcessor::ShutdownContext() { CommandProcessor::ShutdownContext(); } +void VulkanCommandProcessor::WriteRegister(uint32_t index, uint32_t value) { + CommandProcessor::WriteRegister(index, value); + + if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X && + index <= XE_GPU_REG_SHADER_CONSTANT_511_W) { + if (frame_open_) { + uint32_t float_constant_index = + (index - XE_GPU_REG_SHADER_CONSTANT_000_X) >> 2; + if (float_constant_index >= 256) { + float_constant_index -= 256; + if (current_float_constant_map_pixel_[float_constant_index >> 6] & + (1ull << (float_constant_index & 63))) { + current_graphics_descriptor_set_values_up_to_date_ &= + ~(uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel); + } + } else { + if (current_float_constant_map_vertex_[float_constant_index >> 6] & + (1ull << (float_constant_index & 63))) { + current_graphics_descriptor_set_values_up_to_date_ &= + ~(uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex); + } + } + } + } else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 && + index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) { + current_graphics_descriptor_set_values_up_to_date_ &= ~( + uint32_t(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants); + } else if (index >= 
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 && + index <= XE_GPU_REG_SHADER_CONSTANT_FETCH_31_5) { + current_graphics_descriptor_set_values_up_to_date_ &= + ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants); + } +} + void VulkanCommandProcessor::SparseBindBuffer( VkBuffer buffer, uint32_t bind_count, const VkSparseMemoryBind* binds, VkPipelineStageFlags wait_stage_mask) { @@ -356,17 +433,25 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, EndSubmission(true); } -bool VulkanCommandProcessor::GetPipelineLayout( - uint32_t texture_count_pixel, uint32_t texture_count_vertex, - PipelineLayout& pipeline_layout_out) { +void VulkanCommandProcessor::EndRenderPass() { + assert_true(submission_open_); + if (current_render_pass_ == VK_NULL_HANDLE) { + return; + } + deferred_command_buffer_.CmdVkEndRenderPass(); + current_render_pass_ = VK_NULL_HANDLE; +} + +const VulkanPipelineCache::PipelineLayoutProvider* +VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, + uint32_t texture_count_vertex) { PipelineLayoutKey pipeline_layout_key; pipeline_layout_key.texture_count_pixel = texture_count_pixel; pipeline_layout_key.texture_count_vertex = texture_count_vertex; { auto it = pipeline_layouts_.find(pipeline_layout_key.key); if (it != pipeline_layouts_.end()) { - pipeline_layout_out = it->second; - return true; + return &it->second; } } @@ -462,26 +547,28 @@ bool VulkanCommandProcessor::GetPipelineLayout( VkDescriptorSetLayout descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetCount]; - descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetFetchConstants] = - descriptor_set_layout_fetch_bool_loop_constants_; - descriptor_set_layouts - [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] = - descriptor_set_layout_float_constants_vertex_; - descriptor_set_layouts - [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] = - descriptor_set_layout_float_constants_pixel_; - 
descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesPixel] = - descriptor_set_layout_textures_pixel; - descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesVertex] = - descriptor_set_layout_textures_vertex; - descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSystemConstants] = - descriptor_set_layout_system_constants_; - descriptor_set_layouts - [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] = - descriptor_set_layout_fetch_bool_loop_constants_; + // Immutable layouts. descriptor_set_layouts [SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] = descriptor_set_layout_shared_memory_and_edram_; + descriptor_set_layouts + [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] = + descriptor_set_layout_fetch_bool_loop_constants_; + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSystemConstants] = + descriptor_set_layout_system_constants_; + descriptor_set_layouts + [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] = + descriptor_set_layout_float_constants_pixel_; + descriptor_set_layouts + [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] = + descriptor_set_layout_float_constants_vertex_; + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetFetchConstants] = + descriptor_set_layout_fetch_bool_loop_constants_; + // Mutable layouts. 
+ descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesVertex] = + descriptor_set_layout_textures_vertex; + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesPixel] = + descriptor_set_layout_textures_pixel; VkPipelineLayoutCreateInfo pipeline_layout_create_info; pipeline_layout_create_info.sType = @@ -508,16 +595,18 @@ bool VulkanCommandProcessor::GetPipelineLayout( descriptor_set_layout_textures_pixel; pipeline_layout_entry.descriptor_set_layout_textures_vertex_ref = descriptor_set_layout_textures_vertex; - pipeline_layouts_.emplace(pipeline_layout_key.key, pipeline_layout_entry); - pipeline_layout_out = pipeline_layout_entry; - return true; + auto emplaced_pair = + pipeline_layouts_.emplace(pipeline_layout_key.key, pipeline_layout_entry); + // unordered_map insertion doesn't invalidate element references. + return &emplaced_pair.first->second; } Shader* VulkanCommandProcessor::LoadShader(xenos::ShaderType shader_type, uint32_t guest_address, const uint32_t* host_address, uint32_t dword_count) { - return nullptr; + return pipeline_cache_->LoadShader(shader_type, guest_address, host_address, + dword_count); } bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, @@ -530,9 +619,135 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, BeginSubmission(true); + auto vertex_shader = static_cast(active_vertex_shader()); + if (!vertex_shader) { + // Always need a vertex shader. + return false; + } + // TODO(Triang3l): Get a pixel shader. 
+ VulkanShader* pixel_shader = nullptr; + + VulkanRenderTargetCache::FramebufferKey framebuffer_key; + if (!render_target_cache_->UpdateRenderTargets(framebuffer_key)) { + return false; + } + VkFramebuffer framebuffer = + render_target_cache_->GetFramebuffer(framebuffer_key); + if (framebuffer == VK_NULL_HANDLE) { + return false; + } + VkRenderPass render_pass = + render_target_cache_->GetRenderPass(framebuffer_key.render_pass_key); + if (render_pass == VK_NULL_HANDLE) { + return false; + } + + // Update the graphics pipeline, and if the new graphics pipeline has a + // different layout, invalidate incompatible descriptor sets before updating + // current_graphics_pipeline_layout_. + VkPipeline pipeline; + const VulkanPipelineCache::PipelineLayoutProvider* pipeline_layout_provider; + if (!pipeline_cache_->ConfigurePipeline(vertex_shader, pixel_shader, + framebuffer_key.render_pass_key, + pipeline, pipeline_layout_provider)) { + return false; + } + deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline); + auto pipeline_layout = + static_cast(pipeline_layout_provider); + if (current_graphics_pipeline_layout_ != pipeline_layout) { + if (current_graphics_pipeline_layout_) { + // Keep descriptor set layouts for which the new pipeline layout is + // compatible with the previous one (pipeline layouts are compatible for + // set N if set layouts 0 through N are compatible). 
+ uint32_t descriptor_sets_kept = + uint32_t(SpirvShaderTranslator::kDescriptorSetCount); + if (current_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_vertex_ref != + pipeline_layout->descriptor_set_layout_textures_vertex_ref) { + descriptor_sets_kept = std::min( + descriptor_sets_kept, + uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesVertex)); + } + if (current_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_pixel_ref != + pipeline_layout->descriptor_set_layout_textures_pixel_ref) { + descriptor_sets_kept = std::min( + descriptor_sets_kept, + uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesPixel)); + } + } else { + // No or unknown pipeline layout previously bound - all bindings are in an + // indeterminate state. + current_graphics_descriptor_sets_bound_up_to_date_ = 0; + } + current_graphics_pipeline_layout_ = pipeline_layout; + } + + // Update fixed-function dynamic state. + UpdateFixedFunctionState(); + bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base; - // Actually draw. + // Update system constants before uploading them. + UpdateSystemConstantValues(indexed ? index_buffer_info->endianness + : xenos::Endian::kNone); + + // Update uniform buffers and descriptor sets after binding the pipeline with + // the new layout. + if (!UpdateBindings(vertex_shader, pixel_shader)) { + return false; + } + + const RegisterFile& regs = *register_file_; + + // Ensure vertex buffers are resident. + // TODO(Triang3l): Cache residency for ranges in a way similar to how texture + // validity is tracked. 
+ uint64_t vertex_buffers_resident[2] = {}; + for (const Shader::VertexBinding& vertex_binding : + vertex_shader->vertex_bindings()) { + uint32_t vfetch_index = vertex_binding.fetch_constant; + if (vertex_buffers_resident[vfetch_index >> 6] & + (uint64_t(1) << (vfetch_index & 63))) { + continue; + } + const auto& vfetch_constant = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); + switch (vfetch_constant.type) { + case xenos::FetchConstantType::kVertex: + break; + case xenos::FetchConstantType::kInvalidVertex: + if (cvars::gpu_allow_invalid_fetch_constants) { + break; + } + XELOGW( + "Vertex fetch constant {} ({:08X} {:08X}) has \"invalid\" type! " + "This " + "is incorrect behavior, but you can try bypassing this by " + "launching Xenia with --gpu_allow_invalid_fetch_constants=true.", + vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1); + return false; + default: + XELOGW( + "Vertex fetch constant {} ({:08X} {:08X}) is completely invalid!", + vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1); + return false; + } + if (!shared_memory_->RequestRange(vfetch_constant.address << 2, + vfetch_constant.size << 2)) { + XELOGE( + "Failed to request vertex buffer at 0x{:08X} (size {}) in the shared " + "memory", + vfetch_constant.address << 2, vfetch_constant.size << 2); + return false; + } + vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1) + << (vfetch_index & 63); + } + + // Set up the geometry. if (indexed) { uint32_t index_size = index_buffer_info->format == xenos::IndexFormat::kInt32 @@ -557,6 +772,37 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, } shared_memory_->Use(VulkanSharedMemory::Usage::kRead); + // After all commands that may dispatch or copy, enter the render pass before + // drawing. 
+ if (current_render_pass_ != render_pass || + current_framebuffer_ != framebuffer) { + if (current_render_pass_ != VK_NULL_HANDLE) { + deferred_command_buffer_.CmdVkEndRenderPass(); + } + current_render_pass_ = render_pass; + current_framebuffer_ = framebuffer; + VkRenderPassBeginInfo render_pass_begin_info; + render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + render_pass_begin_info.pNext = nullptr; + render_pass_begin_info.renderPass = render_pass; + render_pass_begin_info.framebuffer = framebuffer; + render_pass_begin_info.renderArea.offset.x = 0; + render_pass_begin_info.renderArea.offset.y = 0; + render_pass_begin_info.renderArea.extent.width = 1280; + render_pass_begin_info.renderArea.extent.height = 720; + render_pass_begin_info.clearValueCount = 0; + render_pass_begin_info.pClearValues = nullptr; + deferred_command_buffer_.CmdVkBeginRenderPass(&render_pass_begin_info, + VK_SUBPASS_CONTENTS_INLINE); + } + + // Draw. + if (indexed) { + deferred_command_buffer_.CmdVkDrawIndexed(index_count, 1, 0, 0, 0); + } else { + deferred_command_buffer_.CmdVkDraw(index_count, 1, 0, 0); + } + return true; } @@ -659,9 +905,6 @@ void VulkanCommandProcessor::CheckSubmissionFence(uint64_t await_submission) { command_buffers_submitted_.pop_front(); } - // Reclaim descriptor pools. - transient_descriptor_pool_uniform_buffers_->Reclaim(submission_completed_); - shared_memory_->CompletedSubmissionUpdated(); } @@ -705,13 +948,41 @@ void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { submission_open_ = true; // Start a new deferred command buffer - will submit it to the real one in - // the end of the submission (when async pipeline state object creation - // requests are fulfilled). + // the end of the submission (when async pipeline object creation requests + // are fulfilled). deferred_command_buffer_.Reset(); + + // Reset cached state of the command buffer. 
+ ff_viewport_update_needed_ = true; + ff_scissor_update_needed_ = true; + current_render_pass_ = VK_NULL_HANDLE; + current_framebuffer_ = VK_NULL_HANDLE; + current_graphics_pipeline_ = VK_NULL_HANDLE; + current_graphics_pipeline_layout_ = nullptr; + current_graphics_descriptor_sets_bound_up_to_date_ = 0; } if (is_opening_frame) { frame_open_ = true; + + // Reset bindings that depend on transient data. + std::memset(current_float_constant_map_vertex_, 0, + sizeof(current_float_constant_map_vertex_)); + std::memset(current_float_constant_map_pixel_, 0, + sizeof(current_float_constant_map_pixel_)); + std::memset(current_graphics_descriptor_sets_, 0, + sizeof(current_graphics_descriptor_sets_)); + current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] = + shared_memory_and_edram_descriptor_set_; + current_graphics_descriptor_set_values_up_to_date_ = + uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram; + + // Reclaim pool pages - no need to do this every small submission since some + // may be reused. + transient_descriptor_pool_uniform_buffers_->Reclaim(frame_completed_); + uniform_buffer_pool_->Reclaim(frame_completed_); } } @@ -784,8 +1055,12 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { bool is_closing_frame = is_swap && frame_open_; if (submission_open_) { + EndRenderPass(); + shared_memory_->EndSubmission(); + uniform_buffer_pool_->FlushWrites(); + // Submit sparse binds earlier, before executing the deferred command // buffer, to reduce latency. 
if (!sparse_memory_binds_.empty()) { @@ -910,13 +1185,30 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { if (cache_clear_requested_ && AwaitAllQueueOperationsCompletion()) { cache_clear_requested_ = false; - transient_descriptor_pool_uniform_buffers_->ClearCache(); - assert_true(command_buffers_submitted_.empty()); for (const CommandBuffer& command_buffer : command_buffers_writable_) { dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr); } command_buffers_writable_.clear(); + + uniform_buffer_pool_->ClearCache(); + transient_descriptor_pool_uniform_buffers_->ClearCache(); + + pipeline_cache_->ClearCache(); + + render_target_cache_->ClearCache(); + + for (const auto& pipeline_layout_pair : pipeline_layouts_) { + dfn.vkDestroyPipelineLayout( + device, pipeline_layout_pair.second.pipeline_layout, nullptr); + } + pipeline_layouts_.clear(); + for (const auto& descriptor_set_layout_pair : + descriptor_set_layouts_textures_) { + dfn.vkDestroyDescriptorSetLayout( + device, descriptor_set_layout_pair.second, nullptr); + } + descriptor_set_layouts_textures_.clear(); } } @@ -936,6 +1228,441 @@ VkShaderStageFlags VulkanCommandProcessor::GetGuestVertexShaderStageFlags() return stages; } +void VulkanCommandProcessor::UpdateFixedFunctionState() { +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + + const RegisterFile& regs = *register_file_; + + // Window parameters. + // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h + // See r200UpdateWindow: + // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c + auto pa_sc_window_offset = regs.Get(); + + uint32_t pixel_size_x = 1, pixel_size_y = 1; + + // Viewport. + // PA_CL_VTE_CNTL contains whether offsets and scales are enabled. 
+ // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + // In games, either all are enabled (for regular drawing) or none are (for + // rectangle lists usually). + // + // If scale/offset is enabled, the Xenos shader is writing (neglecting W + // division) position in the NDC (-1, -1, dx_clip_space_def - 1) -> (1, 1, 1) + // box. If it's not, the position is in screen space. Since we can only use + // the NDC in PC APIs, we use a viewport of the largest possible size, and + // divide the position by it in translated shaders. + // + // TODO(Triang3l): Move all of this to draw_util. + // TODO(Triang3l): Limit the viewport if exceeding the device limit; move to + // NDC scale/offset constants. + auto pa_cl_vte_cntl = regs.Get(); + float viewport_scale_x = + pa_cl_vte_cntl.vport_x_scale_ena + ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32) + : 4096.0f; + float viewport_scale_y = + pa_cl_vte_cntl.vport_y_scale_ena + ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32) + : 4096.0f; + float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena + ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 + : 1.0f; + float viewport_offset_x = pa_cl_vte_cntl.vport_x_offset_ena + ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 + : std::abs(viewport_scale_x); + float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena + ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 + : std::abs(viewport_scale_y); + float viewport_offset_z = pa_cl_vte_cntl.vport_z_offset_ena + ? 
regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 + : 0.0f; + if (regs.Get().vtx_window_offset_enable) { + viewport_offset_x += float(pa_sc_window_offset.window_x_offset); + viewport_offset_y += float(pa_sc_window_offset.window_y_offset); + } + VkViewport viewport; + viewport.x = (viewport_offset_x - viewport_scale_x) * float(pixel_size_x); + viewport.y = (viewport_offset_y - viewport_scale_y) * float(pixel_size_y); + viewport.width = viewport_scale_x * 2.0f * float(pixel_size_x); + viewport.height = viewport_scale_y * 2.0f * float(pixel_size_y); + viewport.minDepth = std::min(std::max(viewport_offset_z, 0.0f), 1.0f); + viewport.maxDepth = + std::min(std::max(viewport_offset_z + viewport_scale_z, 0.0f), 1.0f); + ff_viewport_update_needed_ |= ff_viewport_.x != viewport.x; + ff_viewport_update_needed_ |= ff_viewport_.y != viewport.y; + ff_viewport_update_needed_ |= ff_viewport_.width != viewport.width; + ff_viewport_update_needed_ |= ff_viewport_.height != viewport.height; + ff_viewport_update_needed_ |= ff_viewport_.minDepth != viewport.minDepth; + ff_viewport_update_needed_ |= ff_viewport_.maxDepth != viewport.maxDepth; + if (ff_viewport_update_needed_) { + ff_viewport_ = viewport; + deferred_command_buffer_.CmdVkSetViewport(0, 1, &viewport); + ff_viewport_update_needed_ = false; + } + + // Scissor. + // TODO(Triang3l): Move all of this to draw_util. + // TODO(Triang3l): Limit the scissor if exceeding the device limit. 
+ auto pa_sc_window_scissor_tl = regs.Get(); + auto pa_sc_window_scissor_br = regs.Get(); + VkRect2D scissor; + scissor.offset.x = int32_t(pa_sc_window_scissor_tl.tl_x); + scissor.offset.y = int32_t(pa_sc_window_scissor_tl.tl_y); + int32_t scissor_br_x = + std::max(int32_t(pa_sc_window_scissor_br.br_x), scissor.offset.x); + int32_t scissor_br_y = + std::max(int32_t(pa_sc_window_scissor_br.br_y), scissor.offset.y); + if (!pa_sc_window_scissor_tl.window_offset_disable) { + scissor.offset.x = std::max( + scissor.offset.x + pa_sc_window_offset.window_x_offset, int32_t(0)); + scissor.offset.y = std::max( + scissor.offset.y + pa_sc_window_offset.window_y_offset, int32_t(0)); + scissor_br_x = std::max(scissor_br_x + pa_sc_window_offset.window_x_offset, + int32_t(0)); + scissor_br_y = std::max(scissor_br_y + pa_sc_window_offset.window_y_offset, + int32_t(0)); + } + scissor.extent.width = uint32_t(scissor_br_x - scissor.offset.x); + scissor.extent.height = uint32_t(scissor_br_y - scissor.offset.y); + scissor.offset.x *= pixel_size_x; + scissor.offset.y *= pixel_size_y; + scissor.extent.width *= pixel_size_x; + scissor.extent.height *= pixel_size_y; + ff_scissor_update_needed_ |= ff_scissor_.offset.x != scissor.offset.x; + ff_scissor_update_needed_ |= ff_scissor_.offset.y != scissor.offset.y; + ff_scissor_update_needed_ |= ff_scissor_.extent.width != scissor.extent.width; + ff_scissor_update_needed_ |= + ff_scissor_.extent.height != scissor.extent.height; + if (ff_scissor_update_needed_) { + ff_scissor_ = scissor; + deferred_command_buffer_.CmdVkSetScissor(0, 1, &scissor); + ff_scissor_update_needed_ = false; + } +} + +void VulkanCommandProcessor::UpdateSystemConstantValues( + xenos::Endian index_endian) { +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + + const RegisterFile& regs = *register_file_; + int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32); + + bool dirty = false; + + 
// Index or tessellation edge factor buffer endianness. + dirty |= system_constants_.vertex_index_endian != index_endian; + system_constants_.vertex_index_endian = index_endian; + + // Vertex index offset. + dirty |= system_constants_.vertex_base_index != vgt_indx_offset; + system_constants_.vertex_base_index = vgt_indx_offset; + + if (dirty) { + current_graphics_descriptor_set_values_up_to_date_ &= + ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants); + } +} + +bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, + const VulkanShader* pixel_shader) { +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + + const RegisterFile& regs = *register_file_; + + // Invalidate descriptors for changed data. + // These are the constant base addresses/ranges for shaders. + // We have these hardcoded right now cause nothing seems to differ on the Xbox + // 360 (however, OpenGL ES on Adreno 200 on Android has different ranges). + assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 || + regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); + assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 || + regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); + // Check if the float constant layout is still the same and get the counts. + const Shader::ConstantRegisterMap& float_constant_map_vertex = + vertex_shader->constant_register_map(); + uint32_t float_constant_count_vertex = float_constant_map_vertex.float_count; + for (uint32_t i = 0; i < 4; ++i) { + if (current_float_constant_map_vertex_[i] != + float_constant_map_vertex.float_bitmap[i]) { + current_float_constant_map_vertex_[i] = + float_constant_map_vertex.float_bitmap[i]; + // If no float constants at all, any buffer can be reused for them, so not + // invalidating. 
+ if (float_constant_count_vertex) { + current_graphics_descriptor_set_values_up_to_date_ &= + ~( + uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex); + } + } + } + uint32_t float_constant_count_pixel = 0; + if (pixel_shader != nullptr) { + const Shader::ConstantRegisterMap& float_constant_map_pixel = + pixel_shader->constant_register_map(); + float_constant_count_pixel = float_constant_map_pixel.float_count; + for (uint32_t i = 0; i < 4; ++i) { + if (current_float_constant_map_pixel_[i] != + float_constant_map_pixel.float_bitmap[i]) { + current_float_constant_map_pixel_[i] = + float_constant_map_pixel.float_bitmap[i]; + if (float_constant_count_pixel) { + current_graphics_descriptor_set_values_up_to_date_ &= + ~(uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel); + } + } + } + } else { + std::memset(current_float_constant_map_pixel_, 0, + sizeof(current_float_constant_map_pixel_)); + } + + // Make sure new descriptor sets are bound to the command buffer. + current_graphics_descriptor_sets_bound_up_to_date_ &= + current_graphics_descriptor_set_values_up_to_date_; + + // Write the new descriptor sets. 
+ VkWriteDescriptorSet + write_descriptor_sets[SpirvShaderTranslator::kDescriptorSetCount]; + uint32_t write_descriptor_set_count = 0; + uint32_t write_descriptor_set_bits = 0; + assert_not_zero( + current_graphics_descriptor_set_values_up_to_date_ & + (uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram)); + VkDescriptorBufferInfo buffer_info_bool_loop_constants; + if (!(current_graphics_descriptor_set_values_up_to_date_ & + (uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants))) { + VkWriteDescriptorSet& write_bool_loop_constants = + write_descriptor_sets[write_descriptor_set_count++]; + constexpr size_t kBoolLoopConstantsSize = sizeof(uint32_t) * (8 + 32); + uint8_t* mapping_bool_loop_constants = WriteUniformBufferBinding( + kBoolLoopConstantsSize, + descriptor_set_layout_fetch_bool_loop_constants_, + buffer_info_bool_loop_constants, write_bool_loop_constants); + if (!mapping_bool_loop_constants) { + return false; + } + std::memcpy(mapping_bool_loop_constants, + &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, + kBoolLoopConstantsSize); + write_descriptor_set_bits |= + uint32_t(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants; + current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] = + write_bool_loop_constants.dstSet; + } + VkDescriptorBufferInfo buffer_info_system_constants; + if (!(current_graphics_descriptor_set_values_up_to_date_ & + (uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetSystemConstants))) { + VkWriteDescriptorSet& write_system_constants = + write_descriptor_sets[write_descriptor_set_count++]; + uint8_t* mapping_system_constants = WriteUniformBufferBinding( + sizeof(SpirvShaderTranslator::SystemConstants), + descriptor_set_layout_system_constants_, buffer_info_system_constants, + write_system_constants); + if (!mapping_system_constants) { + return false; + } + std::memcpy(mapping_system_constants, &system_constants_, + 
sizeof(SpirvShaderTranslator::SystemConstants)); + write_descriptor_set_bits |= + uint32_t(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants; + current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetSystemConstants] = + write_system_constants.dstSet; + } + VkDescriptorBufferInfo buffer_info_float_constant_pixel; + if (!(current_graphics_descriptor_set_values_up_to_date_ & + (uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel))) { + // Even if the shader doesn't need any float constants, a valid binding must + // still be provided (the pipeline layout always has float constants, for + // both the vertex shader and the pixel shader), so if the first draw in the + // frame doesn't have float constants at all, still allocate an empty + // buffer. + VkWriteDescriptorSet& write_float_constants_pixel = + write_descriptor_sets[write_descriptor_set_count++]; + uint8_t* mapping_float_constants_pixel = WriteUniformBufferBinding( + sizeof(float) * 4 * std::max(float_constant_count_pixel, uint32_t(1)), + descriptor_set_layout_float_constants_pixel_, + buffer_info_float_constant_pixel, write_float_constants_pixel); + if (!mapping_float_constants_pixel) { + return false; + } + for (uint32_t i = 0; i < 4; ++i) { + uint64_t float_constant_map_entry = current_float_constant_map_pixel_[i]; + uint32_t float_constant_index; + while (xe::bit_scan_forward(float_constant_map_entry, + &float_constant_index)) { + float_constant_map_entry &= ~(1ull << float_constant_index); + std::memcpy(mapping_float_constants_pixel, + &regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) + + (float_constant_index << 2)] + .f32, + sizeof(float) * 4); + mapping_float_constants_pixel += sizeof(float) * 4; + } + } + write_descriptor_set_bits |= + uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel; + current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] = + write_float_constants_pixel.dstSet; + } + 
VkDescriptorBufferInfo buffer_info_float_constant_vertex; + if (!(current_graphics_descriptor_set_values_up_to_date_ & + (uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex))) { + VkWriteDescriptorSet& write_float_constants_vertex = + write_descriptor_sets[write_descriptor_set_count++]; + uint8_t* mapping_float_constants_vertex = WriteUniformBufferBinding( + sizeof(float) * 4 * std::max(float_constant_count_vertex, uint32_t(1)), + descriptor_set_layout_float_constants_vertex_, + buffer_info_float_constant_vertex, write_float_constants_vertex); + if (!mapping_float_constants_vertex) { + return false; + } + for (uint32_t i = 0; i < 4; ++i) { + uint64_t float_constant_map_entry = current_float_constant_map_vertex_[i]; + uint32_t float_constant_index; + while (xe::bit_scan_forward(float_constant_map_entry, + &float_constant_index)) { + float_constant_map_entry &= ~(1ull << float_constant_index); + std::memcpy(mapping_float_constants_vertex, + &regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) + + (float_constant_index << 2)] + .f32, + sizeof(float) * 4); + mapping_float_constants_vertex += sizeof(float) * 4; + } + } + write_descriptor_set_bits |= + uint32_t(1) + << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex; + current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] = + write_float_constants_vertex.dstSet; + } + VkDescriptorBufferInfo buffer_info_fetch_constants; + if (!(current_graphics_descriptor_set_values_up_to_date_ & + (uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants))) { + VkWriteDescriptorSet& write_fetch_constants = + write_descriptor_sets[write_descriptor_set_count++]; + constexpr size_t kFetchConstantsSize = sizeof(uint32_t) * 6 * 32; + uint8_t* mapping_fetch_constants = WriteUniformBufferBinding( + kFetchConstantsSize, descriptor_set_layout_fetch_bool_loop_constants_, + buffer_info_fetch_constants, write_fetch_constants); + if (!mapping_fetch_constants) { + return 
false; + } + std::memcpy(mapping_fetch_constants, + &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, + kFetchConstantsSize); + write_descriptor_set_bits |= + uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants; + current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetFetchConstants] = + write_fetch_constants.dstSet; + } + if (write_descriptor_set_count) { + const ui::vulkan::VulkanProvider& provider = + GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + dfn.vkUpdateDescriptorSets(device, write_descriptor_set_count, + write_descriptor_sets, 0, nullptr); + } + // Only make valid if written successfully. + current_graphics_descriptor_set_values_up_to_date_ |= + write_descriptor_set_bits; + + // Bind the new descriptor sets. + uint32_t descriptor_sets_needed = + (uint32_t(1) << SpirvShaderTranslator::kDescriptorSetCount) - 1; + if (current_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_vertex_ref == + descriptor_set_layout_empty_) { + descriptor_sets_needed &= + ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex); + } + if (current_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_pixel_ref == + descriptor_set_layout_empty_) { + descriptor_sets_needed &= + ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel); + } + uint32_t descriptor_sets_remaining = + descriptor_sets_needed & + ~current_graphics_descriptor_sets_bound_up_to_date_; + uint32_t descriptor_set_index; + while ( + xe::bit_scan_forward(descriptor_sets_remaining, &descriptor_set_index)) { + uint32_t descriptor_set_mask_tzcnt = + xe::tzcnt(~(descriptor_sets_remaining | + ((uint32_t(1) << descriptor_set_index) - 1))); + // TODO(Triang3l): Bind to compute for rectangle list emulation without + // geometry shaders. 
+ deferred_command_buffer_.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_GRAPHICS, + current_graphics_pipeline_layout_->pipeline_layout, + descriptor_set_index, descriptor_set_mask_tzcnt - descriptor_set_index, + current_graphics_descriptor_sets_ + descriptor_set_index, 0, nullptr); + if (descriptor_set_mask_tzcnt >= 32) { + break; + } + descriptor_sets_remaining &= + ~((uint32_t(1) << descriptor_set_mask_tzcnt) - 1); + } + current_graphics_descriptor_sets_bound_up_to_date_ |= descriptor_sets_needed; + + return true; +} + +uint8_t* VulkanCommandProcessor::WriteUniformBufferBinding( + size_t size, VkDescriptorSetLayout descriptor_set_layout, + VkDescriptorBufferInfo& descriptor_buffer_info_out, + VkWriteDescriptorSet& write_descriptor_set_out) { + VkDescriptorSet descriptor_set = + transient_descriptor_pool_uniform_buffers_->Request( + frame_current_, descriptor_set_layout, 1); + if (descriptor_set == VK_NULL_HANDLE) { + return nullptr; + } + const ui::vulkan::VulkanProvider& provider = + GetVulkanContext().GetVulkanProvider(); + uint8_t* mapping = uniform_buffer_pool_->Request( + frame_current_, size, + size_t( + provider.device_properties().limits.minUniformBufferOffsetAlignment), + descriptor_buffer_info_out.buffer, descriptor_buffer_info_out.offset); + if (!mapping) { + return false; + } + descriptor_buffer_info_out.range = VkDeviceSize(size); + write_descriptor_set_out.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_descriptor_set_out.pNext = nullptr; + write_descriptor_set_out.dstSet = descriptor_set; + write_descriptor_set_out.dstBinding = 0; + write_descriptor_set_out.dstArrayElement = 0; + write_descriptor_set_out.descriptorCount = 1; + write_descriptor_set_out.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + write_descriptor_set_out.pImageInfo = nullptr; + write_descriptor_set_out.pBufferInfo = &descriptor_buffer_info_out; + write_descriptor_set_out.pTexelBufferView = nullptr; + return mapping; +} + } // namespace vulkan } // namespace gpu 
} // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 90df3f39b..a7283d56f 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -10,6 +10,7 @@ #ifndef XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_ #define XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_ +#include #include #include #include @@ -18,13 +19,18 @@ #include #include "xenia/gpu/command_processor.h" +#include "xenia/gpu/spirv_shader_translator.h" #include "xenia/gpu/vulkan/deferred_command_buffer.h" #include "xenia/gpu/vulkan/vulkan_graphics_system.h" +#include "xenia/gpu/vulkan/vulkan_pipeline_cache.h" +#include "xenia/gpu/vulkan/vulkan_render_target_cache.h" +#include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/vulkan/vulkan_shared_memory.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/kernel_state.h" #include "xenia/ui/vulkan/transient_descriptor_pool.h" #include "xenia/ui/vulkan/vulkan_context.h" +#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h" namespace xe { namespace gpu { @@ -67,19 +73,21 @@ class VulkanCommandProcessor : public CommandProcessor { const VkSparseMemoryBind* binds, VkPipelineStageFlags wait_stage_mask); - struct PipelineLayout { - VkPipelineLayout pipeline_layout; - VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref; - VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref; - }; - bool GetPipelineLayout(uint32_t texture_count_pixel, - uint32_t texture_count_vertex, - PipelineLayout& pipeline_layout_out); + // Must be called before doing anything outside the render pass scope, + // including adding pipeline barriers that are not a part of the render pass + // scope. Submission must be open. + void EndRenderPass(); + + // The returned reference is valid until a cache clear. 
+ const VulkanPipelineCache::PipelineLayoutProvider* GetPipelineLayout( + uint32_t texture_count_pixel, uint32_t texture_count_vertex); protected: bool SetupContext() override; void ShutdownContext() override; + void WriteRegister(uint32_t index, uint32_t value) override; + void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, uint32_t frontbuffer_height) override; @@ -95,6 +103,49 @@ class VulkanCommandProcessor : public CommandProcessor { void InitializeTrace() override; private: + struct CommandBuffer { + VkCommandPool pool; + VkCommandBuffer buffer; + }; + + struct SparseBufferBind { + VkBuffer buffer; + size_t bind_offset; + uint32_t bind_count; + }; + + union TextureDescriptorSetLayoutKey { + struct { + uint32_t is_vertex : 1; + // For 0, use descriptor_set_layout_empty_ instead as these are owning + // references. + uint32_t texture_count : 31; + }; + uint32_t key = 0; + }; + static_assert(sizeof(TextureDescriptorSetLayoutKey) == sizeof(uint32_t)); + + union PipelineLayoutKey { + struct { + // Pixel textures in the low bits since those are varied much more + // commonly. + uint32_t texture_count_pixel : 16; + uint32_t texture_count_vertex : 16; + }; + uint32_t key = 0; + }; + static_assert(sizeof(PipelineLayoutKey) == sizeof(uint32_t)); + + class PipelineLayout : public VulkanPipelineCache::PipelineLayoutProvider { + public: + VkPipelineLayout GetPipelineLayout() const override { + return pipeline_layout; + } + VkPipelineLayout pipeline_layout; + VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref; + VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref; + }; + // BeginSubmission and EndSubmission may be called at any time. If there's an // open non-frame submission, BeginSubmission(true) will promote it to a // frame. 
EndSubmission(true) will close the frame no matter whether the @@ -119,6 +170,18 @@ class VulkanCommandProcessor : public CommandProcessor { VkShaderStageFlags GetGuestVertexShaderStageFlags() const; + void UpdateFixedFunctionState(); + void UpdateSystemConstantValues(xenos::Endian index_endian); + bool UpdateBindings(const VulkanShader* vertex_shader, + const VulkanShader* pixel_shader); + // Allocates a descriptor, space in the uniform buffer pool, and fills the + // VkWriteDescriptorSet structure and VkDescriptorBufferInfo referenced by it. + // Returns null in case of failure. + uint8_t* WriteUniformBufferBinding( + size_t size, VkDescriptorSetLayout descriptor_set_layout, + VkDescriptorBufferInfo& descriptor_buffer_info_out, + VkWriteDescriptorSet& write_descriptor_set_out); + bool cache_clear_requested_ = false; std::vector fences_free_; @@ -143,20 +206,11 @@ class VulkanCommandProcessor : public CommandProcessor { // Submission indices of frames that have already been submitted. uint64_t closed_frame_submissions_[kMaxFramesInFlight] = {}; - struct CommandBuffer { - VkCommandPool pool; - VkCommandBuffer buffer; - }; std::vector command_buffers_writable_; std::deque> command_buffers_submitted_; DeferredCommandBuffer deferred_command_buffer_; std::vector sparse_memory_binds_; - struct SparseBufferBind { - VkBuffer buffer; - size_t bind_offset; - uint32_t bind_count; - }; std::vector sparse_buffer_binds_; // SparseBufferBind converted to VkSparseBufferMemoryBindInfo to this buffer // on submission (because pBinds should point to a place in std::vector, but @@ -166,6 +220,7 @@ class VulkanCommandProcessor : public CommandProcessor { std::unique_ptr transient_descriptor_pool_uniform_buffers_; + std::unique_ptr uniform_buffer_pool_; // Descriptor set layouts used by different shaders. 
VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE; @@ -180,34 +235,66 @@ class VulkanCommandProcessor : public CommandProcessor { VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ = VK_NULL_HANDLE; - union TextureDescriptorSetLayoutKey { - struct { - uint32_t is_vertex : 1; - // For 0, use descriptor_set_layout_empty_ instead as these are owning - // references. - uint32_t texture_count : 31; - }; - uint32_t key = 0; - }; // TextureDescriptorSetLayoutKey::key -> VkDescriptorSetLayout. + // Layouts are referenced by pipeline_layouts_. std::unordered_map descriptor_set_layouts_textures_; - union PipelineLayoutKey { - struct { - // Pixel textures in the low bits since those are varied much more - // commonly. - uint32_t texture_count_pixel : 16; - uint32_t texture_count_vertex : 16; - }; - uint32_t key = 0; - }; // PipelineLayoutKey::key -> PipelineLayout. + // Layouts are referenced by VulkanPipelineCache. std::unordered_map pipeline_layouts_; std::unique_ptr shared_memory_; + std::unique_ptr pipeline_cache_; + + std::unique_ptr render_target_cache_; + VkDescriptorPool shared_memory_and_edram_descriptor_pool_ = VK_NULL_HANDLE; VkDescriptorSet shared_memory_and_edram_descriptor_set_; + + // The current fixed-function drawing state. + VkViewport ff_viewport_; + VkRect2D ff_scissor_; + bool ff_viewport_update_needed_; + bool ff_scissor_update_needed_; + + // Cache render pass currently started in the command buffer with framebuffer. + VkRenderPass current_render_pass_; + VkFramebuffer current_framebuffer_; + + // Cache graphics pipeline currently bound to the command buffer. + VkPipeline current_graphics_pipeline_; + + // Pipeline layout of the current graphics pipeline. + const PipelineLayout* current_graphics_pipeline_layout_; + VkDescriptorSet current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetCount]; + // Whether descriptor sets in current_graphics_descriptor_sets_ point to + // up-to-date data. 
+ uint32_t current_graphics_descriptor_set_values_up_to_date_; + // Whether the descriptor sets currently bound to the command buffer - only + // low bits for the descriptor set layouts that remained the same are kept + // when changing the pipeline layout. May be out of sync with + // current_graphics_descriptor_set_values_up_to_date_, but should be ensured + // to be a subset of it at some point when it becomes important; bits for + // non-existent descriptor set layouts may also be set, but need to be ignored + // when they start to matter. + uint32_t current_graphics_descriptor_sets_bound_up_to_date_; + static_assert( + SpirvShaderTranslator::kDescriptorSetCount <= + sizeof(current_graphics_descriptor_set_values_up_to_date_) * CHAR_BIT, + "Bit fields storing descriptor set validity must be large enough"); + static_assert( + SpirvShaderTranslator::kDescriptorSetCount <= + sizeof(current_graphics_descriptor_sets_bound_up_to_date_) * CHAR_BIT, + "Bit fields storing descriptor set validity must be large enough"); + + // Float constant usage masks of the last draw call. + uint64_t current_float_constant_map_vertex_[4]; + uint64_t current_float_constant_map_pixel_[4]; + + // System shader constants. + SpirvShaderTranslator::SystemConstants system_constants_; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc new file mode 100644 index 000000000..8db426857 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -0,0 +1,443 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/vulkan_pipeline_cache.h" + +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/profiling.h" +#include "xenia/gpu/register_file.h" +#include "xenia/gpu/registers.h" +#include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include "xenia/gpu/vulkan/vulkan_shader.h" +#include "xenia/gpu/xenos.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +VulkanPipelineCache::VulkanPipelineCache( + VulkanCommandProcessor& command_processor, + const RegisterFile& register_file, + VulkanRenderTargetCache& render_target_cache) + : command_processor_(command_processor), + register_file_(register_file), + render_target_cache_(render_target_cache) {} + +VulkanPipelineCache::~VulkanPipelineCache() { Shutdown(); } + +bool VulkanPipelineCache::Initialize() { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + + device_pipeline_features_.features = 0; + // TODO(Triang3l): Support the portability subset. 
+ device_pipeline_features_.triangle_fans = 1; + + shader_translator_ = std::make_unique( + SpirvShaderTranslator::Features(provider)); + + return true; +} + +void VulkanPipelineCache::Shutdown() { + ClearCache(); + + shader_translator_.reset(); +} + +void VulkanPipelineCache::ClearCache() { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + last_pipeline_ = nullptr; + for (const auto& pipeline_pair : pipelines_) { + if (pipeline_pair.second.pipeline != VK_NULL_HANDLE) { + dfn.vkDestroyPipeline(device, pipeline_pair.second.pipeline, nullptr); + } + } + pipelines_.clear(); + + for (auto it : shaders_) { + delete it.second; + } + shaders_.clear(); +} + +VulkanShader* VulkanPipelineCache::LoadShader(xenos::ShaderType shader_type, + uint32_t guest_address, + const uint32_t* host_address, + uint32_t dword_count) { + // Hash the input memory and lookup the shader. + uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0); + auto it = shaders_.find(data_hash); + if (it != shaders_.end()) { + // Shader has been previously loaded. + return it->second; + } + + // Always create the shader and stash it away. + // We need to track it even if it fails translation so we know not to try + // again. + VulkanShader* shader = + new VulkanShader(shader_type, data_hash, host_address, dword_count); + shaders_.emplace(data_hash, shader); + + return shader; +} + +bool VulkanPipelineCache::EnsureShadersTranslated( + VulkanShader* vertex_shader, VulkanShader* pixel_shader, + Shader::HostVertexShaderType host_vertex_shader_type) { + const RegisterFile& regs = register_file_; + auto sq_program_cntl = regs.Get(); + + // Edge flags are not supported yet (because polygon primitives are not). 
+ assert_true(sq_program_cntl.vs_export_mode != + xenos::VertexShaderExportMode::kPosition2VectorsEdge && + sq_program_cntl.vs_export_mode != + xenos::VertexShaderExportMode::kPosition2VectorsEdgeKill); + assert_false(sq_program_cntl.gen_index_vtx); + + if (!vertex_shader->is_translated()) { + if (!TranslateShader(*shader_translator_, *vertex_shader, + sq_program_cntl)) { + XELOGE("Failed to translate the vertex shader!"); + return false; + } + } + + if (pixel_shader != nullptr && !pixel_shader->is_translated()) { + if (!TranslateShader(*shader_translator_, *pixel_shader, sq_program_cntl)) { + XELOGE("Failed to translate the pixel shader!"); + return false; + } + } + + return true; +} + +bool VulkanPipelineCache::ConfigurePipeline( + VulkanShader* vertex_shader, VulkanShader* pixel_shader, + VulkanRenderTargetCache::RenderPassKey render_pass_key, + VkPipeline& pipeline_out, + const PipelineLayoutProvider*& pipeline_layout_out) { +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + + PipelineDescription description; + if (!GetCurrentStateDescription(vertex_shader, pixel_shader, render_pass_key, + description)) { + return false; + } + if (last_pipeline_ && last_pipeline_->first == description) { + pipeline_out = last_pipeline_->second.pipeline; + pipeline_layout_out = last_pipeline_->second.pipeline_layout; + return true; + } + auto it = pipelines_.find(description); + if (it != pipelines_.end()) { + last_pipeline_ = &*it; + pipeline_out = it->second.pipeline; + pipeline_layout_out = it->second.pipeline_layout; + return true; + } + + // Create the pipeline if not the latest and not already existing. 
+ if (!EnsureShadersTranslated(vertex_shader, pixel_shader, + Shader::HostVertexShaderType::kVertex)) { + return false; + } + const PipelineLayoutProvider* pipeline_layout = + command_processor_.GetPipelineLayout(0, 0); + if (!pipeline_layout) { + return false; + } + VkRenderPass render_pass = + render_target_cache_.GetRenderPass(render_pass_key); + if (render_pass == VK_NULL_HANDLE) { + return false; + } + PipelineCreationArguments creation_arguments; + auto& pipeline = + *pipelines_.emplace(description, Pipeline(pipeline_layout)).first; + creation_arguments.pipeline = &pipeline; + creation_arguments.vertex_shader = vertex_shader; + creation_arguments.pixel_shader = pixel_shader; + creation_arguments.render_pass = render_pass; + if (!EnsurePipelineCreated(creation_arguments)) { + return false; + } + pipeline_out = pipeline.second.pipeline; + pipeline_layout_out = pipeline_layout; + return true; +} + +bool VulkanPipelineCache::TranslateShader(SpirvShaderTranslator& translator, + VulkanShader& shader, + reg::SQ_PROGRAM_CNTL cntl) { + // Perform translation. + // If this fails the shader will be marked as invalid and ignored later. + // TODO(Triang3l): Host vertex shader type. + if (!translator.Translate(&shader, cntl, + Shader::HostVertexShaderType::kVertex)) { + XELOGE("Shader {:016X} translation failed; marking as ignored", + shader.ucode_data_hash()); + return false; + } + return shader.InitializeShaderModule( + command_processor_.GetVulkanContext().GetVulkanProvider()); +} + +bool VulkanPipelineCache::GetCurrentStateDescription( + const VulkanShader* vertex_shader, const VulkanShader* pixel_shader, + VulkanRenderTargetCache::RenderPassKey render_pass_key, + PipelineDescription& description_out) const { + description_out.Reset(); + + const RegisterFile& regs = register_file_; + + description_out.vertex_shader_hash = vertex_shader->ucode_data_hash(); + description_out.pixel_shader_hash = + pixel_shader ? 
pixel_shader->ucode_data_hash() : 0; + description_out.render_pass_key = render_pass_key; + + auto vgt_draw_initiator = regs.Get(); + PipelinePrimitiveTopology primitive_topology; + switch (vgt_draw_initiator.prim_type) { + case xenos::PrimitiveType::kPointList: + primitive_topology = PipelinePrimitiveTopology::kPointList; + break; + case xenos::PrimitiveType::kLineList: + primitive_topology = PipelinePrimitiveTopology::kLineList; + break; + case xenos::PrimitiveType::kLineStrip: + primitive_topology = PipelinePrimitiveTopology::kLineStrip; + break; + case xenos::PrimitiveType::kTriangleList: + primitive_topology = PipelinePrimitiveTopology::kTriangleList; + break; + case xenos::PrimitiveType::kTriangleFan: + primitive_topology = device_pipeline_features_.triangle_fans + ? PipelinePrimitiveTopology::kTriangleFan + : PipelinePrimitiveTopology::kTriangleList; + break; + case xenos::PrimitiveType::kTriangleStrip: + primitive_topology = PipelinePrimitiveTopology::kTriangleStrip; + break; + default: + // TODO(Triang3l): All primitive types and tessellation. + return false; + } + description_out.primitive_topology = primitive_topology; + // TODO(Triang3l): Primitive restart. + + return true; +} + +bool VulkanPipelineCache::EnsurePipelineCreated( + const PipelineCreationArguments& creation_arguments) { + if (creation_arguments.pipeline->second.pipeline != VK_NULL_HANDLE) { + return true; + } + + // This function preferably should validate the description to prevent + // unsupported behavior that may be dangerous/crashing because pipelines can + // be created from the disk storage. 
+ + if (creation_arguments.pixel_shader) { + XELOGGPU("Creating graphics pipeline state with VS {:016X}, PS {:016X}", + creation_arguments.vertex_shader->ucode_data_hash(), + creation_arguments.pixel_shader->ucode_data_hash()); + } else { + XELOGGPU("Creating graphics pipeline state with VS {:016X}", + creation_arguments.vertex_shader->ucode_data_hash()); + } + + const PipelineDescription& description = creation_arguments.pipeline->first; + + VkPipelineShaderStageCreateInfo shader_stages[2]; + uint32_t shader_stage_count = 0; + + assert_true(creation_arguments.vertex_shader->is_translated()); + if (!creation_arguments.vertex_shader->is_valid()) { + return false; + } + assert_true(shader_stage_count < xe::countof(shader_stages)); + VkPipelineShaderStageCreateInfo& shader_stage_vertex = + shader_stages[shader_stage_count++]; + shader_stage_vertex.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stage_vertex.pNext = nullptr; + shader_stage_vertex.flags = 0; + shader_stage_vertex.stage = VK_SHADER_STAGE_VERTEX_BIT; + shader_stage_vertex.module = + creation_arguments.vertex_shader->shader_module(); + assert_true(shader_stage_vertex.module != VK_NULL_HANDLE); + shader_stage_vertex.pName = "main"; + shader_stage_vertex.pSpecializationInfo = nullptr; + if (creation_arguments.pixel_shader) { + assert_true(creation_arguments.pixel_shader->is_translated()); + if (!creation_arguments.pixel_shader->is_valid()) { + return false; + } + assert_true(shader_stage_count < xe::countof(shader_stages)); + VkPipelineShaderStageCreateInfo& shader_stage_fragment = + shader_stages[shader_stage_count++]; + shader_stage_fragment.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stage_fragment.pNext = nullptr; + shader_stage_fragment.flags = 0; + shader_stage_fragment.stage = VK_SHADER_STAGE_FRAGMENT_BIT; + shader_stage_fragment.module = + creation_arguments.pixel_shader->shader_module(); + assert_true(shader_stage_fragment.module != 
VK_NULL_HANDLE); + shader_stage_fragment.pName = "main"; + shader_stage_fragment.pSpecializationInfo = nullptr; + } + + VkPipelineVertexInputStateCreateInfo vertex_input_state; + vertex_input_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vertex_input_state.pNext = nullptr; + vertex_input_state.flags = 0; + vertex_input_state.vertexBindingDescriptionCount = 0; + vertex_input_state.pVertexBindingDescriptions = nullptr; + vertex_input_state.vertexAttributeDescriptionCount = 0; + vertex_input_state.pVertexAttributeDescriptions = nullptr; + + VkPipelineInputAssemblyStateCreateInfo input_assembly_state; + input_assembly_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + input_assembly_state.pNext = nullptr; + input_assembly_state.flags = 0; + switch (description.primitive_topology) { + case PipelinePrimitiveTopology::kPointList: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + break; + case PipelinePrimitiveTopology::kLineList: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; + break; + case PipelinePrimitiveTopology::kLineStrip: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; + break; + case PipelinePrimitiveTopology::kTriangleList: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + break; + case PipelinePrimitiveTopology::kTriangleStrip: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + break; + case PipelinePrimitiveTopology::kTriangleFan: + assert_true(device_pipeline_features_.triangle_fans); + if (!device_pipeline_features_.triangle_fans) { + return false; + } + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; + break; + case PipelinePrimitiveTopology::kLineListWithAdjacency: + input_assembly_state.topology = + VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY; + break; + case PipelinePrimitiveTopology::kPatchList: + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; + 
break; + default: + assert_unhandled_case(description.primitive_topology); + return false; + } + input_assembly_state.primitiveRestartEnable = + description.primitive_restart ? VK_TRUE : VK_FALSE; + + VkPipelineViewportStateCreateInfo viewport_state; + viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewport_state.pNext = nullptr; + viewport_state.flags = 0; + viewport_state.viewportCount = 1; + viewport_state.pViewports = nullptr; + viewport_state.scissorCount = 1; + viewport_state.pScissors = nullptr; + + VkPipelineRasterizationStateCreateInfo rasterization_state = {}; + rasterization_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterization_state.lineWidth = 1.0f; + + VkPipelineMultisampleStateCreateInfo multisample_state = {}; + multisample_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + multisample_state.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + + static const VkDynamicState dynamic_states[] = { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }; + VkPipelineDynamicStateCreateInfo dynamic_state; + dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state.pNext = nullptr; + dynamic_state.flags = 0; + dynamic_state.dynamicStateCount = uint32_t(xe::countof(dynamic_states)); + dynamic_state.pDynamicStates = dynamic_states; + + VkGraphicsPipelineCreateInfo pipeline_create_info; + pipeline_create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pipeline_create_info.pNext = nullptr; + pipeline_create_info.flags = 0; + pipeline_create_info.stageCount = shader_stage_count; + pipeline_create_info.pStages = shader_stages; + pipeline_create_info.pVertexInputState = &vertex_input_state; + pipeline_create_info.pInputAssemblyState = &input_assembly_state; + pipeline_create_info.pTessellationState = nullptr; + pipeline_create_info.pViewportState = &viewport_state; + pipeline_create_info.pRasterizationState = 
&rasterization_state; + pipeline_create_info.pMultisampleState = &multisample_state; + pipeline_create_info.pDepthStencilState = nullptr; + pipeline_create_info.pColorBlendState = nullptr; + pipeline_create_info.pDynamicState = &dynamic_state; + pipeline_create_info.layout = + creation_arguments.pipeline->second.pipeline_layout->GetPipelineLayout(); + pipeline_create_info.renderPass = creation_arguments.render_pass; + pipeline_create_info.subpass = 0; + pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE; + pipeline_create_info.basePipelineIndex = 0; + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + VkPipeline pipeline; + if (dfn.vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, + &pipeline_create_info, nullptr, + &pipeline) != VK_SUCCESS) { + // TODO(Triang3l): Move these error messages outside. + /* if (creation_arguments.pixel_shader) { + XELOGE( + "Failed to create graphics pipeline with VS {:016X}, PS {:016X}", + creation_arguments.vertex_shader->ucode_data_hash(), + creation_arguments.pixel_shader->ucode_data_hash()); + } else { + XELOGE("Failed to create graphics pipeline with VS {:016X}", + creation_arguments.vertex_shader->ucode_data_hash()); + } */ + return false; + } + creation_arguments.pipeline->second.pipeline = pipeline; + return true; +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h new file mode 100644 index 000000000..1d2f852e5 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h @@ -0,0 +1,183 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben 
Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_ +#define XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_ + +#include +#include +#include +#include +#include + +#include "third_party/xxhash/xxhash.h" +#include "xenia/base/hash.h" +#include "xenia/base/platform.h" +#include "xenia/gpu/register_file.h" +#include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/vulkan/vulkan_render_target_cache.h" +#include "xenia/gpu/vulkan/vulkan_shader.h" +#include "xenia/gpu/xenos.h" +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +class VulkanCommandProcessor; + +// TODO(Triang3l): Create a common base for both the Vulkan and the Direct3D +// implementations. +class VulkanPipelineCache { + public: + class PipelineLayoutProvider { + public: + virtual ~PipelineLayoutProvider() {} + virtual VkPipelineLayout GetPipelineLayout() const = 0; + }; + + VulkanPipelineCache(VulkanCommandProcessor& command_processor, + const RegisterFile& register_file, + VulkanRenderTargetCache& render_target_cache); + ~VulkanPipelineCache(); + + bool Initialize(); + void Shutdown(); + void ClearCache(); + + VulkanShader* LoadShader(xenos::ShaderType shader_type, + uint32_t guest_address, const uint32_t* host_address, + uint32_t dword_count); + + // Translates shaders if needed, also making shader info up to date. + bool EnsureShadersTranslated( + VulkanShader* vertex_shader, VulkanShader* pixel_shader, + Shader::HostVertexShaderType host_vertex_shader_type); + + // TODO(Triang3l): Return a deferred creation handle. 
+ bool ConfigurePipeline(VulkanShader* vertex_shader, + VulkanShader* pixel_shader, + VulkanRenderTargetCache::RenderPassKey render_pass_key, + VkPipeline& pipeline_out, + const PipelineLayoutProvider*& pipeline_layout_out); + + private: + // Can only load pipeline storage if features of the device it was created on + // and the current device match because descriptions may requires features not + // supported on the device. Very radical differences (such as RB emulation + // method) should result in a different storage file being used. + union DevicePipelineFeatures { + struct { + uint32_t triangle_fans : 1; + }; + uint32_t features = 0; + }; + + enum class PipelinePrimitiveTopology : uint32_t { + kPointList, + kLineList, + kLineStrip, + kTriangleList, + kTriangleStrip, + // Requires DevicePipelineFeatures::triangle_fans. + kTriangleFan, + kLineListWithAdjacency, + kPatchList, + }; + + XEPACKEDSTRUCT(PipelineDescription, { + uint64_t vertex_shader_hash; + // 0 if no pixel shader. + uint64_t pixel_shader_hash; + VulkanRenderTargetCache::RenderPassKey render_pass_key; + + // Input assembly. + PipelinePrimitiveTopology primitive_topology : 3; + uint32_t primitive_restart : 1; + + // Including all the padding, for a stable hash. 
+ PipelineDescription() { Reset(); } + PipelineDescription(const PipelineDescription& description) { + std::memcpy(this, &description, sizeof(*this)); + } + PipelineDescription& operator=(const PipelineDescription& description) { + std::memcpy(this, &description, sizeof(*this)); + return *this; + } + bool operator==(const PipelineDescription& description) const { + return std::memcmp(this, &description, sizeof(*this)) == 0; + } + void Reset() { std::memset(this, 0, sizeof(*this)); } + uint64_t GetHash() const { return XXH64(this, sizeof(*this), 0); } + struct Hasher { + size_t operator()(const PipelineDescription& description) const { + return size_t(description.GetHash()); + } + }; + }); + + struct Pipeline { + VkPipeline pipeline = VK_NULL_HANDLE; + // Owned by VulkanCommandProcessor, valid until ClearCache. + const PipelineLayoutProvider* pipeline_layout; + Pipeline(const PipelineLayoutProvider* pipeline_layout_provider) + : pipeline_layout(pipeline_layout_provider) {} + }; + + // Description that can be passed from the command processor thread to the + // creation threads, with everything needed from caches pre-looked-up. + struct PipelineCreationArguments { + std::pair* pipeline; + const VulkanShader* vertex_shader; + const VulkanShader* pixel_shader; + VkRenderPass render_pass; + }; + + // Can be called from multiple threads. + bool TranslateShader(SpirvShaderTranslator& translator, VulkanShader& shader, + reg::SQ_PROGRAM_CNTL cntl); + + bool GetCurrentStateDescription( + const VulkanShader* vertex_shader, const VulkanShader* pixel_shader, + VulkanRenderTargetCache::RenderPassKey render_pass_key, + PipelineDescription& description_out) const; + + // Can be called from creation threads - all needed data must be fully set up + // at the point of the call: shaders must be translated, pipeline layout and + // render pass objects must be available. 
+ bool EnsurePipelineCreated( + const PipelineCreationArguments& creation_arguments); + + VulkanCommandProcessor& command_processor_; + const RegisterFile& register_file_; + VulkanRenderTargetCache& render_target_cache_; + + DevicePipelineFeatures device_pipeline_features_; + + // Reusable shader translator on the command processor thread. + std::unique_ptr shader_translator_; + + // Ucode hash -> shader. + std::unordered_map> + shaders_; + + std::unordered_map + pipelines_; + + // Previously used pipeline, to avoid lookups if the state wasn't changed. + const std::pair* last_pipeline_ = + nullptr; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc new file mode 100644 index 000000000..e85b6ea8b --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -0,0 +1,136 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/vulkan_render_target_cache.h" + +#include "xenia/base/logging.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +VulkanRenderTargetCache::VulkanRenderTargetCache( + VulkanCommandProcessor& command_processor, + const RegisterFile& register_file) + : command_processor_(command_processor), register_file_(register_file) {} + +VulkanRenderTargetCache::~VulkanRenderTargetCache() { Shutdown(); } + +bool VulkanRenderTargetCache::Initialize() { return true; } + +void VulkanRenderTargetCache::Shutdown() { ClearCache(); } + +void VulkanRenderTargetCache::ClearCache() { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + for (const auto& framebuffer_pair : framebuffers_) { + dfn.vkDestroyFramebuffer(device, framebuffer_pair.second, nullptr); + } + framebuffers_.clear(); + + for (const auto& render_pass_pair : render_passes_) { + dfn.vkDestroyRenderPass(device, render_pass_pair.second, nullptr); + } + render_passes_.clear(); +} + +VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) { + auto it = render_passes_.find(key.key); + if (it != render_passes_.end()) { + return it->second; + } + + // TODO(Triang3l): Attachments and dependencies. 
+ + VkSubpassDescription subpass_description; + subpass_description.flags = 0; + subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass_description.inputAttachmentCount = 0; + subpass_description.pInputAttachments = nullptr; + subpass_description.colorAttachmentCount = 0; + subpass_description.pColorAttachments = nullptr; + subpass_description.pResolveAttachments = nullptr; + subpass_description.pDepthStencilAttachment = nullptr; + subpass_description.preserveAttachmentCount = 0; + subpass_description.pPreserveAttachments = nullptr; + + VkRenderPassCreateInfo render_pass_create_info; + render_pass_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + render_pass_create_info.pNext = nullptr; + render_pass_create_info.flags = 0; + render_pass_create_info.attachmentCount = 0; + render_pass_create_info.pAttachments = nullptr; + render_pass_create_info.subpassCount = 1; + render_pass_create_info.pSubpasses = &subpass_description; + render_pass_create_info.dependencyCount = 0; + render_pass_create_info.pDependencies = nullptr; + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + VkRenderPass render_pass; + if (dfn.vkCreateRenderPass(device, &render_pass_create_info, nullptr, + &render_pass) != VK_SUCCESS) { + XELOGE("Failed to create a Vulkan render pass"); + return VK_NULL_HANDLE; + } + render_passes_.emplace(key.key, render_pass); + return render_pass; +} + +VkFramebuffer VulkanRenderTargetCache::GetFramebuffer(FramebufferKey key) { + auto it = framebuffers_.find(key); + if (it != framebuffers_.end()) { + return it->second; + } + + VkRenderPass render_pass = GetRenderPass(key.render_pass_key); + if (render_pass == VK_NULL_HANDLE) { + return VK_NULL_HANDLE; + } + + VkFramebufferCreateInfo framebuffer_create_info; + framebuffer_create_info.sType = 
VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + framebuffer_create_info.pNext = nullptr; + framebuffer_create_info.flags = 0; + framebuffer_create_info.renderPass = render_pass; + framebuffer_create_info.attachmentCount = 0; + framebuffer_create_info.pAttachments = nullptr; + framebuffer_create_info.width = 1280; + framebuffer_create_info.height = 720; + framebuffer_create_info.layers = 1; + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + VkFramebuffer framebuffer; + if (dfn.vkCreateFramebuffer(device, &framebuffer_create_info, nullptr, + &framebuffer) != VK_SUCCESS) { + XELOGE("Failed to create a Vulkan framebuffer"); + return VK_NULL_HANDLE; + } + framebuffers_.emplace(key, framebuffer); + return framebuffer; +} + +bool VulkanRenderTargetCache::UpdateRenderTargets( + FramebufferKey& framebuffer_key_out) { + framebuffer_key_out = FramebufferKey(); + return true; +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h new file mode 100644 index 000000000..f5c183f70 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h @@ -0,0 +1,95 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_ +#define XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_ + +#include +#include +#include + +#include "third_party/xxhash/xxhash.h" +#include "xenia/gpu/register_file.h" +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +class VulkanCommandProcessor; + +// TODO(Triang3l): Create a common base for both the Vulkan and the Direct3D +// implementations. +class VulkanRenderTargetCache { + public: + union RenderPassKey { + uint32_t key = 0; + }; + static_assert(sizeof(RenderPassKey) == sizeof(uint32_t)); + + struct FramebufferKey { + RenderPassKey render_pass_key; + + // Including all the padding, for a stable hash. + FramebufferKey() { Reset(); } + FramebufferKey(const FramebufferKey& key) { + std::memcpy(this, &key, sizeof(*this)); + } + FramebufferKey& operator=(const FramebufferKey& key) { + std::memcpy(this, &key, sizeof(*this)); + return *this; + } + bool operator==(const FramebufferKey& key) const { + return std::memcmp(this, &key, sizeof(*this)) == 0; + } + void Reset() { std::memset(this, 0, sizeof(*this)); } + uint64_t GetHash() const { return XXH64(this, sizeof(*this), 0); } + struct Hasher { + size_t operator()(const FramebufferKey& description) const { + return size_t(description.GetHash()); + } + }; + }; + static_assert(sizeof(FramebufferKey) == sizeof(uint32_t)); + + VulkanRenderTargetCache(VulkanCommandProcessor& command_processor, + const RegisterFile& register_file); + ~VulkanRenderTargetCache(); + + bool Initialize(); + void Shutdown(); + void ClearCache(); + + // Returns the render pass object, or VK_NULL_HANDLE if failed to create. + // A render pass managed by the render target cache may be ended and resumed + // at any time (to allow for things like copying and texture loading). 
+ VkRenderPass GetRenderPass(RenderPassKey key); + + // Returns the framebuffer object, or VK_NULL_HANDLE if failed to create. + VkFramebuffer GetFramebuffer(FramebufferKey key); + + // May dispatch computations. + bool UpdateRenderTargets(FramebufferKey& framebuffer_key_out); + + private: + VulkanCommandProcessor& command_processor_; + const RegisterFile& register_file_; + + // RenderPassKey::key -> VkRenderPass. + std::unordered_map render_passes_; + + std::unordered_map + framebuffers_; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_shader.cc b/src/xenia/gpu/vulkan/vulkan_shader.cc new file mode 100644 index 000000000..e4fafff96 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_shader.cc @@ -0,0 +1,48 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/vulkan_shader.h" + +#include + +namespace xe { +namespace gpu { +namespace vulkan { + +VulkanShader::VulkanShader(xenos::ShaderType shader_type, uint64_t data_hash, + const uint32_t* dword_ptr, uint32_t dword_count) + : Shader(shader_type, data_hash, dword_ptr, dword_count) {} + +bool VulkanShader::InitializeShaderModule( + const ui::vulkan::VulkanProvider& provider) { + if (!is_valid()) { + return false; + } + if (shader_module_ != VK_NULL_HANDLE) { + return true; + } + VkShaderModuleCreateInfo shader_module_create_info; + shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + shader_module_create_info.pNext = nullptr; + shader_module_create_info.flags = 0; + shader_module_create_info.codeSize = translated_binary().size(); + shader_module_create_info.pCode = + reinterpret_cast(translated_binary().data()); + if (provider.dfn().vkCreateShaderModule(provider.device(), + &shader_module_create_info, nullptr, + &shader_module_) != VK_SUCCESS) { + is_valid_ = false; + return false; + } + return true; +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_shader.h b/src/xenia/gpu/vulkan/vulkan_shader.h new file mode 100644 index 000000000..23ff5fd90 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_shader.h @@ -0,0 +1,39 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_VULKAN_SHADER_H_ +#define XENIA_GPU_VULKAN_VULKAN_SHADER_H_ + +#include + +#include "xenia/gpu/shader.h" +#include "xenia/gpu/xenos.h" +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +class VulkanShader : public Shader { + public: + VulkanShader(xenos::ShaderType shader_type, uint64_t data_hash, + const uint32_t* dword_ptr, uint32_t dword_count); + + bool InitializeShaderModule(const ui::vulkan::VulkanProvider& provider); + VkShaderModule shader_module() const { return shader_module_; } + + private: + VkShaderModule shader_module_ = VK_NULL_HANDLE; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_VULKAN_SHADER_H_ diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc index e8b1790b3..5ee2755f2 100644 --- a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc @@ -241,6 +241,7 @@ void VulkanSharedMemory::Use(Usage usage, buffer_memory_barrier.size = VK_WHOLE_SIZE; last_usage_ = usage; } + command_processor_.EndRenderPass(); command_processor_.deferred_command_buffer().CmdVkPipelineBarrier( stage_mask_src, stage_mask_dst, 0, 0, nullptr, 1, &buffer_memory_barrier, 0, nullptr); @@ -271,7 +272,7 @@ bool VulkanSharedMemory::InitializeTraceSubmitDownloads() { return false; } - // TODO(Triang3l): End the render pass. + command_processor_.EndRenderPass(); Use(Usage::kRead); DeferredCommandBuffer& command_buffer = command_processor_.deferred_command_buffer(); @@ -384,7 +385,7 @@ bool VulkanSharedMemory::UploadRanges( if (upload_page_ranges.empty()) { return true; } - // TODO(Triang3l): End the render pass. + command_processor_.EndRenderPass(); // upload_page_ranges are sorted, use them to determine the range for the // ordering barrier. 
Use(Usage::kTransferDestination, diff --git a/src/xenia/ui/vulkan/transient_descriptor_pool.cc b/src/xenia/ui/vulkan/transient_descriptor_pool.cc index af2c0f424..e471a6c6e 100644 --- a/src/xenia/ui/vulkan/transient_descriptor_pool.cc +++ b/src/xenia/ui/vulkan/transient_descriptor_pool.cc @@ -80,6 +80,8 @@ VkDescriptorSet TransientDescriptorPool::Request( VkDescriptorSet descriptor_set; // Try to allocate as normal. + // TODO(Triang3l): Investigate the possibility of reuse of descriptor sets, as + // vkAllocateDescriptorSets may be implemented suboptimally. if (!pages_writable_.empty()) { if (page_current_descriptor_sets_used_ < page_descriptor_set_count_ && page_current_descriptors_used_ + layout_descriptor_count <= From 715d614f5e098ede92f38a7f349c2aee9a8ce19a Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 15 Nov 2020 15:08:50 +0300 Subject: [PATCH 059/123] [Vulkan] Float controls properties --- src/xenia/gpu/spirv_shader_translator.cc | 28 ++++-- src/xenia/gpu/spirv_shader_translator.h | 3 +- src/xenia/ui/vulkan/vulkan_provider.cc | 110 ++++++++++++++++++++++- src/xenia/ui/vulkan/vulkan_provider.h | 19 +++- 4 files changed, 149 insertions(+), 11 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index f13418355..69d1c04ac 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -28,7 +28,8 @@ SpirvShaderTranslator::Features::Features(bool all) max_storage_buffer_range(all ? 
UINT32_MAX : (128 * 1024 * 1024)), clip_distance(all), cull_distance(all), - float_controls(all) {} + signed_zero_inf_nan_preserve_float32(all), + denorm_flush_to_zero_float32(all) {} SpirvShaderTranslator::Features::Features( const ui::vulkan::VulkanProvider& provider) @@ -48,8 +49,18 @@ SpirvShaderTranslator::Features::Features( } else { spirv_version = spv::Spv_1_0; } - float_controls = spirv_version >= spv::Spv_1_4 || - device_extensions.khr_shader_float_controls; + if (spirv_version >= spv::Spv_1_4 || + device_extensions.khr_shader_float_controls) { + const VkPhysicalDeviceFloatControlsPropertiesKHR& + float_controls_properties = provider.device_float_controls_properties(); + signed_zero_inf_nan_preserve_float32 = + bool(float_controls_properties.shaderSignedZeroInfNanPreserveFloat32); + denorm_flush_to_zero_float32 = + bool(float_controls_properties.shaderDenormFlushToZeroFloat32); + } else { + signed_zero_inf_nan_preserve_float32 = false; + denorm_flush_to_zero_float32 = false; + } } SpirvShaderTranslator::SpirvShaderTranslator(const Features& features) @@ -82,7 +93,8 @@ void SpirvShaderTranslator::StartTranslation() { builder_->addCapability(IsSpirvTessEvalShader() ? spv::CapabilityTessellation : spv::CapabilityShader); if (features_.spirv_version < spv::Spv_1_4) { - if (features_.float_controls) { + if (features_.signed_zero_inf_nan_preserve_float32 || + features_.denorm_flush_to_zero_float32) { builder_->addExtension("SPV_KHR_float_controls"); } } @@ -511,21 +523,21 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { ? spv::ExecutionModelTessellationEvaluation : spv::ExecutionModelVertex; } - // TODO(Triang3l): Re-enable float controls when - // VkPhysicalDeviceFloatControlsPropertiesKHR are handled. - /* if (features_.float_controls) { + if (features_.denorm_flush_to_zero_float32) { // Flush to zero, similar to the real hardware, also for things like Shader // Model 3 multiplication emulation. 
builder_->addCapability(spv::CapabilityDenormFlushToZero); builder_->addExecutionMode(function_main_, spv::ExecutionModeDenormFlushToZero, 32); + } + if (features_.signed_zero_inf_nan_preserve_float32) { // Signed zero used to get VFACE from ps_param_gen, also special behavior // for infinity in certain instructions (such as logarithm, reciprocal, // muls_prev2). builder_->addCapability(spv::CapabilitySignedZeroInfNanPreserve); builder_->addExecutionMode(function_main_, spv::ExecutionModeSignedZeroInfNanPreserve, 32); - } */ + } spv::Instruction* entry_point = builder_->addEntryPoint(execution_model, function_main_, "main"); for (spv::Id interface_id : main_interface_) { diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 978ad8789..d4c32dda2 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -92,7 +92,8 @@ class SpirvShaderTranslator : public ShaderTranslator { uint32_t max_storage_buffer_range; bool clip_distance; bool cull_distance; - bool float_controls; + bool signed_zero_inf_nan_preserve_float32; + bool denorm_flush_to_zero_float32; }; SpirvShaderTranslator(const Features& features); diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index ba43f43d0..974e543dc 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -125,6 +125,12 @@ bool VulkanProvider::Initialize() { library_functions_loaded &= (lfn_.vkCreateInstance = PFN_vkCreateInstance(lfn_.vkGetInstanceProcAddr( VK_NULL_HANDLE, "vkCreateInstance"))) != nullptr; + library_functions_loaded &= + (lfn_.vkEnumerateInstanceExtensionProperties = + PFN_vkEnumerateInstanceExtensionProperties( + lfn_.vkGetInstanceProcAddr( + VK_NULL_HANDLE, + "vkEnumerateInstanceExtensionProperties"))) != nullptr; if (!library_functions_loaded) { XELOGE( "Failed to get Vulkan library function pointers via " @@ -144,11 +150,58 @@ bool 
VulkanProvider::Initialize() { VK_SUCCESS) { instance_api_version = VK_API_VERSION_1_0; } - XELOGVK("Vulkan instance version {}.{}.{}", + XELOGVK("Vulkan instance version: {}.{}.{}", VK_VERSION_MAJOR(instance_api_version), VK_VERSION_MINOR(instance_api_version), VK_VERSION_PATCH(instance_api_version)); + // Get the instance extensions. + std::vector instance_extension_properties; + VkResult instance_extensions_enumerate_result; + for (;;) { + uint32_t instance_extension_count = + uint32_t(instance_extension_properties.size()); + bool instance_extensions_was_empty = !instance_extension_count; + instance_extensions_enumerate_result = + lfn_.vkEnumerateInstanceExtensionProperties( + nullptr, &instance_extension_count, + instance_extensions_was_empty + ? nullptr + : instance_extension_properties.data()); + // If the original extension count was 0 (first call), SUCCESS is + // returned, not INCOMPLETE. + if (instance_extensions_enumerate_result == VK_SUCCESS || + instance_extensions_enumerate_result == VK_INCOMPLETE) { + instance_extension_properties.resize(instance_extension_count); + if (instance_extensions_enumerate_result == VK_SUCCESS && + (!instance_extensions_was_empty || !instance_extension_count)) { + break; + } + } else { + break; + } + } + if (instance_extensions_enumerate_result != VK_SUCCESS) { + instance_extension_properties.clear(); + } + std::memset(&instance_extensions_, 0, sizeof(instance_extensions_)); + if (instance_api_version >= VK_MAKE_VERSION(1, 1, 0)) { + instance_extensions_.khr_get_physical_device_properties2 = true; + } + for (const VkExtensionProperties& instance_extension : + instance_extension_properties) { + const char* instance_extension_name = instance_extension.extensionName; + if (!instance_extensions_.khr_get_physical_device_properties2 && + !std::strcmp(instance_extension_name, + "VK_KHR_get_physical_device_properties2")) { + instance_extensions_.khr_get_physical_device_properties2 = true; + } + } + XELOGVK("Vulkan instance 
extensions:"); + XELOGVK( + "* VK_KHR_get_physical_device_properties2: {}", + instance_extensions_.khr_get_physical_device_properties2 ? "yes" : "no"); + // Create the instance. std::vector instance_extensions_enabled; instance_extensions_enabled.push_back("VK_KHR_surface"); @@ -157,6 +210,12 @@ bool VulkanProvider::Initialize() { #elif XE_PLATFORM_WIN32 instance_extensions_enabled.push_back("VK_KHR_win32_surface"); #endif + if (instance_api_version < VK_MAKE_VERSION(1, 1, 0)) { + if (instance_extensions_.khr_get_physical_device_properties2) { + instance_extensions_enabled.push_back( + "VK_KHR_get_physical_device_properties2"); + } + } VkApplicationInfo application_info; application_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; application_info.pNext = nullptr; @@ -205,11 +264,16 @@ bool VulkanProvider::Initialize() { } // Get instance functions. + std::memset(&ifn_, 0, sizeof(ifn_)); bool instance_functions_loaded = true; #define XE_VULKAN_LOAD_IFN(name) \ instance_functions_loaded &= \ (ifn_.name = PFN_##name( \ lfn_.vkGetInstanceProcAddr(instance_, #name))) != nullptr; +#define XE_VULKAN_LOAD_IFN_SYMBOL(name, symbol) \ + instance_functions_loaded &= \ + (ifn_.name = PFN_##name( \ + lfn_.vkGetInstanceProcAddr(instance_, symbol))) != nullptr; XE_VULKAN_LOAD_IFN(vkCreateDevice); XE_VULKAN_LOAD_IFN(vkDestroyDevice); XE_VULKAN_LOAD_IFN(vkDestroySurfaceKHR); @@ -229,6 +293,13 @@ bool VulkanProvider::Initialize() { #elif XE_PLATFORM_WIN32 XE_VULKAN_LOAD_IFN(vkCreateWin32SurfaceKHR); #endif + if (instance_extensions_.khr_get_physical_device_properties2) { + XE_VULKAN_LOAD_IFN_SYMBOL(vkGetPhysicalDeviceProperties2KHR, + (instance_api_version >= VK_MAKE_VERSION(1, 1, 0)) + ? 
"vkGetPhysicalDeviceProperties2" + : "vkGetPhysicalDeviceProperties2KHR"); + } +#undef XE_VULKAN_LOAD_IFN_SYMBOL #undef XE_VULKAN_LOAD_IFN if (!instance_functions_loaded) { XELOGE("Failed to get Vulkan instance function pointers"); @@ -470,6 +541,32 @@ bool VulkanProvider::Initialize() { "support"); return false; } + + // Get additional device properties. + std::memset(&device_float_controls_properties_, 0, + sizeof(device_float_controls_properties_)); + if (instance_extensions_.khr_get_physical_device_properties2) { + VkPhysicalDeviceProperties2KHR device_properties_2; + device_properties_2.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + device_properties_2.pNext = nullptr; + VkPhysicalDeviceProperties2KHR* device_properties_2_last = + &device_properties_2; + if (device_extensions_.khr_shader_float_controls) { + device_float_controls_properties_.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR; + device_float_controls_properties_.pNext = nullptr; + device_properties_2_last->pNext = &device_float_controls_properties_; + device_properties_2_last = + reinterpret_cast( + &device_float_controls_properties_); + } + if (device_properties_2_last != &device_properties_2) { + ifn_.vkGetPhysicalDeviceProperties2KHR(physical_device_, + &device_properties_2); + } + } + XELOGVK( "Vulkan device: {} (vendor {:04X}, device {:04X}, driver {:08X}, API " "{}.{}.{})", @@ -487,6 +584,17 @@ bool VulkanProvider::Initialize() { device_extensions_.khr_shader_float_controls ? "yes" : "no"); XELOGVK("* VK_KHR_spirv_1_4: {}", device_extensions_.khr_spirv_1_4 ? "yes" : "no"); + if (device_extensions_.khr_shader_float_controls) { + XELOGVK( + "* Signed zero, inf, nan preserve for float32: {}", + device_float_controls_properties_.shaderSignedZeroInfNanPreserveFloat32 + ? "yes" + : "no"); + XELOGVK("* Denorm flush to zero for float32: {}", + device_float_controls_properties_.shaderDenormFlushToZeroFloat32 + ? 
"yes" + : "no"); + } // TODO(Triang3l): Report properties, features. // Create the device. diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h index 31753472c..b8318fd00 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.h +++ b/src/xenia/ui/vulkan/vulkan_provider.h @@ -57,12 +57,21 @@ class VulkanProvider : public GraphicsProvider { PFN_vkDestroyInstance vkDestroyInstance; // From vkGetInstanceProcAddr. PFN_vkCreateInstance vkCreateInstance; + PFN_vkEnumerateInstanceExtensionProperties + vkEnumerateInstanceExtensionProperties; struct { PFN_vkEnumerateInstanceVersion vkEnumerateInstanceVersion; } v_1_1; }; const LibraryFunctions& lfn() const { return lfn_; } + struct InstanceExtensions { + // Core since 1.1.0. + bool khr_get_physical_device_properties2; + }; + const InstanceExtensions& instance_extensions() const { + return instance_extensions_; + } VkInstance instance() const { return instance_; } struct InstanceFunctions { PFN_vkCreateDevice vkCreateDevice; @@ -75,6 +84,8 @@ class VulkanProvider : public GraphicsProvider { PFN_vkGetPhysicalDeviceFeatures vkGetPhysicalDeviceFeatures; PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties; PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties; + // VK_KHR_get_physical_device_properties2 or 1.1.0. 
+ PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR; PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties; PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR @@ -129,6 +140,10 @@ class VulkanProvider : public GraphicsProvider { uint32_t queue_family_graphics_compute() const { return queue_family_graphics_compute_; } + const VkPhysicalDeviceFloatControlsPropertiesKHR& + device_float_controls_properties() const { + return device_float_controls_properties_; + } VkDevice device() const { return device_; } struct DeviceFunctions { @@ -262,8 +277,9 @@ class VulkanProvider : public GraphicsProvider { LibraryFunctions lfn_ = {}; + InstanceExtensions instance_extensions_; VkInstance instance_ = VK_NULL_HANDLE; - InstanceFunctions ifn_ = {}; + InstanceFunctions ifn_; VkPhysicalDevice physical_device_ = VK_NULL_HANDLE; VkPhysicalDeviceProperties device_properties_; @@ -274,6 +290,7 @@ class VulkanProvider : public GraphicsProvider { uint32_t memory_types_host_coherent_; uint32_t memory_types_host_cached_; uint32_t queue_family_graphics_compute_; + VkPhysicalDeviceFloatControlsPropertiesKHR device_float_controls_properties_; VkDevice device_ = VK_NULL_HANDLE; DeviceFunctions dfn_ = {}; From c2e8d23139df0c15540c6a0f7fa554e35db1e6aa Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 15 Nov 2020 17:26:41 +0300 Subject: [PATCH 060/123] [Vulkan] Scissor from draw_util --- .../gpu/vulkan/vulkan_command_processor.cc | 48 ++++++------------- 1 file changed, 15 insertions(+), 33 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 434ce0d02..6b5a51006 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -18,6 +18,7 @@ #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/base/profiling.h" +#include "xenia/gpu/draw_util.h" #include "xenia/gpu/gpu_flags.h" #include 
"xenia/gpu/registers.h" #include "xenia/gpu/shader.h" @@ -1304,41 +1305,22 @@ void VulkanCommandProcessor::UpdateFixedFunctionState() { } // Scissor. - // TODO(Triang3l): Move all of this to draw_util. - // TODO(Triang3l): Limit the scissor if exceeding the device limit. - auto pa_sc_window_scissor_tl = regs.Get(); - auto pa_sc_window_scissor_br = regs.Get(); - VkRect2D scissor; - scissor.offset.x = int32_t(pa_sc_window_scissor_tl.tl_x); - scissor.offset.y = int32_t(pa_sc_window_scissor_tl.tl_y); - int32_t scissor_br_x = - std::max(int32_t(pa_sc_window_scissor_br.br_x), scissor.offset.x); - int32_t scissor_br_y = - std::max(int32_t(pa_sc_window_scissor_br.br_y), scissor.offset.y); - if (!pa_sc_window_scissor_tl.window_offset_disable) { - scissor.offset.x = std::max( - scissor.offset.x + pa_sc_window_offset.window_x_offset, int32_t(0)); - scissor.offset.y = std::max( - scissor.offset.y + pa_sc_window_offset.window_y_offset, int32_t(0)); - scissor_br_x = std::max(scissor_br_x + pa_sc_window_offset.window_x_offset, - int32_t(0)); - scissor_br_y = std::max(scissor_br_y + pa_sc_window_offset.window_y_offset, - int32_t(0)); - } - scissor.extent.width = uint32_t(scissor_br_x - scissor.offset.x); - scissor.extent.height = uint32_t(scissor_br_y - scissor.offset.y); - scissor.offset.x *= pixel_size_x; - scissor.offset.y *= pixel_size_y; - scissor.extent.width *= pixel_size_x; - scissor.extent.height *= pixel_size_y; - ff_scissor_update_needed_ |= ff_scissor_.offset.x != scissor.offset.x; - ff_scissor_update_needed_ |= ff_scissor_.offset.y != scissor.offset.y; - ff_scissor_update_needed_ |= ff_scissor_.extent.width != scissor.extent.width; + draw_util::Scissor scissor; + draw_util::GetScissor(regs, scissor); + VkRect2D scissor_rect; + scissor_rect.offset.x = int32_t(scissor.left * pixel_size_x); + scissor_rect.offset.y = int32_t(scissor.top * pixel_size_y); + scissor_rect.extent.width = scissor.width * pixel_size_x; + scissor_rect.extent.height = scissor.height * 
pixel_size_y; + ff_scissor_update_needed_ |= ff_scissor_.offset.x != scissor_rect.offset.x; + ff_scissor_update_needed_ |= ff_scissor_.offset.y != scissor_rect.offset.y; ff_scissor_update_needed_ |= - ff_scissor_.extent.height != scissor.extent.height; + ff_scissor_.extent.width != scissor_rect.extent.width; + ff_scissor_update_needed_ |= + ff_scissor_.extent.height != scissor_rect.extent.height; if (ff_scissor_update_needed_) { - ff_scissor_ = scissor; - deferred_command_buffer_.CmdVkSetScissor(0, 1, &scissor); + ff_scissor_ = scissor_rect; + deferred_command_buffer_.CmdVkSetScissor(0, 1, &scissor_rect); ff_scissor_update_needed_ = false; } } From a94301d967b5a0fb9753df8b70a1fbdd21b13e71 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Wed, 18 Nov 2020 12:48:12 +0300 Subject: [PATCH 061/123] [Vulkan] Viewport from draw_util and vtx_fmt --- src/xenia/gpu/spirv_shader_translator.cc | 130 ++++++++++++++++++ src/xenia/gpu/spirv_shader_translator.h | 24 ++++ .../gpu/vulkan/vulkan_command_processor.cc | 127 ++++++++++------- .../gpu/vulkan/vulkan_command_processor.h | 6 +- 4 files changed, 232 insertions(+), 55 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 69d1c04ac..de9c6c969 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -168,10 +168,13 @@ void SpirvShaderTranslator::StartTranslation() { spv::Id type; }; const SystemConstant system_constants[] = { + {"flags", offsetof(SystemConstants, flags), type_uint_}, {"vertex_index_endian", offsetof(SystemConstants, vertex_index_endian), type_uint_}, {"vertex_base_index", offsetof(SystemConstants, vertex_base_index), type_int_}, + {"ndc_scale", offsetof(SystemConstants, ndc_scale), type_float3_}, + {"ndc_offset", offsetof(SystemConstants, ndc_offset), type_float3_}, }; id_vector_temp_.clear(); id_vector_temp_.reserve(xe::countof(system_constants)); @@ -997,6 +1000,133 @@ void 
SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { } void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantFlags)); + spv::Id system_constant_flags = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision); + + id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(kOutputPerVertexMemberPosition)); + spv::Id position_ptr = builder_->createAccessChain( + spv::StorageClassOutput, output_per_vertex_, id_vector_temp_); + spv::Id guest_position = builder_->createLoad(position_ptr, spv::NoPrecision); + + // Check if the shader already returns W, not 1/W, and if it doesn't, turn 1/W + // into W. + spv::Id position_w = + builder_->createCompositeExtract(guest_position, type_float_, 3); + spv::Id is_w_not_reciprocal = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, system_constant_flags, + builder_->makeUintConstant( + static_cast(kSysFlag_WNotReciprocal))), + const_uint_0_); + spv::Id guest_position_w_inv = builder_->createBinOp( + spv::OpFDiv, type_float_, const_float_1_, position_w); + builder_->addDecoration(guest_position_w_inv, spv::DecorationNoContraction); + position_w = + builder_->createTriOp(spv::OpSelect, type_float_, is_w_not_reciprocal, + position_w, guest_position_w_inv); + + // Check if the shader returns XY/W rather than XY, and if it does, revert + // that. + // TODO(Triang3l): Check if having XY or Z pre-divided by W should result in + // affine interpolation. 
+ uint_vector_temp_.clear(); + uint_vector_temp_.reserve(2); + uint_vector_temp_.push_back(0); + uint_vector_temp_.push_back(1); + spv::Id position_xy = builder_->createRvalueSwizzle( + spv::NoPrecision, type_float2_, guest_position, uint_vector_temp_); + spv::Id is_xy_divided_by_w = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, system_constant_flags, + builder_->makeUintConstant( + static_cast(kSysFlag_XYDividedByW))), + const_uint_0_); + spv::Id guest_position_xy_mul_w = builder_->createBinOp( + spv::OpVectorTimesScalar, type_float2_, position_xy, position_w); + builder_->addDecoration(guest_position_xy_mul_w, + spv::DecorationNoContraction); + position_xy = + builder_->createTriOp(spv::OpSelect, type_float2_, is_xy_divided_by_w, + guest_position_xy_mul_w, position_xy); + + // Check if the shader returns Z/W rather than Z, and if it does, revert that. + // TODO(Triang3l): Check if having XY or Z pre-divided by W should result in + // affine interpolation. + spv::Id position_z = + builder_->createCompositeExtract(guest_position, type_float_, 2); + spv::Id is_z_divided_by_w = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, system_constant_flags, + builder_->makeUintConstant( + static_cast(kSysFlag_ZDividedByW))), + const_uint_0_); + spv::Id guest_position_z_mul_w = + builder_->createBinOp(spv::OpFMul, type_float_, position_z, position_w); + builder_->addDecoration(guest_position_z_mul_w, spv::DecorationNoContraction); + position_z = + builder_->createTriOp(spv::OpSelect, type_float_, is_z_divided_by_w, + guest_position_z_mul_w, position_z); + + // Build XYZ of the position with W format handled. 
+ spv::Id position_xyz; + { + std::unique_ptr composite_construct_op = + std::make_unique( + builder_->getUniqueId(), type_float3_, spv::OpCompositeConstruct); + composite_construct_op->addIdOperand(position_xy); + composite_construct_op->addIdOperand(position_z); + position_xyz = composite_construct_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(composite_construct_op)); + } + + // Apply the NDC scale and offset for guest to host viewport transformation. + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantNdcScale)); + spv::Id ndc_scale = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision); + position_xyz = + builder_->createBinOp(spv::OpFMul, type_float3_, position_xyz, ndc_scale); + builder_->addDecoration(position_xyz, spv::DecorationNoContraction); + id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(kSystemConstantNdcOffset)); + spv::Id ndc_offset = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision); + spv::Id ndc_offset_mul_w = builder_->createBinOp( + spv::OpVectorTimesScalar, type_float3_, ndc_offset, position_w); + builder_->addDecoration(ndc_offset_mul_w, spv::DecorationNoContraction); + position_xyz = builder_->createBinOp(spv::OpFAdd, type_float3_, position_xyz, + ndc_offset_mul_w); + builder_->addDecoration(position_xyz, spv::DecorationNoContraction); + + // Store the position converted to the host. 
+ spv::Id position; + { + std::unique_ptr composite_construct_op = + std::make_unique( + builder_->getUniqueId(), type_float4_, spv::OpCompositeConstruct); + composite_construct_op->addIdOperand(position_xyz); + composite_construct_op->addIdOperand(position_w); + position = composite_construct_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(composite_construct_op)); + } + builder_->createStore(position, position_ptr); + // Write 1 to point size (using a geometry shader or another kind of fallback // to expand point sprites - point size support is not guaranteed, and the // size would also be limited, and can't be controlled independently along two diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index d4c32dda2..fadcf2a6b 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -25,12 +25,33 @@ namespace gpu { class SpirvShaderTranslator : public ShaderTranslator { public: + enum : uint32_t { + kSysFlag_XYDividedByW_Shift, + kSysFlag_ZDividedByW_Shift, + kSysFlag_WNotReciprocal_Shift, + + kSysFlag_Count, + + kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift, + kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift, + kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift, + }; + static_assert(kSysFlag_Count <= 32, "Too many flags in the system constants"); + // IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED: // - SystemConstantIndex enum. // - Structure members in BeginTranslation. 
struct SystemConstants { + uint32_t flags; xenos::Endian vertex_index_endian; int32_t vertex_base_index; + uint32_t padding_vertex_base_index; + + float ndc_scale[3]; + uint32_t padding_ndc_scale; + + float ndc_offset[3]; + uint32_t padding_ndc_offset; }; // The minimum limit for maxPerStageDescriptorStorageBuffers is 4, and for @@ -329,8 +350,11 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id const_float2_0_1_; enum SystemConstantIndex : unsigned int { + kSystemConstantFlags, kSystemConstantIndexVertexIndexEndian, kSystemConstantIndexVertexBaseIndex, + kSystemConstantNdcScale, + kSystemConstantNdcOffset, }; spv::Id uniform_system_constants_; spv::Id uniform_float_constants_; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 6b5a51006..9b4d598f1 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -686,14 +686,45 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, current_graphics_pipeline_layout_ = pipeline_layout; } + const RegisterFile& regs = *register_file_; + const ui::vulkan::VulkanProvider& provider = + GetVulkanContext().GetVulkanProvider(); + const VkPhysicalDeviceProperties& device_properties = + provider.device_properties(); + + // Get dynamic rasterizer state. + draw_util::ViewportInfo viewport_info; + // Just handling maxViewportDimensions is enough - viewportBoundsRange[1] must + // be at least 2 * max(maxViewportDimensions[0...1]) - 1, and + // maxViewportDimensions must be greater than or equal to the size of the + // largest possible framebuffer attachment (if the viewport has positive + // offset and is between maxViewportDimensions and viewportBoundsRange[1], + // GetHostViewportInfo will adjust ndc_scale/ndc_offset to clamp it, and the + // clamped range will be outside the largest possible framebuffer anyway. 
+ // TODO(Triang3l): Possibly handle maxViewportDimensions and + // viewportBoundsRange separately because when using fragment shader + // interlocks, framebuffers are not used, while the range may be wider than + // dimensions? Though viewport bigger than 4096 - the smallest possible + // maximum dimension (which is below the 8192 texture size limit on the Xbox + // 360) - and with offset, is probably a situation that never happens in real + // life. Or even disregard the viewport bounds range in the fragment shader + // interlocks case completely - apply the viewport and the scissor offset + // directly to pixel address and to things like ps_param_gen. + draw_util::GetHostViewportInfo( + regs, 1.0f, 1.0f, false, + float(device_properties.limits.maxViewportDimensions[0]), + float(device_properties.limits.maxViewportDimensions[1]), true, + viewport_info); + // Update fixed-function dynamic state. - UpdateFixedFunctionState(); + UpdateFixedFunctionState(viewport_info); bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base; // Update system constants before uploading them. - UpdateSystemConstantValues(indexed ? index_buffer_info->endianness - : xenos::Endian::kNone); + UpdateSystemConstantValues( + indexed ? index_buffer_info->endianness : xenos::Endian::kNone, + viewport_info); // Update uniform buffers and descriptor sets after binding the pipeline with // the new layout. @@ -701,8 +732,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, return false; } - const RegisterFile& regs = *register_file_; - // Ensure vertex buffers are resident. // TODO(Triang3l): Cache residency for ranges in a way similar to how texture // validity is tracked. 
@@ -1229,7 +1258,8 @@ VkShaderStageFlags VulkanCommandProcessor::GetGuestVertexShaderStageFlags() return stages; } -void VulkanCommandProcessor::UpdateFixedFunctionState() { +void VulkanCommandProcessor::UpdateFixedFunctionState( + const draw_util::ViewportInfo& viewport_info) { #if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES @@ -1245,53 +1275,13 @@ void VulkanCommandProcessor::UpdateFixedFunctionState() { uint32_t pixel_size_x = 1, pixel_size_y = 1; // Viewport. - // PA_CL_VTE_CNTL contains whether offsets and scales are enabled. - // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf - // In games, either all are enabled (for regular drawing) or none are (for - // rectangle lists usually). - // - // If scale/offset is enabled, the Xenos shader is writing (neglecting W - // division) position in the NDC (-1, -1, dx_clip_space_def - 1) -> (1, 1, 1) - // box. If it's not, the position is in screen space. Since we can only use - // the NDC in PC APIs, we use a viewport of the largest possible size, and - // divide the position by it in translated shaders. - // - // TODO(Triang3l): Move all of this to draw_util. - // TODO(Triang3l): Limit the viewport if exceeding the device limit; move to - // NDC scale/offset constants. - auto pa_cl_vte_cntl = regs.Get(); - float viewport_scale_x = - pa_cl_vte_cntl.vport_x_scale_ena - ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32) - : 4096.0f; - float viewport_scale_y = - pa_cl_vte_cntl.vport_y_scale_ena - ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32) - : 4096.0f; - float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 - : 1.0f; - float viewport_offset_x = pa_cl_vte_cntl.vport_x_offset_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 - : std::abs(viewport_scale_x); - float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena - ? 
regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 - : std::abs(viewport_scale_y); - float viewport_offset_z = pa_cl_vte_cntl.vport_z_offset_ena - ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 - : 0.0f; - if (regs.Get().vtx_window_offset_enable) { - viewport_offset_x += float(pa_sc_window_offset.window_x_offset); - viewport_offset_y += float(pa_sc_window_offset.window_y_offset); - } VkViewport viewport; - viewport.x = (viewport_offset_x - viewport_scale_x) * float(pixel_size_x); - viewport.y = (viewport_offset_y - viewport_scale_y) * float(pixel_size_y); - viewport.width = viewport_scale_x * 2.0f * float(pixel_size_x); - viewport.height = viewport_scale_y * 2.0f * float(pixel_size_y); - viewport.minDepth = std::min(std::max(viewport_offset_z, 0.0f), 1.0f); - viewport.maxDepth = - std::min(std::max(viewport_offset_z + viewport_scale_z, 0.0f), 1.0f); + viewport.x = viewport_info.left; + viewport.y = viewport_info.top; + viewport.width = viewport_info.width; + viewport.height = viewport_info.height; + viewport.minDepth = viewport_info.z_min; + viewport.maxDepth = viewport_info.z_max; ff_viewport_update_needed_ |= ff_viewport_.x != viewport.x; ff_viewport_update_needed_ |= ff_viewport_.y != viewport.y; ff_viewport_update_needed_ |= ff_viewport_.width != viewport.width; @@ -1326,16 +1316,39 @@ void VulkanCommandProcessor::UpdateFixedFunctionState() { } void VulkanCommandProcessor::UpdateSystemConstantValues( - xenos::Endian index_endian) { + xenos::Endian index_endian, const draw_util::ViewportInfo& viewport_info) { #if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES const RegisterFile& regs = *register_file_; + auto pa_cl_vte_cntl = regs.Get(); int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32); bool dirty = false; + // Flags. + uint32_t flags = 0; + // W0 division control. 
+ // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + // 8: VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0. + // = false: multiply the X, Y coordinates by 1/W0. + // 9: VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0. + // = false: multiply the Z coordinate by 1/W0. + // 10: VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal + // to get 1/W0. + if (pa_cl_vte_cntl.vtx_xy_fmt) { + flags |= SpirvShaderTranslator::kSysFlag_XYDividedByW; + } + if (pa_cl_vte_cntl.vtx_z_fmt) { + flags |= SpirvShaderTranslator::kSysFlag_ZDividedByW; + } + if (pa_cl_vte_cntl.vtx_w0_fmt) { + flags |= SpirvShaderTranslator::kSysFlag_WNotReciprocal; + } + dirty |= system_constants_.flags != flags; + system_constants_.flags = flags; + // Index or tessellation edge factor buffer endianness. dirty |= system_constants_.vertex_index_endian != index_endian; system_constants_.vertex_index_endian = index_endian; @@ -1344,6 +1357,14 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( dirty |= system_constants_.vertex_base_index != vgt_indx_offset; system_constants_.vertex_base_index = vgt_indx_offset; + // Conversion to host normalized device coordinates. 
+ for (uint32_t i = 0; i < 3; ++i) { + dirty |= system_constants_.ndc_scale[i] != viewport_info.ndc_scale[i]; + dirty |= system_constants_.ndc_offset[i] != viewport_info.ndc_offset[i]; + system_constants_.ndc_scale[i] = viewport_info.ndc_scale[i]; + system_constants_.ndc_offset[i] = viewport_info.ndc_offset[i]; + } + if (dirty) { current_graphics_descriptor_set_values_up_to_date_ &= ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants); diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index a7283d56f..e083b3755 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -19,6 +19,7 @@ #include #include "xenia/gpu/command_processor.h" +#include "xenia/gpu/draw_util.h" #include "xenia/gpu/spirv_shader_translator.h" #include "xenia/gpu/vulkan/deferred_command_buffer.h" #include "xenia/gpu/vulkan/vulkan_graphics_system.h" @@ -170,8 +171,9 @@ class VulkanCommandProcessor : public CommandProcessor { VkShaderStageFlags GetGuestVertexShaderStageFlags() const; - void UpdateFixedFunctionState(); - void UpdateSystemConstantValues(xenos::Endian index_endian); + void UpdateFixedFunctionState(const draw_util::ViewportInfo& viewport_info); + void UpdateSystemConstantValues(xenos::Endian index_endian, + const draw_util::ViewportInfo& viewport_info); bool UpdateBindings(const VulkanShader* vertex_shader, const VulkanShader* pixel_shader); // Allocates a descriptor, space in the uniform buffer pool, and fills the From 1e818dca4beb660f4953744e5add2b0f53398614 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 19 Nov 2020 11:38:20 +0300 Subject: [PATCH 062/123] [Vulkan] Include Vulkan Android header and fix some warnings --- src/xenia/gpu/vulkan/vulkan_command_processor.cc | 8 ++++---- src/xenia/gpu/vulkan/vulkan_shared_memory.cc | 2 ++ src/xenia/ui/vulkan/vulkan_provider.h | 6 +++++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git 
a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 9b4d598f1..5ac052812 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -493,7 +493,7 @@ VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, "Failed to create a Vulkan descriptor set layout for {} combined " "images and samplers for guest pixel shaders", texture_count_pixel); - return false; + return nullptr; } descriptor_set_layouts_textures_.emplace( texture_descriptor_set_layout_key.key, @@ -536,7 +536,7 @@ VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, "Failed to create a Vulkan descriptor set layout for {} combined " "images and samplers for guest vertex shaders", texture_count_vertex); - return false; + return nullptr; } descriptor_set_layouts_textures_.emplace( texture_descriptor_set_layout_key.key, @@ -588,7 +588,7 @@ VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, "Failed to create a Vulkan pipeline layout for guest drawing with {} " "pixel shader and {} vertex shader textures", texture_count_pixel, texture_count_vertex); - return false; + return nullptr; } PipelineLayout pipeline_layout_entry; pipeline_layout_entry.pipeline_layout = pipeline_layout; @@ -1650,7 +1650,7 @@ uint8_t* VulkanCommandProcessor::WriteUniformBufferBinding( provider.device_properties().limits.minUniformBufferOffsetAlignment), descriptor_buffer_info_out.buffer, descriptor_buffer_info_out.offset); if (!mapping) { - return false; + return nullptr; } descriptor_buffer_info_out.range = VkDeviceSize(size); write_descriptor_set_out.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc index 5ee2755f2..0e7a6d66f 100644 --- a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc @@ -467,6 +467,8 @@ void 
VulkanSharedMemory::GetBarrier(Usage usage, stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; access_mask = VK_ACCESS_TRANSFER_WRITE_BIT; return; + default: + break; } stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h index b8318fd00..63114d1e0 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.h +++ b/src/xenia/ui/vulkan/vulkan_provider.h @@ -19,7 +19,11 @@ #include "xenia/base/platform.h" #include "xenia/ui/graphics_provider.h" -#if XE_PLATFORM_WIN32 +#if XE_PLATFORM_ANDROID +#ifndef VK_USE_PLATFORM_ANDROID_KHR +#define VK_USE_PLATFORM_ANDROID_KHR 1 +#endif +#elif XE_PLATFORM_WIN32 // Must be included before vulkan.h with VK_USE_PLATFORM_WIN32_KHR because it // includes Windows.h too. #include "xenia/base/platform_win.h" From fbb1a2270890d8f48e7a14d8392e854da7f45bc5 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Fri, 20 Nov 2020 21:27:15 +0300 Subject: [PATCH 063/123] [Build] Clone premake to internal storage on Android --- tools/build/premake | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/tools/build/premake b/tools/build/premake index 9113958a5..29eab42ac 100644 --- a/tools/build/premake +++ b/tools/build/premake @@ -10,6 +10,7 @@ __author__ = 'ben.vanik@gmail.com (Ben Vanik)' import json import os +import shutil import subprocess import sys import re @@ -17,7 +18,18 @@ import re self_path = os.path.dirname(os.path.abspath(__file__)) root_path = os.path.join(self_path, '..', '..') -premake_path = os.path.join(root_path, 'third_party', 'premake-core') +premake_external_path = os.path.join(root_path, 'third_party', 'premake-core') +# On Android, the repository may be cloned to the external storage, +# which doesn't support executables in it. 
+# In this case, premake-core needs to be checked out in the internal storage, +# which supports executables, with all the permissions as set in its repository. +# On Termux, the home directory is in the internal storage - use it for executing. +# If xenia-build doesn't have execute permissions, Xenia is in the external storage now. +premake_path = premake_external_path +if 'ANDROID_ROOT' in os.environ: + xb_file = os.path.join(root_path, 'xenia-build') + if os.path.isfile(xb_file) and not os.access(xb_file, os.X_OK) and 'HOME' in os.environ: + premake_path = os.path.join(os.environ['HOME'], 'xenia', 'third_party', 'premake-core') def main(): @@ -58,6 +70,8 @@ def main(): def build_premake(): """Builds premake from source. """ + # Ensure that on Android, premake-core is in the internal storage. + clone_premake_to_internal_storage() cwd = os.getcwd() try: os.chdir(premake_path) @@ -91,6 +105,33 @@ def build_premake(): pass +def clone_premake_to_internal_storage(): + """Clones premake to the Android internal storage so it can be executed. + """ + # premake_path is initialized to a value different than premake_external_path + # if running from the Android external storage, and may not exist yet. + if premake_path == premake_external_path: + return + + # Ensure the submodule has been checked out. + if not os.path.exists(os.path.join(premake_external_path, 'scripts', 'package.lua')): + print('third_party/premake-core was not present; run xb setup...') + sys.exit(1) + return + + # Create or refresh premake-core in the internal storage. + print('Cloning premake5 to the internal storage...') + shutil.rmtree(premake_path, ignore_errors=True) + os.makedirs(premake_path) + shell_call([ + 'git', + 'clone', + '--recurse-submodules', + premake_external_path, + premake_path, + ]) + + def has_bin(bin): """Checks whether the given binary is present. 
""" From 6584a2421c800d6e4437468186738c3719797a65 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Fri, 20 Nov 2020 23:47:19 +0300 Subject: [PATCH 064/123] [Vulkan] Faceness-related rasterization state --- src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 100 ++++++++++++++++-- src/xenia/gpu/vulkan/vulkan_pipeline_cache.h | 16 ++- 2 files changed, 108 insertions(+), 8 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 8db426857..1700584b2 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -9,6 +9,7 @@ #include "xenia/gpu/vulkan/vulkan_pipeline_cache.h" +#include #include #include @@ -43,6 +44,7 @@ bool VulkanPipelineCache::Initialize() { device_pipeline_features_.features = 0; // TODO(Triang3l): Support the portability subset. + device_pipeline_features_.point_polygons = 1; device_pipeline_features_.triangle_fans = 1; shader_translator_ = std::make_unique( @@ -210,15 +212,21 @@ bool VulkanPipelineCache::GetCurrentStateDescription( description_out.Reset(); const RegisterFile& regs = register_file_; + auto pa_su_sc_mode_cntl = regs.Get(); + auto vgt_draw_initiator = regs.Get(); description_out.vertex_shader_hash = vertex_shader->ucode_data_hash(); description_out.pixel_shader_hash = pixel_shader ? pixel_shader->ucode_data_hash() : 0; description_out.render_pass_key = render_pass_key; - auto vgt_draw_initiator = regs.Get(); + xenos::PrimitiveType primitive_type = vgt_draw_initiator.prim_type; PipelinePrimitiveTopology primitive_topology; - switch (vgt_draw_initiator.prim_type) { + // Vulkan explicitly allows primitive restart only for specific primitive + // types, unlike Direct3D where it's valid for non-strips, but has + // implementation-defined behavior. 
+ bool primitive_restart_allowed = false; + switch (primitive_type) { case xenos::PrimitiveType::kPointList: primitive_topology = PipelinePrimitiveTopology::kPointList; break; @@ -227,24 +235,76 @@ bool VulkanPipelineCache::GetCurrentStateDescription( break; case xenos::PrimitiveType::kLineStrip: primitive_topology = PipelinePrimitiveTopology::kLineStrip; + primitive_restart_allowed = true; break; case xenos::PrimitiveType::kTriangleList: + case xenos::PrimitiveType::kRectangleList: primitive_topology = PipelinePrimitiveTopology::kTriangleList; break; case xenos::PrimitiveType::kTriangleFan: - primitive_topology = device_pipeline_features_.triangle_fans - ? PipelinePrimitiveTopology::kTriangleFan - : PipelinePrimitiveTopology::kTriangleList; + if (device_pipeline_features_.triangle_fans) { + primitive_topology = PipelinePrimitiveTopology::kTriangleFan; + primitive_restart_allowed = true; + } else { + primitive_topology = PipelinePrimitiveTopology::kTriangleList; + } break; case xenos::PrimitiveType::kTriangleStrip: primitive_topology = PipelinePrimitiveTopology::kTriangleStrip; + primitive_restart_allowed = true; break; default: // TODO(Triang3l): All primitive types and tessellation. return false; } description_out.primitive_topology = primitive_topology; - // TODO(Triang3l): Primitive restart. + description_out.primitive_restart = + primitive_restart_allowed && pa_su_sc_mode_cntl.multi_prim_ib_ena; + + // TODO(Triang3l): Tessellation. 
+ bool primitive_polygonal = xenos::IsPrimitivePolygonal(false, primitive_type); + if (primitive_polygonal) { + // Vulkan only allows the polygon mode to be set for both faces - pick the + // most special one (more likely to represent the developer's deliberate + // intentions - fill is very generic, wireframe is common in debug, points + // are for pretty unusual things, but closer to debug purposes too - on the + // Xenos, points have the lowest register value and triangles have the + // highest) based on which faces are not culled. + bool cull_front = pa_su_sc_mode_cntl.cull_front; + bool cull_back = pa_su_sc_mode_cntl.cull_back; + description_out.cull_front = cull_front; + description_out.cull_back = cull_back; + xenos::PolygonType polygon_type = xenos::PolygonType::kTriangles; + if (!cull_front) { + polygon_type = + std::min(polygon_type, pa_su_sc_mode_cntl.polymode_front_ptype); + } + if (!cull_back) { + polygon_type = + std::min(polygon_type, pa_su_sc_mode_cntl.polymode_back_ptype); + } + switch (polygon_type) { + case xenos::PolygonType::kPoints: + // When points are not supported, use lines instead, preserving + // debug-like purpose. + description_out.polygon_mode = device_pipeline_features_.point_polygons + ? 
PipelinePolygonMode::kPoint + : PipelinePolygonMode::kLine; + break; + case xenos::PolygonType::kLines: + description_out.polygon_mode = PipelinePolygonMode::kLine; + break; + case xenos::PolygonType::kTriangles: + description_out.polygon_mode = PipelinePolygonMode::kFill; + break; + default: + assert_unhandled_case(polygon_type); + return false; + } + description_out.front_face_clockwise = pa_su_sc_mode_cntl.face != 0; + } else { + description_out.polygon_mode = PipelinePolygonMode::kFill; + } return true; } @@ -374,6 +434,34 @@ bool VulkanPipelineCache::EnsurePipelineCreated( VkPipelineRasterizationStateCreateInfo rasterization_state = {}; rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + switch (description.polygon_mode) { + case PipelinePolygonMode::kFill: + rasterization_state.polygonMode = VK_POLYGON_MODE_FILL; + break; + case PipelinePolygonMode::kLine: + rasterization_state.polygonMode = VK_POLYGON_MODE_LINE; + break; + case PipelinePolygonMode::kPoint: + assert_true(device_pipeline_features_.point_polygons); + if (!device_pipeline_features_.point_polygons) { + return false; + } + rasterization_state.polygonMode = VK_POLYGON_MODE_POINT; + break; + default: + assert_unhandled_case(description.polygon_mode); + return false; + } + rasterization_state.cullMode = VK_CULL_MODE_NONE; + if (description.cull_front) { + rasterization_state.cullMode |= VK_CULL_MODE_FRONT_BIT; + } + if (description.cull_back) { + rasterization_state.cullMode |= VK_CULL_MODE_BACK_BIT; + } + rasterization_state.frontFace = description.front_face_clockwise + ? 
VK_FRONT_FACE_CLOCKWISE + : VK_FRONT_FACE_COUNTER_CLOCKWISE; rasterization_state.lineWidth = 1.0f; VkPipelineMultisampleStateCreateInfo multisample_state = {}; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h index 1d2f852e5..b22212552 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h @@ -74,6 +74,7 @@ class VulkanPipelineCache { // method) should result in a different storage file being used. union DevicePipelineFeatures { struct { + uint32_t point_polygons : 1; uint32_t triangle_fans : 1; }; uint32_t features = 0; @@ -91,6 +92,12 @@ class VulkanPipelineCache { kPatchList, }; + enum class PipelinePolygonMode : uint32_t { + kFill, + kLine, + kPoint, + }; + XEPACKEDSTRUCT(PipelineDescription, { uint64_t vertex_shader_hash; // 0 if no pixel shader. @@ -98,8 +105,13 @@ class VulkanPipelineCache { VulkanRenderTargetCache::RenderPassKey render_pass_key; // Input assembly. - PipelinePrimitiveTopology primitive_topology : 3; - uint32_t primitive_restart : 1; + PipelinePrimitiveTopology primitive_topology : 3; // 3 + uint32_t primitive_restart : 1; // 4 + // Rasterization. + PipelinePolygonMode polygon_mode : 2; // 6 + uint32_t cull_front : 1; // 7 + uint32_t cull_back : 1; // 8 + uint32_t front_face_clockwise : 1; // 9 // Including all the padding, for a stable hash. 
PipelineDescription() { Reset(); } From 1f69516bb5504599d8520f117dc983d7dff8d1ce Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 21 Nov 2020 14:43:10 +0300 Subject: [PATCH 065/123] [Build] Better Android detection in tools/build/premake --- tools/build/premake | 52 +++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/tools/build/premake b/tools/build/premake index 29eab42ac..0e87bcc49 100644 --- a/tools/build/premake +++ b/tools/build/premake @@ -18,18 +18,37 @@ import re self_path = os.path.dirname(os.path.abspath(__file__)) root_path = os.path.join(self_path, '..', '..') -premake_external_path = os.path.join(root_path, 'third_party', 'premake-core') -# On Android, the repository may be cloned to the external storage, -# which doesn't support executables in it. -# In this case, premake-core needs to be checked out in the internal storage, -# which supports executables, with all the permissions as set in its repository. -# On Termux, the home directory is in the internal storage - use it for executing. -# If xenia-build doesn't have execute permissions, Xenia is in the external storage now. -premake_path = premake_external_path -if 'ANDROID_ROOT' in os.environ: - xb_file = os.path.join(root_path, 'xenia-build') - if os.path.isfile(xb_file) and not os.access(xb_file, os.X_OK) and 'HOME' in os.environ: - premake_path = os.path.join(os.environ['HOME'], 'xenia', 'third_party', 'premake-core') +premake_submodule_path = os.path.join(root_path, 'third_party', 'premake-core') +premake_path = premake_submodule_path + + +def setup_premake_path_override(): + global premake_path + premake_path = premake_submodule_path + if sys.platform == 'linux': + # On Android, the repository may be cloned to the external storage, which + # doesn't support executables in it. 
+ # In this case, premake-core needs to be checked out in the internal + # storage, which supports executables, with all the permissions as set in + # its repository. + # On Termux, the home directory is in the internal storage - use it for + # executing. + # If xenia-build doesn't have execute permissions, Xenia is in the external + # storage now. + try: + popen = subprocess.Popen( + ['uname', '-o'], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, + universal_newlines=True) + if popen.communicate()[0] == 'Android\n': + xb_file = os.path.join(root_path, 'xenia-build') + if (os.path.isfile(xb_file) and not os.access(xb_file, os.X_OK) and + 'HOME' in os.environ): + premake_path = os.path.join( + os.environ['HOME'], 'xenia', 'third_party', 'premake-core') + except Exception: + pass + +setup_premake_path_override() def main(): @@ -108,13 +127,14 @@ def build_premake(): def clone_premake_to_internal_storage(): """Clones premake to the Android internal storage so it can be executed. """ - # premake_path is initialized to a value different than premake_external_path + # premake_path is initialized to a value different than premake_submodule_path # if running from the Android external storage, and may not exist yet. - if premake_path == premake_external_path: + if premake_path == premake_submodule_path: return # Ensure the submodule has been checked out. 
- if not os.path.exists(os.path.join(premake_external_path, 'scripts', 'package.lua')): + if not os.path.exists( + os.path.join(premake_submodule_path, 'scripts', 'package.lua')): print('third_party/premake-core was not present; run xb setup...') sys.exit(1) return @@ -127,7 +147,7 @@ def clone_premake_to_internal_storage(): 'git', 'clone', '--recurse-submodules', - premake_external_path, + premake_submodule_path, premake_path, ]) From 8ccd6f956003d08310ce18b978c02ce6b7be80e9 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 21 Nov 2020 14:54:57 +0300 Subject: [PATCH 066/123] [Build] xenia-build Android host OS detection --- xenia-build | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/xenia-build b/xenia-build index 7a9614ba9..ccea6201c 100755 --- a/xenia-build +++ b/xenia-build @@ -22,6 +22,17 @@ __author__ = 'ben.vanik@gmail.com (Ben Vanik)' self_path = os.path.dirname(os.path.abspath(__file__)) +# Detect if building on Android via Termux. +host_os_is_android = False +if sys.platform == 'linux': + try: + host_os_is_android = subprocess.Popen( + ['uname', '-o'], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, + universal_newlines=True).communicate()[0] == 'Android\n' + except Exception: + pass + + def main(): # Add self to the root search path. sys.path.insert(0, self_path) From fa0c34648d24ecb2559ce08f3a5cabc4a8f81b8c Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 21 Nov 2020 16:18:50 +0300 Subject: [PATCH 067/123] [Build] Support cross-compilation via xb premake --target_os --- xenia-build | 311 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 181 insertions(+), 130 deletions(-) diff --git a/xenia-build b/xenia-build index ccea6201c..5fbc701f5 100755 --- a/xenia-build +++ b/xenia-build @@ -23,16 +23,89 @@ self_path = os.path.dirname(os.path.abspath(__file__)) # Detect if building on Android via Termux. 
-host_os_is_android = False +host_linux_platform_is_android = False if sys.platform == 'linux': try: - host_os_is_android = subprocess.Popen( + host_linux_platform_is_android = subprocess.Popen( ['uname', '-o'], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, universal_newlines=True).communicate()[0] == 'Android\n' except Exception: pass +def import_subprocess_environment(args): + popen = subprocess.Popen( + args, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True) + variables, _ = popen.communicate() + envvars_to_save = ( + 'devenvdir', + 'include', + 'lib', + 'libpath', + 'path', + 'pathext', + 'systemroot', + 'temp', + 'tmp', + 'windowssdkdir', + ) + for line in variables.splitlines(): + for envvar in envvars_to_save: + if re.match(envvar + '=', line.lower()): + var, setting = line.split('=', 1) + if envvar == 'path': + setting = os.path.dirname(sys.executable) + os.pathsep + setting + os.environ[var.upper()] = setting + break + + +def import_vs_environment(): + """Finds the installed Visual Studio version and imports + interesting environment variables into os.environ. + + Returns: + A version such as 2015 or None if no installation is found. 
+ """ + + if sys.platform != 'win32': + return None + + version = 0 + install_path = None + env_tool_args = None + + vswhere = subprocess.check_output('third_party/vswhere/vswhere.exe -version "[15,)" -latest -prerelease -format json -utf8', shell=False, universal_newlines=True, encoding="utf-8") + if vswhere: + vswhere = json.loads(vswhere) + if vswhere and len(vswhere) > 0: + version = int(vswhere[0].get("catalog", {}).get("productLineVersion", 2017)) + install_path = vswhere[0].get("installationPath", None) + + if version < 2017: + if 'VS140COMNTOOLS' in os.environ: + version = 2015 + vcvars_path = os.environ['VS140COMNTOOLS'] + vcvars_path = os.path.join(tools_path, '..\\..\\vc\\vcvarsall.bat') + env_tool_args = [vcvars_path, 'x64', '&&', 'set'] + else: + vsdevcmd_path = os.path.join(install_path, 'Common7\\Tools\\VsDevCmd.bat') + if os.path.isfile(vsdevcmd_path) and os.access(vsdevcmd_path, os.X_OK): + env_tool_args = [vsdevcmd_path, '-arch=amd64', '-host_arch=amd64', '&&', 'set'] + else: + vcvars_path = os.path.join(install_path, 'VC\\Auxiliary\\Build\\vcvarsall.bat') + env_tool_args = [vcvars_path, 'x64', '&&', 'set'] + + if version == 0: + return None + + import_subprocess_environment(env_tool_args) + os.environ['VSVERSION'] = str(version) + return version + + +vs_version = import_vs_environment() + + def main(): # Add self to the root search path. sys.path.insert(0, self_path) @@ -57,13 +130,11 @@ def main(): sys.exit(1) # Grab Visual Studio version and execute shell to set up environment. 
- if sys.platform == 'win32': - vs_version = import_vs_environment() - if vs_version is None: - print('ERROR: Visual Studio not found!') - print('Please refer to the building guide:') - print('https://github.com/xenia-project/xenia/blob/master/docs/building.md') - sys.exit(1) + if sys.platform == 'win32' and vs_version is None: + print('WARNING: Visual Studio not found!') + print('Building for Windows will not be supported.') + print('Please refer to the building guide:') + print('https://github.com/xenia-project/xenia/blob/master/docs/building.md') # Setup main argument parser and common arguments. parser = argparse.ArgumentParser(prog='xenia-build') @@ -109,72 +180,6 @@ def print_box(msg): .format('', msg, len(msg) + 2)) -def import_vs_environment(): - """Finds the installed Visual Studio version and imports - interesting environment variables into os.environ. - - Returns: - A version such as 2015 or None if no installation is found. - """ - version = 0 - install_path = None - env_tool_args = None - - vswhere = subprocess.check_output('third_party/vswhere/vswhere.exe -version "[15,)" -latest -prerelease -format json -utf8', shell=False, universal_newlines=True, encoding="utf-8") - if vswhere: - vswhere = json.loads(vswhere) - if vswhere and len(vswhere) > 0: - version = int(vswhere[0].get("catalog", {}).get("productLineVersion", 2017)) - install_path = vswhere[0].get("installationPath", None) - - if version < 2017: - if 'VS140COMNTOOLS' in os.environ: - version = 2015 - vcvars_path = os.environ['VS140COMNTOOLS'] - vcvars_path = os.path.join(tools_path, '..\\..\\vc\\vcvarsall.bat') - env_tool_args = [vcvars_path, 'x64', '&&', 'set'] - else: - vsdevcmd_path = os.path.join(install_path, 'Common7\\Tools\\VsDevCmd.bat') - if os.path.isfile(vsdevcmd_path) and os.access(vsdevcmd_path, os.X_OK): - env_tool_args = [vsdevcmd_path, '-arch=amd64', '-host_arch=amd64', '&&', 'set'] - else: - vcvars_path = os.path.join(install_path, 'VC\\Auxiliary\\Build\\vcvarsall.bat') - 
env_tool_args = [vcvars_path, 'x64', '&&', 'set'] - - if version == 0: - return None - - import_subprocess_environment(env_tool_args) - os.environ['VSVERSION'] = str(version) - return version - - -def import_subprocess_environment(args): - popen = subprocess.Popen( - args, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True) - variables, _ = popen.communicate() - envvars_to_save = ( - 'devenvdir', - 'include', - 'lib', - 'libpath', - 'path', - 'pathext', - 'systemroot', - 'temp', - 'tmp', - 'windowssdkdir', - ) - for line in variables.splitlines(): - for envvar in envvars_to_save: - if re.match(envvar + '=', line.lower()): - var, setting = line.split('=', 1) - if envvar == 'path': - setting = os.path.dirname(sys.executable) + os.pathsep + setting - os.environ[var.upper()] = setting - break - - def has_bin(binary): """Checks whether the given binary is present. @@ -347,6 +352,37 @@ def get_clang_format_binary(): sys.exit(1) +def get_premake_target_os(target_os_override=None): + """Gets the target --os to pass to premake, either for the current platform + or for the user-specified cross-compilation target. + + Args: + target_os_override: override specified by the user for cross-compilation, + or None to target the host platform. + + Returns: + Target --os to pass to premake. If a return value of this function valid + for the current configuration is passed to it function again, the same + value will be returned. 
+ """ + if sys.platform == 'darwin': + target_os = 'macosx' + elif sys.platform == 'win32': + target_os = 'windows' + elif host_linux_platform_is_android: + target_os = 'android' + else: + target_os = 'linux' + if target_os_override is not None and target_os_override != target_os: + if target_os_override == 'android': + target_os = target_os_override + else: + print( + 'ERROR: cross-compilation is only supported for Android target') + sys.exit(0) + return target_os + + def run_premake(target_os, action, cc=None): """Runs premake on the main project with the given format. @@ -373,42 +409,26 @@ def run_premake(target_os, action, cc=None): return ret -def run_premake_clean(): - """Runs a premake clean operation. - """ - if sys.platform == 'darwin': - return run_premake('macosx', 'clean') - elif sys.platform == 'win32': - return run_premake('windows', 'clean') - else: - return run_premake('linux', 'clean') - -def run_platform_premake(cc='clang', devenv=None): +def run_platform_premake(target_os_override=None, cc='clang', devenv=None): """Runs all gyp configurations. """ - if sys.platform == 'darwin': - return run_premake('macosx', 'xcode4') - elif sys.platform == 'win32': - vs_version = '2015' - if 'VSVERSION' in os.environ: - vs_version = os.environ['VSVERSION'] - - return run_premake('windows', devenv or ('vs' + vs_version)) - else: - return run_premake('linux', devenv or 'gmake2', cc) - - -def run_premake_export_commands(): - """Runs premake to generate an LLVM compile_commands.json file. - """ - # TODO(benvanik): only do linux? whatever clang-tidy is ok with. 
- if sys.platform == 'darwin': - run_premake('macosx', 'export-compile-commands') - elif sys.platform == 'win32': - run_premake('windows', 'export-compile-commands') - else: - run_premake('linux', 'export-compile-commands') + target_os = get_premake_target_os(target_os_override) + if devenv is None: + if target_os == 'macosx': + devenv = 'xcode4' + elif target_os == 'windows': + vs_version = '2015' + if 'VSVERSION' in os.environ: + vs_version = os.environ['VSVERSION'] + devenv = 'vs' + vs_version + elif target_os == 'android': + devenv = 'androidmk' + else: + devenv = 'gmake2' + if target_os != 'linux': + cc = None + return run_premake(target_os=target_os, action=devenv, cc=cc) def get_build_bin_path(args): @@ -545,6 +565,9 @@ class SetupCommand(Command): name='setup', help_short='Setup the build environment.', *args, **kwargs) + self.parser.add_argument( + '--target_os', default=None, + help='Target OS passed to premake, for cross-compilation') def execute(self, args, pass_args, cwd): print('Setting up the build environment...') @@ -559,7 +582,7 @@ class SetupCommand(Command): print('') print('- running premake...') - if run_platform_premake() == 0: + if run_platform_premake(target_os_override=args['target_os']) == 0: print('') print('Success!') @@ -575,8 +598,12 @@ class PullCommand(Command): name='pull', help_short='Pulls the repo and all dependencies and rebases changes.', *args, **kwargs) - self.parser.add_argument('--merge', action='store_true', - help='Merges on master instead of rebasing.') + self.parser.add_argument( + '--merge', action='store_true', + help='Merges on master instead of rebasing.') + self.parser.add_argument( + '--target_os', default=None, + help='Target OS passed to premake, for cross-compilation') def execute(self, args, pass_args, cwd): print('Pulling...') @@ -609,7 +636,7 @@ class PullCommand(Command): print('') print('- running premake...') - if run_platform_premake() == 0: + if 
run_platform_premake(target_os_override=args['target_os']) == 0: print('') print('Success!') @@ -629,12 +656,16 @@ class PremakeCommand(Command): '--cc', default='clang', help='Compiler toolchain passed to premake') self.parser.add_argument( '--devenv', default=None, help='Development environment') + self.parser.add_argument( + '--target_os', default=None, + help='Target OS passed to premake, for cross-compilation') def execute(self, args, pass_args, cwd): # Update premake. If no binary found, it will be built from source. print('Running premake...') print('') - if run_platform_premake(cc=args['cc'], devenv=args['devenv']) == 0: + if run_platform_premake(target_os_override=args['target_os'], + cc=args['cc'], devenv=args['devenv']) == 0: print('Success!') return 0 @@ -667,7 +698,7 @@ class BaseBuildCommand(Command): def execute(self, args, pass_args, cwd): if not args['no_premake']: print('- running premake...') - run_platform_premake(args['cc']) + run_platform_premake(cc=args['cc']) print('') threads = args['j'] @@ -678,21 +709,27 @@ class BaseBuildCommand(Command): 'all' if not len(args['target']) else ', '.join(args['target']), args['config'])) if sys.platform == 'win32': - targets = None - if len(args['target']): - targets = '/t:' + ';'.join(target + (':Rebuild' if args['force'] else '') - for target in args['target']) + if vs_version is None: + print('ERROR: Visual Studio is not installed.'); + result = 1 else: - targets = '/t:Rebuild' if args['force'] else None + targets = None + if len(args['target']): + targets = '/t:' + ';'.join( + target + (':Rebuild' if args['force'] else '') + for target in args['target']) + else: + targets = '/t:Rebuild' if args['force'] else None - result = subprocess.call([ - 'msbuild', - 'build/xenia.sln', - '/nologo', - '/m', - '/v:m', - '/p:Configuration=' + args['config'], - ] + ([targets] if targets is not None else []) + pass_args, shell=False) + result = subprocess.call([ + 'msbuild', + 'build/xenia.sln', + '/nologo', + '/m', + 
'/v:m', + '/p:Configuration=' + args['config'], + ] + ([targets] if targets is not None else []) + pass_args, + shell=False) elif sys.platform == 'darwin': # TODO(benvanik): other platforms. print('ERROR: don\'t know how to build on this platform.') @@ -1172,13 +1209,16 @@ class CleanCommand(Command): name='clean', help_short='Removes intermediate files and build outputs.', *args, **kwargs) + self.parser.add_argument( + '--target_os', default=None, + help='Target OS passed to premake, for cross-compilation') def execute(self, args, pass_args, cwd): print('Cleaning build artifacts...') print('') print('- premake clean...') - run_premake_clean() + run_premake(get_premake_target_os(args['target_os']), 'clean') print('') print('Success!') @@ -1194,6 +1234,9 @@ class NukeCommand(Command): name='nuke', help_short='Removes all build/ output.', *args, **kwargs) + self.parser.add_argument( + '--target_os', default=None, + help='Target OS passed to premake, for cross-compilation') def execute(self, args, pass_args, cwd): print('Cleaning build artifacts...') @@ -1214,7 +1257,7 @@ class NukeCommand(Command): print('') print('- running premake...') - run_platform_premake() + run_platform_premake(target_os_override=args['target_os']) print('') print('Success!') @@ -1442,10 +1485,15 @@ class TidyCommand(Command): self.parser.add_argument( '--fix', action='store_true', help='Applies suggested fixes, where possible.') + self.parser.add_argument( + '--target_os', default=None, + help='Target OS passed to premake, for cross-compilation') def execute(self, args, pass_args, cwd): # Run premake to generate our compile_commands.json file for clang to use. - run_premake_export_commands() + # TODO(benvanik): only do linux? whatever clang-tidy is ok with. 
+ run_premake(get_premake_target_os(args['target_os']), + 'export-compile-commands') platform_name = '' if sys.platform == 'darwin': @@ -1505,6 +1553,9 @@ class DevenvCommand(Command): devenv = None show_reload_prompt = False if sys.platform == 'win32': + if vs_version is None: + print('ERROR: Visual Studio is not installed.'); + return 1 print('Launching Visual Studio...') elif has_bin('clion') or has_bin('clion.sh'): print('Launching CLion...') From 4617dc556900b1ec53cb2dfd984fcc28f5a421a6 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 13 Dec 2020 18:41:07 +0300 Subject: [PATCH 068/123] Merge branch 'master' into vulkan --- .appveyor.yml | 1 + .gitmodules | 3 + src/xenia/app/xenia_main.cc | 32 +- src/xenia/base/hash.h | 2 +- src/xenia/base/xxhash.h | 21 + src/xenia/cpu/backend/x64/x64_emitter.cc | 2 + src/xenia/cpu/backend/x64/x64_emitter.h | 2 + src/xenia/cpu/backend/x64/x64_seq_vector.cc | 42 +- src/xenia/cpu/hir/opcodes.h | 49 + src/xenia/cpu/ppc/ppc_emit_altivec.cc | 16 +- src/xenia/cpu/ppc/testing/instr_frsqrte.s | 24 +- src/xenia/cpu/ppc/testing/ppc_testing_main.cc | 11 +- src/xenia/cpu/processor.cc | 1 - src/xenia/emulator.cc | 6 +- src/xenia/emulator.h | 7 +- src/xenia/gpu/command_processor.cc | 4 +- src/xenia/gpu/command_processor.h | 5 +- .../gpu/d3d12/d3d12_command_processor.cc | 126 +- src/xenia/gpu/d3d12/d3d12_command_processor.h | 15 +- src/xenia/gpu/d3d12/d3d12_shader.cc | 76 +- src/xenia/gpu/d3d12/d3d12_shader.h | 104 +- src/xenia/gpu/d3d12/deferred_command_list.cc | 4 +- src/xenia/gpu/d3d12/pipeline_cache.cc | 1087 +++++++++-------- src/xenia/gpu/d3d12/pipeline_cache.h | 97 +- src/xenia/gpu/d3d12/render_target_cache.cc | 41 +- src/xenia/gpu/d3d12/render_target_cache.h | 11 +- src/xenia/gpu/d3d12/texture_cache.cc | 3 +- src/xenia/gpu/draw_util.cc | 13 + src/xenia/gpu/draw_util.h | 1 + src/xenia/gpu/dxbc_shader.cc | 27 + src/xenia/gpu/dxbc_shader.h | 83 ++ src/xenia/gpu/dxbc_shader_translator.cc | 282 +++-- 
src/xenia/gpu/dxbc_shader_translator.h | 188 ++- src/xenia/gpu/dxbc_shader_translator_fetch.cc | 2 +- .../gpu/dxbc_shader_translator_memexport.cc | 2 +- src/xenia/gpu/dxbc_shader_translator_om.cc | 415 +++++-- src/xenia/gpu/gpu_flags.cc | 54 + src/xenia/gpu/gpu_flags.h | 63 + src/xenia/gpu/graphics_system.cc | 20 +- src/xenia/gpu/graphics_system.h | 2 +- src/xenia/gpu/registers.h | 16 +- src/xenia/gpu/sampler_info.cc | 4 +- src/xenia/gpu/shader.cc | 94 +- src/xenia/gpu/shader.h | 171 ++- src/xenia/gpu/shader_compiler_main.cc | 10 +- src/xenia/gpu/shader_translator.cc | 273 +++-- src/xenia/gpu/shader_translator.h | 71 +- .../edram_load_depth_float24and32_cs.cso | Bin 0 -> 3500 bytes .../edram_load_depth_float24and32_cs.h | 296 +++++ .../edram_load_depth_float24and32_cs.txt | 117 ++ .../d3d12_5_1/edram_load_depth_float_cs.cso | Bin 3500 -> 2660 bytes .../d3d12_5_1/edram_load_depth_float_cs.h | 218 ++-- .../d3d12_5_1/edram_load_depth_float_cs.txt | 45 +- .../edram_store_depth_float24and32_cs.cso | Bin 0 -> 2660 bytes .../edram_store_depth_float24and32_cs.h | 226 ++++ .../edram_store_depth_float24and32_cs.txt | 95 ++ .../d3d12_5_1/edram_store_depth_float_cs.cso | Bin 2660 -> 2600 bytes .../d3d12_5_1/edram_store_depth_float_cs.h | 135 +- .../d3d12_5_1/edram_store_depth_float_cs.txt | 74 +- .../bytecode/d3d12_5_1/float24_round_ps.cso | Bin 0 -> 1816 bytes .../bytecode/d3d12_5_1/float24_round_ps.h | 156 +++ .../bytecode/d3d12_5_1/float24_round_ps.txt | 74 ++ .../d3d12_5_1/float24_truncate_ps.cso | Bin 0 -> 1148 bytes .../bytecode/d3d12_5_1/float24_truncate_ps.h | 100 ++ .../d3d12_5_1/float24_truncate_ps.txt | 55 + .../shaders/edram_load_depth_float.cs.hlsl | 18 +- .../edram_load_depth_float24and32.cs.hlsl | 31 + .../shaders/edram_store_depth_float.cs.hlsl | 17 +- .../edram_store_depth_float24and32.cs.hlsl | 25 + .../shaders/edram_store_depth_unorm.cs.hlsl | 3 +- src/xenia/gpu/shaders/float24_round.ps.hlsl | 13 + .../gpu/shaders/float24_truncate.ps.hlsl | 38 + 
src/xenia/gpu/shaders/pixel_formats.hlsli | 25 + .../gpu/shaders/primitive_point_list.gs.hlsl | 19 +- .../shaders/primitive_rectangle_list.gs.hlsl | 21 +- src/xenia/gpu/shaders/xenos_draw.hlsli | 6 +- src/xenia/gpu/spirv_shader_translator.cc | 34 +- src/xenia/gpu/spirv_shader_translator.h | 42 +- src/xenia/gpu/texture_conversion.cc | 3 +- src/xenia/gpu/texture_info.cc | 5 +- src/xenia/gpu/trace_dump.cc | 2 +- src/xenia/gpu/trace_viewer.cc | 21 +- src/xenia/gpu/ucode.h | 44 +- .../gpu/vulkan/vulkan_command_processor.cc | 16 +- src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 75 +- src/xenia/gpu/vulkan/vulkan_pipeline_cache.h | 31 +- .../gpu/vulkan/vulkan_render_target_cache.h | 4 +- src/xenia/gpu/vulkan/vulkan_shader.cc | 39 +- src/xenia/gpu/vulkan/vulkan_shader.h | 24 +- src/xenia/gpu/xenos.cc | 32 +- src/xenia/gpu/xenos.h | 10 +- src/xenia/hid/hid_demo.cc | 29 +- src/xenia/hid/sdl/sdl_input_driver.cc | 105 +- src/xenia/hid/sdl/sdl_input_driver.h | 14 +- src/xenia/kernel/xam/user_profile.h | 2 +- src/xenia/kernel/xam/xam_enum.cc | 72 +- src/xenia/kernel/xam/xam_net.cc | 5 + src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc | 3 +- src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc | 15 +- .../kernel/xboxkrnl/xboxkrnl_threading.cc | 26 +- src/xenia/kernel/xfile.cc | 2 +- src/xenia/kernel/xthread.cc | 70 +- src/xenia/kernel/xthread.h | 81 +- src/xenia/ui/window_gtk.cc | 12 +- src/xenia/vfs/virtual_file_system.cc | 9 +- src/xenia/vfs/virtual_file_system.h | 3 +- src/xenia/xbox.h | 1 + third_party/xxhash | 1 + third_party/xxhash/LICENSE | 24 - third_party/xxhash/Makefile | 67 - third_party/xxhash/README.md | 74 -- third_party/xxhash/README.xenia | 2 - third_party/xxhash/xxhash.c | 928 -------------- third_party/xxhash/xxhash.h | 156 --- third_party/xxhash/xxhsum.c | 689 ----------- 115 files changed, 4290 insertions(+), 3872 deletions(-) create mode 100644 src/xenia/base/xxhash.h create mode 100644 src/xenia/gpu/dxbc_shader.cc create mode 100644 src/xenia/gpu/dxbc_shader.h create mode 
100644 src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float24and32_cs.cso create mode 100644 src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float24and32_cs.h create mode 100644 src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float24and32_cs.txt create mode 100644 src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float24and32_cs.cso create mode 100644 src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float24and32_cs.h create mode 100644 src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float24and32_cs.txt create mode 100644 src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_round_ps.cso create mode 100644 src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_round_ps.h create mode 100644 src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_round_ps.txt create mode 100644 src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_truncate_ps.cso create mode 100644 src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_truncate_ps.h create mode 100644 src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_truncate_ps.txt create mode 100644 src/xenia/gpu/shaders/edram_load_depth_float24and32.cs.hlsl create mode 100644 src/xenia/gpu/shaders/edram_store_depth_float24and32.cs.hlsl create mode 100644 src/xenia/gpu/shaders/float24_round.ps.hlsl create mode 100644 src/xenia/gpu/shaders/float24_truncate.ps.hlsl create mode 160000 third_party/xxhash delete mode 100644 third_party/xxhash/LICENSE delete mode 100644 third_party/xxhash/Makefile delete mode 100644 third_party/xxhash/README.md delete mode 100644 third_party/xxhash/README.xenia delete mode 100644 third_party/xxhash/xxhash.c delete mode 100644 third_party/xxhash/xxhash.h delete mode 100644 third_party/xxhash/xxhsum.c diff --git a/.appveyor.yml b/.appveyor.yml index ccd75d2ab..4329b47e0 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -29,6 +29,7 @@ init: - git config --global core.autocrlf input install: + - cmd: vcpkg integrate remove - cmd: xb setup platform: Windows diff --git 
a/.gitmodules b/.gitmodules index 0ab83261a..10b24d730 100644 --- a/.gitmodules +++ b/.gitmodules @@ -64,6 +64,9 @@ [submodule "third_party/date"] path = third_party/date url = https://github.com/HowardHinnant/date.git +[submodule "third_party/xxhash"] + path = third_party/xxhash + url = https://github.com/Cyan4973/xxHash.git [submodule "third_party/glslang"] path = third_party/glslang url = https://github.com/KhronosGroup/glslang.git diff --git a/src/xenia/app/xenia_main.cc b/src/xenia/app/xenia_main.cc index f13f0de32..e3f8b9fbc 100644 --- a/src/xenia/app/xenia_main.cc +++ b/src/xenia/app/xenia_main.cc @@ -65,6 +65,14 @@ DEFINE_path( "Root path for guest content storage (saves, etc.), or empty to use the " "content folder under the storage root.", "Storage"); +DEFINE_path( + cache_root, "", + "Root path for files used to speed up certain parts of the emulator or the " + "game. These files may be persistent, but they can be deleted without " + "major side effects such as progress loss. If empty, the cache folder " + "under the storage root, or, if available, the cache directory preferred " + "for the OS, will be used.", + "Storage"); DEFINE_bool(mount_scratch, false, "Enable scratch mount", "Storage"); DEFINE_bool(mount_cache, false, "Enable cache mount", "Storage"); @@ -189,10 +197,12 @@ std::vector> CreateInputDrivers( Factory factory; #if XE_PLATFORM_WIN32 factory.Add("xinput", xe::hid::xinput::Create); +#endif // XE_PLATFORM_WIN32 + factory.Add("sdl", xe::hid::sdl::Create); +#if XE_PLATFORM_WIN32 // WinKey input driver should always be the last input driver added! 
factory.Add("winkey", xe::hid::winkey::Create); #endif // XE_PLATFORM_WIN32 - factory.Add("sdl", xe::hid::sdl::Create); for (auto& driver : factory.CreateAll(cvars::hid, window)) { if (XSUCCEEDED(driver->Setup())) { drivers.emplace_back(std::move(driver)); @@ -220,6 +230,8 @@ int xenia_main(const std::vector& args) { #if defined(XE_PLATFORM_WIN32) || defined(XE_PLATFORM_GNU_LINUX) storage_root = storage_root / "Xenia"; #else + // TODO(Triang3l): Point to the app's external storage "files" directory + // on Android. #warning Unhandled platform for the data root. storage_root = storage_root / "Xenia"; #endif @@ -243,13 +255,29 @@ int xenia_main(const std::vector& args) { content_root = std::filesystem::absolute(content_root); XELOGI("Content root: {}", xe::path_to_utf8(content_root)); + std::filesystem::path cache_root = cvars::cache_root; + if (cache_root.empty()) { + cache_root = storage_root / "cache"; + // TODO(Triang3l): Point to the app's external storage "cache" directory on + // Android. + } else { + // If content root isn't an absolute path, then it should be relative to the + // storage root. + if (!cache_root.is_absolute()) { + cache_root = storage_root / cache_root; + } + } + cache_root = std::filesystem::absolute(cache_root); + XELOGI("Cache root: {}", xe::path_to_utf8(cache_root)); + if (cvars::discord) { discord::DiscordPresence::Initialize(); discord::DiscordPresence::NotPlaying(); } // Create the emulator but don't initialize so we can setup the window. - auto emulator = std::make_unique("", storage_root, content_root); + auto emulator = + std::make_unique("", storage_root, content_root, cache_root); // Main emulator display window. 
auto emulator_window = EmulatorWindow::Create(emulator.get()); diff --git a/src/xenia/base/hash.h b/src/xenia/base/hash.h index b4f252eb4..88c98b64c 100644 --- a/src/xenia/base/hash.h +++ b/src/xenia/base/hash.h @@ -17,7 +17,7 @@ namespace hash { // For use in unordered_sets and unordered_maps (primarily multisets and // multimaps, with manual collision resolution), where the hash is calculated -// externally (for instance, as XXH64), possibly requiring context data rather +// externally (for instance, as XXH3), possibly requiring context data rather // than a pure function to calculate the hash template struct IdentityHasher { diff --git a/src/xenia/base/xxhash.h b/src/xenia/base/xxhash.h new file mode 100644 index 000000000..30960e8d5 --- /dev/null +++ b/src/xenia/base/xxhash.h @@ -0,0 +1,21 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_BASE_XXHASH_H_ +#define XENIA_BASE_XXHASH_H_ + +#define XXH_INLINE_ALL + +// Can't use XXH_X86DISPATCH because XXH is calculated on multiple threads, +// while the dispatch writes the result (multiple pointers without any +// synchronization) to XXH_g_dispatch at the first call. 
+ +#include "third_party/xxhash/xxhash.h" + +#endif // XENIA_BASE_XXHASH_H_ diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc index 37d1cdc77..92f45d493 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.cc +++ b/src/xenia/cpu/backend/x64/x64_emitter.cc @@ -746,6 +746,8 @@ static const vec128_t xmm_consts[] = { /* XMMIntMaxPD */ vec128d(INT_MAX), /* XMMPosIntMinPS */ vec128f((float)0x80000000u), /* XMMQNaN */ vec128i(0x7FC00000u), + /* XMMInt127 */ vec128i(0x7Fu), + /* XMM2To32 */ vec128f(0x1.0p32f), }; // First location to try and place constants. diff --git a/src/xenia/cpu/backend/x64/x64_emitter.h b/src/xenia/cpu/backend/x64/x64_emitter.h index 4f661a331..4a31543b6 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.h +++ b/src/xenia/cpu/backend/x64/x64_emitter.h @@ -114,6 +114,8 @@ enum XmmConst { XMMIntMaxPD, XMMPosIntMinPS, XMMQNaN, + XMMInt127, + XMM2To32, }; // Unfortunately due to the design of xbyak we have to pass this to the ctor. diff --git a/src/xenia/cpu/backend/x64/x64_seq_vector.cc b/src/xenia/cpu/backend/x64/x64_seq_vector.cc index 5cfb4615c..4c7fb665a 100644 --- a/src/xenia/cpu/backend/x64/x64_seq_vector.cc +++ b/src/xenia/cpu/backend/x64/x64_seq_vector.cc @@ -33,19 +33,41 @@ struct VECTOR_CONVERT_I2F static void Emit(X64Emitter& e, const EmitArgType& i) { // flags = ARITHMETIC_UNSIGNED if (i.instr->flags & ARITHMETIC_UNSIGNED) { - // xmm0 = mask of positive values - e.vpcmpgtd(e.xmm0, i.src1, e.GetXmmConstPtr(XMMFFFF)); + // Round manually to (1.stored mantissa bits * 2^31) or to 2^32 to the + // nearest even (the only rounding mode used on AltiVec) if the number is + // 0x80000000 or greater, instead of converting src & 0x7FFFFFFF and then + // adding 2147483648.0f, which results in double rounding that can give a + // result larger than needed - see OPCODE_VECTOR_CONVERT_I2F notes. 
- // scale any values >= (unsigned)INT_MIN back to [0, INT_MAX] - e.vpsubd(e.xmm1, i.src1, e.GetXmmConstPtr(XMMSignMaskI32)); - e.vblendvps(e.xmm1, e.xmm1, i.src1, e.xmm0); + // [0x80000000, 0xFFFFFFFF] case: - // xmm1 = [0, INT_MAX] - e.vcvtdq2ps(i.dest, e.xmm1); + // Round to the nearest even, from (0x80000000 | 31 stored mantissa bits) + // to ((-1 << 23) | 23 stored mantissa bits), or to 0 if the result should + // be 4294967296.0f. + // xmm0 = src + 0b01111111 + ((src >> 8) & 1) + // (xmm1 also used to launch reg + mem early and to require it late) + e.vpaddd(e.xmm1, i.src1, e.GetXmmConstPtr(XMMInt127)); + e.vpslld(e.xmm0, i.src1, 31 - 8); + e.vpsrld(e.xmm0, e.xmm0, 31); + e.vpaddd(e.xmm0, e.xmm0, e.xmm1); + // xmm0 = (0xFF800000 | 23 explicit mantissa bits), or 0 if overflowed + e.vpsrad(e.xmm0, e.xmm0, 8); + // Calculate the result for the [0x80000000, 0xFFFFFFFF] case - take the + // rounded mantissa, and add -1 or 0 to the exponent of 32, depending on + // whether the number should be (1.stored mantissa bits * 2^31) or 2^32. + // xmm0 = [0x80000000, 0xFFFFFFFF] case result + e.vpaddd(e.xmm0, e.xmm0, e.GetXmmConstPtr(XMM2To32)); - // scale values back above [INT_MIN, UINT_MAX] - e.vpandn(e.xmm0, e.xmm0, e.GetXmmConstPtr(XMMPosIntMinPS)); - e.vaddps(i.dest, i.dest, e.xmm0); + // [0x00000000, 0x7FFFFFFF] case + // (during vblendvps reg -> vpaddd reg -> vpaddd mem dependency): + + // Convert from signed integer to float. + // xmm1 = [0x00000000, 0x7FFFFFFF] case result + e.vcvtdq2ps(e.xmm1, i.src1); + + // Merge the two ways depending on whether the number is >= 0x80000000 + // (has high bit set). 
+ e.vblendvps(i.dest, e.xmm1, e.xmm0, i.src1); } else { e.vcvtdq2ps(i.dest, i.src1); } diff --git a/src/xenia/cpu/hir/opcodes.h b/src/xenia/cpu/hir/opcodes.h index 488e7e168..1649ec9dc 100644 --- a/src/xenia/cpu/hir/opcodes.h +++ b/src/xenia/cpu/hir/opcodes.h @@ -143,6 +143,55 @@ enum Opcode { OPCODE_TRUNCATE, OPCODE_CONVERT, OPCODE_ROUND, + // Note that 2147483648.0 + (src & 0x7FFFFFFF) is not a correct way of + // performing the uint -> float conversion for large numbers on backends where + // only sint -> float is available. + // + // Take 0b11000000000000000000000101000001 as an example, + // or 1.1000000000000000000000101000001 * 2^31. + // This one has 31 mantissa bits (excluding the implicit 1.), and needs to be + // rounded to 23 bits - 8 mantissa bits need to be dropped: + // 10000000000000000000001_01000001 + // + // Rounding to the nearest even (the only rounding mode that exists on + // AltiVec, and the likely rounding mode in the implementations) should be + // done downwards - 01000001 of 1_01000001 is in [00000000, 01111111]. + // The correct mantissa in this case is: + // 1.10000000000000000000001 * 2^31. + // + // With a two-step conversion, rounding is done twice instead, which gives an + // incorrect result. + // + // First, converting the low 31 bits to float: + // The number is 0.1000000000000000000000101000001 * 2^31. + // Normalizing it, we get 1.000000000000000000000101000001 (30 significand + // bits). + // We need to round 30 bits to 23 - 7 bits need to be dropped: + // 00000000000000000000010_1000001 + // + // Rounding to the nearest even is done upwards in this case - 1000001 of + // 0_1000001 is in [1000001, 1111111]. + // The result of the sint -> float conversion is: + // 1.00000000000000000000011 * 2^30. + // + // Now 2147483648.0 (1 * 2^31) needs to be added. 
Aligning the exponents, we + // get: + // 0.|10000000000000000000001|1 * 2^31 + // + 1.|00000000000000000000000| * 2^31 + // = 1.|10000000000000000000001|1 * 2^31 + // + // At "infinite precision", the result has 24 significand bits, but only 23 + // can be stored, thus rounding to the nearest even needs to be done. 1_1 is + // (odd + 0.5). 0.5 is ambiguous, thus tie-breaking to the nearest even - + // which is above in this case - is done. The result is: + // 1.10000000000000000000010 * 2^31. + // + // This is incorrect - larger than the correctly rounded result, which is: + // 1.10000000000000000000001 * 2^31. + // + // Test cases checked on real hardware via vcfux: 0xFFFDFF7E, 0xFFFCFF7D - + // should be 0x4F7FFDFF and 0x4F7FFCFF respectively, not 0x4F7FFE00 and + // 0x4F7FFD00. OPCODE_VECTOR_CONVERT_I2F, OPCODE_VECTOR_CONVERT_F2I, OPCODE_LOAD_VECTOR_SHL, diff --git a/src/xenia/cpu/ppc/ppc_emit_altivec.cc b/src/xenia/cpu/ppc/ppc_emit_altivec.cc index 08ea1b2fa..770def3c1 100644 --- a/src/xenia/cpu/ppc/ppc_emit_altivec.cc +++ b/src/xenia/cpu/ppc/ppc_emit_altivec.cc @@ -519,9 +519,11 @@ int InstrEmit_vavguw(PPCHIRBuilder& f, const InstrData& i) { int InstrEmit_vcfsx_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb, uint32_t uimm) { // (VD) <- float(VB as signed) / 2^uimm - float fuimm = static_cast(std::exp2(uimm)); - Value* v = f.Div(f.VectorConvertI2F(f.LoadVR(vb)), - f.Splat(f.LoadConstantFloat32(fuimm), VEC128_TYPE)); + Value* v = f.VectorConvertI2F(f.LoadVR(vb)); + if (uimm) { + float fuimm = std::ldexp(1.0f, -int(uimm)); + v = f.Mul(v, f.Splat(f.LoadConstantFloat32(fuimm), VEC128_TYPE)); + } f.StoreVR(vd, v); return 0; } @@ -535,9 +537,11 @@ int InstrEmit_vcsxwfp128(PPCHIRBuilder& f, const InstrData& i) { int InstrEmit_vcfux_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb, uint32_t uimm) { // (VD) <- float(VB as unsigned) / 2^uimm - float fuimm = static_cast(std::exp2(uimm)); - Value* v = f.Div(f.VectorConvertI2F(f.LoadVR(vb), ARITHMETIC_UNSIGNED), - 
f.Splat(f.LoadConstantFloat32(fuimm), VEC128_TYPE)); + Value* v = f.VectorConvertI2F(f.LoadVR(vb), ARITHMETIC_UNSIGNED); + if (uimm) { + float fuimm = std::ldexp(1.0f, -int(uimm)); + v = f.Mul(v, f.Splat(f.LoadConstantFloat32(fuimm), VEC128_TYPE)); + } f.StoreVR(vd, v); return 0; } diff --git a/src/xenia/cpu/ppc/testing/instr_frsqrte.s b/src/xenia/cpu/ppc/testing/instr_frsqrte.s index f114cb597..df7f0e1d2 100644 --- a/src/xenia/cpu/ppc/testing/instr_frsqrte.s +++ b/src/xenia/cpu/ppc/testing/instr_frsqrte.s @@ -1,21 +1,21 @@ # frsqrte tests disabled because accuracy is CPU dependent. -#test_frsqrte_1: - #_ REGISTER_IN f1 1.0 +test_frsqrte_1: + # _ REGISTER_IN f1 1.0 # frsqrte f1, f1 -# blr - #_ REGISTER_OUT f1 0.99975585937500000 + blr + # _ REGISTER_OUT f1 0.99975585937500000 # want: 0.97 -#test_frsqrte_2: - #_ REGISTER_IN f1 64.0 +test_frsqrte_2: + # _ REGISTER_IN f1 64.0 # frsqrte f1, f1 -# blr - #_ REGISTER_OUT f1 0.12496948242187500 + blr + # _ REGISTER_OUT f1 0.12496948242187500 -#test_frsqrte_3: - #_ REGISTER_IN f1 0.5 +test_frsqrte_3: + # _ REGISTER_IN f1 0.5 # frsqrte f1, f1 -# blr - #_ REGISTER_OUT f1 1.41381835937500000 + blr + # _ REGISTER_OUT f1 1.41381835937500000 # want: 1.375 diff --git a/src/xenia/cpu/ppc/testing/ppc_testing_main.cc b/src/xenia/cpu/ppc/testing/ppc_testing_main.cc index bb18dfcb6..1d115af1e 100644 --- a/src/xenia/cpu/ppc/testing/ppc_testing_main.cc +++ b/src/xenia/cpu/ppc/testing/ppc_testing_main.cc @@ -7,6 +7,7 @@ ****************************************************************************** */ +#include "xenia/base/cvar.h" #include "xenia/base/filesystem.h" #include "xenia/base/logging.h" #include "xenia/base/main.h" @@ -28,7 +29,7 @@ DEFINE_path(test_path, "src/xenia/cpu/ppc/testing/", "Directory scanned for test files.", "Other"); DEFINE_path(test_bin_path, "src/xenia/cpu/ppc/testing/bin/", "Directory with binary outputs of the test files.", "Other"); -DEFINE_transient_string(test_name, "", "Specifies test name.", "General"); 
+DEFINE_transient_string(test_name, "", "Test suite name.", "General"); namespace xe { namespace cpu { @@ -475,13 +476,7 @@ bool RunTests(const std::string_view test_name) { } int main(const std::vector& args) { - // Grab test name, if present. - std::string test_name; - if (args.size() >= 2) { - test_name = args[1]; - } - - return RunTests(test_name) ? 0 : 1; + return RunTests(cvars::test_name) ? 0 : 1; } } // namespace test diff --git a/src/xenia/cpu/processor.cc b/src/xenia/cpu/processor.cc index 7a787873d..95b016d17 100644 --- a/src/xenia/cpu/processor.cc +++ b/src/xenia/cpu/processor.cc @@ -358,7 +358,6 @@ bool Processor::ExecuteRaw(ThreadState* thread_state, uint32_t address) { return false; } - auto context = thread_state->context(); return function->Call(thread_state, 0xBCBCBCBC); } diff --git a/src/xenia/emulator.cc b/src/xenia/emulator.cc index 44f284d34..4e6b10783 100644 --- a/src/xenia/emulator.cc +++ b/src/xenia/emulator.cc @@ -59,13 +59,15 @@ namespace xe { Emulator::Emulator(const std::filesystem::path& command_line, const std::filesystem::path& storage_root, - const std::filesystem::path& content_root) + const std::filesystem::path& content_root, + const std::filesystem::path& cache_root) : on_launch(), on_terminate(), on_exit(), command_line_(command_line), storage_root_(storage_root), content_root_(content_root), + cache_root_(cache_root), game_title_(), display_window_(nullptr), memory_(), @@ -689,7 +691,7 @@ X_STATUS Emulator::CompleteLaunch(const std::filesystem::path& path, // playing before the video can be seen if doing this in parallel with the // main thread. 
on_shader_storage_initialization(true); - graphics_system_->InitializeShaderStorage(storage_root_, title_id_, true); + graphics_system_->InitializeShaderStorage(cache_root_, title_id_, true); on_shader_storage_initialization(false); auto main_thread = kernel_state_->LaunchModule(module); diff --git a/src/xenia/emulator.h b/src/xenia/emulator.h index df5426227..739c12b51 100644 --- a/src/xenia/emulator.h +++ b/src/xenia/emulator.h @@ -49,7 +49,8 @@ class Emulator { public: explicit Emulator(const std::filesystem::path& command_line, const std::filesystem::path& storage_root, - const std::filesystem::path& content_root); + const std::filesystem::path& content_root, + const std::filesystem::path& cache_root); ~Emulator(); // Full command line used when launching the process. @@ -61,6 +62,9 @@ class Emulator { // Folder guest content is stored in. const std::filesystem::path& content_root() const { return content_root_; } + // Folder files safe to remove without significant side effects are stored in. + const std::filesystem::path& cache_root() const { return cache_root_; } + // Title of the game in the default language. 
const std::string& game_title() const { return game_title_; } @@ -166,6 +170,7 @@ class Emulator { std::filesystem::path command_line_; std::filesystem::path storage_root_; std::filesystem::path content_root_; + std::filesystem::path cache_root_; std::string game_title_; diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index 651952da6..044773161 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -89,8 +89,8 @@ void CommandProcessor::Shutdown() { } void CommandProcessor::InitializeShaderStorage( - const std::filesystem::path& storage_root, uint32_t title_id, - bool blocking) {} + const std::filesystem::path& cache_root, uint32_t title_id, bool blocking) { +} void CommandProcessor::RequestFrameTrace( const std::filesystem::path& root_path) { diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h index b94562d79..5002f0137 100644 --- a/src/xenia/gpu/command_processor.h +++ b/src/xenia/gpu/command_processor.h @@ -133,9 +133,8 @@ class CommandProcessor { // May be called not only from the command processor thread when the command // processor is paused, and the termination of this function may be explicitly // awaited. 
- virtual void InitializeShaderStorage( - const std::filesystem::path& storage_root, uint32_t title_id, - bool blocking); + virtual void InitializeShaderStorage(const std::filesystem::path& cache_root, + uint32_t title_id, bool blocking); virtual void RequestFrameTrace(const std::filesystem::path& root_path); virtual void BeginTracing(const std::filesystem::path& root_path); diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 5b216b22b..f6af89881 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -7,8 +7,6 @@ ****************************************************************************** */ -#include "third_party/xxhash/xxhash.h" - #include #include #include @@ -73,10 +71,9 @@ void D3D12CommandProcessor::ClearCaches() { } void D3D12CommandProcessor::InitializeShaderStorage( - const std::filesystem::path& storage_root, uint32_t title_id, - bool blocking) { - CommandProcessor::InitializeShaderStorage(storage_root, title_id, blocking); - pipeline_cache_->InitializeShaderStorage(storage_root, title_id, blocking); + const std::filesystem::path& cache_root, uint32_t title_id, bool blocking) { + CommandProcessor::InitializeShaderStorage(cache_root, title_id, blocking); + pipeline_cache_->InitializeShaderStorage(cache_root, title_id, blocking); } void D3D12CommandProcessor::RequestFrameTrace( @@ -102,7 +99,7 @@ void D3D12CommandProcessor::RestoreEdramSnapshot(const void* snapshot) { } uint32_t D3D12CommandProcessor::GetCurrentColorMask( - const D3D12Shader* pixel_shader) const { + const Shader* pixel_shader) const { if (pixel_shader == nullptr) { return 0; } @@ -159,25 +156,16 @@ void D3D12CommandProcessor::SubmitBarriers() { } ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( - const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader) { - assert_true(vertex_shader->is_translated()); - + const DxbcShader* vertex_shader, const 
DxbcShader* pixel_shader, + bool tessellated) { if (bindless_resources_used_) { - return vertex_shader->host_vertex_shader_type() != - Shader::HostVertexShaderType::kVertex - ? root_signature_bindless_ds_ - : root_signature_bindless_vs_; + return tessellated ? root_signature_bindless_ds_ + : root_signature_bindless_vs_; } - assert_true(pixel_shader == nullptr || pixel_shader->is_translated()); - - D3D12_SHADER_VISIBILITY vertex_visibility; - if (vertex_shader->host_vertex_shader_type() != - Shader::HostVertexShaderType::kVertex) { - vertex_visibility = D3D12_SHADER_VISIBILITY_DOMAIN; - } else { - vertex_visibility = D3D12_SHADER_VISIBILITY_VERTEX; - } + D3D12_SHADER_VISIBILITY vertex_visibility = + tessellated ? D3D12_SHADER_VISIBILITY_DOMAIN + : D3D12_SHADER_VISIBILITY_VERTEX; uint32_t texture_count_vertex, sampler_count_vertex; vertex_shader->GetTextureBindings(texture_count_vertex); @@ -393,7 +381,7 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( } uint32_t D3D12CommandProcessor::GetRootBindfulExtraParameterIndices( - const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, + const DxbcShader* vertex_shader, const DxbcShader* pixel_shader, RootBindfulExtraParameterIndices& indices_out) { uint32_t texture_count_pixel = 0, sampler_count_pixel = 0; if (pixel_shader != nullptr) { @@ -1202,6 +1190,7 @@ bool D3D12CommandProcessor::SetupContext() { pipeline_cache_ = std::make_unique( *this, *register_file_, bindless_resources_used_, edram_rov_used_, + render_target_cache_->depth_float24_conversion(), texture_cache_->IsResolutionScale2X() ? 
2 : 1); if (!pipeline_cache_->Initialize()) { XELOGE("Failed to initialize the graphics pipeline cache"); @@ -1804,8 +1793,7 @@ Shader* D3D12CommandProcessor::LoadShader(xenos::ShaderType shader_type, uint32_t guest_address, const uint32_t* host_address, uint32_t dword_count) { - return pipeline_cache_->LoadShader(shader_type, guest_address, host_address, - dword_count); + return pipeline_cache_->LoadShader(shader_type, host_address, dword_count); } bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, @@ -1851,21 +1839,30 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, // Need a pixel shader in normal color mode. return false; } - // Get tessellation info for the current draw for vertex shader translation. - Shader::HostVertexShaderType host_vertex_shader_type = - pipeline_cache_->GetHostVertexShaderTypeIfValid(); - if (host_vertex_shader_type == Shader::HostVertexShaderType(-1)) { + DxbcShaderTranslator::Modification vertex_shader_modification; + DxbcShaderTranslator::Modification pixel_shader_modification; + if (!pipeline_cache_->GetCurrentShaderModifications( + vertex_shader_modification, pixel_shader_modification)) { return false; } + D3D12Shader::D3D12Translation* vertex_shader_translation = + static_cast( + vertex_shader->GetOrCreateTranslation( + vertex_shader_modification.value)); + D3D12Shader::D3D12Translation* pixel_shader_translation = + pixel_shader ? static_cast( + pixel_shader->GetOrCreateTranslation( + pixel_shader_modification.value)) + : nullptr; // Translate the shaders now to get memexport configuration and color mask, - // which is needed by the render target cache, to check the possibility of - // doing early depth/stencil, and also to get used textures and samplers. - if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader, pixel_shader, - host_vertex_shader_type)) { + // which is needed by the render target cache, and also to get used textures + // and samplers. 
+ if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader_translation, + pixel_shader_translation)) { return false; } - bool tessellated = - host_vertex_shader_type != Shader::HostVertexShaderType::kVertex; + bool tessellated = vertex_shader_modification.host_vertex_shader_type != + Shader::HostVertexShaderType::kVertex; // Check if memexport is used. If it is, we can't skip draw calls that have no // visual effect. @@ -1967,26 +1964,14 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, (pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0); texture_cache_->RequestTextures(used_texture_mask); - // Check if early depth/stencil can be enabled. - bool early_z; - if (pixel_shader) { - auto rb_colorcontrol = regs.Get(); - early_z = pixel_shader->implicit_early_z_allowed() && - (!rb_colorcontrol.alpha_test_enable || - rb_colorcontrol.alpha_func == xenos::CompareFunction::kAlways) && - !rb_colorcontrol.alpha_to_mask_enable; - } else { - early_z = true; - } - // Create the pipeline if needed and bind it. void* pipeline_handle; ID3D12RootSignature* root_signature; if (!pipeline_cache_->ConfigurePipeline( - vertex_shader, pixel_shader, primitive_type_converted, + vertex_shader_translation, pixel_shader_translation, + primitive_type_converted, indexed ? 
index_buffer_info->format : xenos::IndexFormat::kInt16, - early_z, pipeline_render_targets, &pipeline_handle, - &root_signature)) { + pipeline_render_targets, &pipeline_handle, &root_signature)) { return false; } if (current_cached_pipeline_ != pipeline_handle) { @@ -2014,11 +1999,18 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, pixel_size_x *= 2; pixel_size_y *= 2; } + flags::DepthFloat24Conversion depth_float24_conversion = + render_target_cache_->depth_float24_conversion(); draw_util::ViewportInfo viewport_info; - draw_util::GetHostViewportInfo(regs, float(pixel_size_x), float(pixel_size_y), - true, float(D3D12_VIEWPORT_BOUNDS_MAX), - float(D3D12_VIEWPORT_BOUNDS_MAX), false, - viewport_info); + draw_util::GetHostViewportInfo( + regs, float(pixel_size_x), float(pixel_size_y), true, + float(D3D12_VIEWPORT_BOUNDS_MAX), float(D3D12_VIEWPORT_BOUNDS_MAX), false, + !edram_rov_used_ && + (depth_float24_conversion == + flags::DepthFloat24Conversion::kOnOutputTruncating || + depth_float24_conversion == + flags::DepthFloat24Conversion::kOnOutputRounding), + viewport_info); draw_util::Scissor scissor; draw_util::GetScissor(regs, scissor); scissor.left *= pixel_size_x; @@ -2033,7 +2025,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, UpdateSystemConstantValues( memexport_used, primitive_polygonal, line_loop_closing_index, indexed ? index_buffer_info->endianness : xenos::Endian::kNone, - viewport_info, pixel_size_x, pixel_size_y, used_texture_mask, early_z, + viewport_info, pixel_size_x, pixel_size_y, used_texture_mask, GetCurrentColorMask(pixel_shader), pipeline_render_targets); // Update constant buffers, descriptors and root parameters. 
@@ -2659,6 +2651,8 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) { bool is_closing_frame = is_swap && frame_open_; if (is_closing_frame) { + render_target_cache_->EndFrame(); + texture_cache_->EndFrame(); } @@ -2873,8 +2867,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( bool shared_memory_is_uav, bool primitive_polygonal, uint32_t line_loop_closing_index, xenos::Endian index_endian, const draw_util::ViewportInfo& viewport_info, uint32_t pixel_size_x, - uint32_t pixel_size_y, uint32_t used_texture_mask, bool early_z, - uint32_t color_mask, + uint32_t pixel_size_y, uint32_t used_texture_mask, uint32_t color_mask, const RenderTargetCache::PipelineRenderTarget render_targets[4]) { #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); @@ -2992,14 +2985,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( flags |= DxbcShaderTranslator::kSysFlag_KillIfAnyVertexKilled; } // Alpha test. - if (rb_colorcontrol.alpha_test_enable) { - flags |= uint32_t(rb_colorcontrol.alpha_func) - << DxbcShaderTranslator::kSysFlag_AlphaPassIfLess_Shift; - } else { - flags |= DxbcShaderTranslator::kSysFlag_AlphaPassIfLess | - DxbcShaderTranslator::kSysFlag_AlphaPassIfEqual | - DxbcShaderTranslator::kSysFlag_AlphaPassIfGreater; - } + xenos::CompareFunction alpha_test_function = + rb_colorcontrol.alpha_test_enable ? rb_colorcontrol.alpha_func + : xenos::CompareFunction::kAlways; + flags |= uint32_t(alpha_test_function) + << DxbcShaderTranslator::kSysFlag_AlphaPassIfLess_Shift; // Gamma writing. for (uint32_t i = 0; i < 4; ++i) { if (color_infos[i].color_format == @@ -3028,7 +3018,9 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( if (rb_depthcontrol.stencil_enable) { flags |= DxbcShaderTranslator::kSysFlag_ROVStencilTest; } - if (early_z) { + // Hint - if not applicable to the shader, will not have effect. 
+ if (alpha_test_function == xenos::CompareFunction::kAlways && + !rb_colorcontrol.alpha_to_mask_enable) { flags |= DxbcShaderTranslator::kSysFlag_ROVDepthStencilEarlyWrite; } } diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index c75b5c203..a9181f1c3 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -27,6 +27,7 @@ #include "xenia/gpu/d3d12/render_target_cache.h" #include "xenia/gpu/d3d12/texture_cache.h" #include "xenia/gpu/draw_util.h" +#include "xenia/gpu/dxbc_shader.h" #include "xenia/gpu/dxbc_shader_translator.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/kernel_state.h" @@ -47,7 +48,7 @@ class D3D12CommandProcessor : public CommandProcessor { void ClearCaches() override; - void InitializeShaderStorage(const std::filesystem::path& storage_root, + void InitializeShaderStorage(const std::filesystem::path& cache_root, uint32_t title_id, bool blocking) override; void RequestFrameTrace(const std::filesystem::path& root_path) override; @@ -88,7 +89,7 @@ class D3D12CommandProcessor : public CommandProcessor { // there are 4 render targets bound with the same EDRAM base (clearly not // correct usage), but the shader only clears 1, and then EDRAM buffer stores // conflict with each other. - uint32_t GetCurrentColorMask(const D3D12Shader* pixel_shader) const; + uint32_t GetCurrentColorMask(const Shader* pixel_shader) const; void PushTransitionBarrier( ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state, @@ -100,8 +101,9 @@ class D3D12CommandProcessor : public CommandProcessor { void SubmitBarriers(); // Finds or creates root signature for a pipeline. 
- ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader, - const D3D12Shader* pixel_shader); + ID3D12RootSignature* GetRootSignature(const DxbcShader* vertex_shader, + const DxbcShader* pixel_shader, + bool tessellated); ui::d3d12::D3D12UploadBufferPool& GetConstantBufferPool() const { return *constant_buffer_pool_; @@ -300,7 +302,7 @@ class D3D12CommandProcessor : public CommandProcessor { // Gets the indices of optional root parameters. Returns the total parameter // count. static uint32_t GetRootBindfulExtraParameterIndices( - const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, + const DxbcShader* vertex_shader, const DxbcShader* pixel_shader, RootBindfulExtraParameterIndices& indices_out); // BeginSubmission and EndSubmission may be called at any time. If there's an @@ -353,8 +355,7 @@ class D3D12CommandProcessor : public CommandProcessor { bool shared_memory_is_uav, bool primitive_polygonal, uint32_t line_loop_closing_index, xenos::Endian index_endian, const draw_util::ViewportInfo& viewport_info, uint32_t pixel_size_x, - uint32_t pixel_size_y, uint32_t used_texture_mask, bool early_z, - uint32_t color_mask, + uint32_t pixel_size_y, uint32_t used_texture_mask, uint32_t color_mask, const RenderTargetCache::PipelineRenderTarget render_targets[4]); bool UpdateBindings(const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, diff --git a/src/xenia/gpu/d3d12/d3d12_shader.cc b/src/xenia/gpu/d3d12/d3d12_shader.cc index 0b5296a4f..672f1e37d 100644 --- a/src/xenia/gpu/d3d12/d3d12_shader.cc +++ b/src/xenia/gpu/d3d12/d3d12_shader.cc @@ -10,9 +10,11 @@ #include "xenia/gpu/d3d12/d3d12_shader.h" #include +#include #include "xenia/base/assert.h" #include "xenia/base/logging.h" +#include "xenia/gpu/dxbc_shader.h" #include "xenia/gpu/gpu_flags.h" #include "xenia/ui/d3d12/d3d12_api.h" @@ -22,51 +24,13 @@ namespace d3d12 { D3D12Shader::D3D12Shader(xenos::ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr, uint32_t 
dword_count) - : Shader(shader_type, data_hash, dword_ptr, dword_count) {} + : DxbcShader(shader_type, data_hash, dword_ptr, dword_count) {} -void D3D12Shader::SetTexturesAndSamplers( - const DxbcShaderTranslator::TextureBinding* texture_bindings, - uint32_t texture_binding_count, - const DxbcShaderTranslator::SamplerBinding* sampler_bindings, - uint32_t sampler_binding_count) { - texture_bindings_.clear(); - texture_bindings_.reserve(texture_binding_count); - used_texture_mask_ = 0; - for (uint32_t i = 0; i < texture_binding_count; ++i) { - TextureBinding& binding = texture_bindings_.emplace_back(); - // For a stable hash. - std::memset(&binding, 0, sizeof(binding)); - const DxbcShaderTranslator::TextureBinding& translator_binding = - texture_bindings[i]; - binding.bindless_descriptor_index = - translator_binding.bindless_descriptor_index; - binding.fetch_constant = translator_binding.fetch_constant; - binding.dimension = translator_binding.dimension; - binding.is_signed = translator_binding.is_signed; - used_texture_mask_ |= 1u << translator_binding.fetch_constant; - } - sampler_bindings_.clear(); - sampler_bindings_.reserve(sampler_binding_count); - for (uint32_t i = 0; i < sampler_binding_count; ++i) { - SamplerBinding binding; - const DxbcShaderTranslator::SamplerBinding& translator_binding = - sampler_bindings[i]; - binding.bindless_descriptor_index = - translator_binding.bindless_descriptor_index; - binding.fetch_constant = translator_binding.fetch_constant; - binding.mag_filter = translator_binding.mag_filter; - binding.min_filter = translator_binding.min_filter; - binding.mip_filter = translator_binding.mip_filter; - binding.aniso_filter = translator_binding.aniso_filter; - sampler_bindings_.push_back(binding); - } -} - -void D3D12Shader::DisassembleDxbc(const ui::d3d12::D3D12Provider& provider, - bool disassemble_dxbc, - IDxbcConverter* dxbc_converter, - IDxcUtils* dxc_utils, - IDxcCompiler* dxc_compiler) { +void 
D3D12Shader::D3D12Translation::DisassembleDxbcAndDxil( + const ui::d3d12::D3D12Provider& provider, bool disassemble_dxbc, + IDxbcConverter* dxbc_converter, IDxcUtils* dxc_utils, + IDxcCompiler* dxc_compiler) { + std::string disassembly; bool is_first_disassembly = true; if (disassemble_dxbc) { ID3DBlob* dxbc_disassembly; @@ -77,11 +41,12 @@ void D3D12Shader::DisassembleDxbc(const ui::d3d12::D3D12Provider& provider, nullptr, &dxbc_disassembly))) { assert_true(is_first_disassembly); is_first_disassembly = false; - host_disassembly_.append( + disassembly.append( reinterpret_cast(dxbc_disassembly->GetBufferPointer())); dxbc_disassembly->Release(); } else { - XELOGE("Failed to disassemble DXBC shader {:016X}", ucode_data_hash()); + XELOGE("Failed to disassemble DXBC shader {:016X}", + shader().ucode_data_hash()); } } if (dxbc_converter && dxc_utils && dxc_compiler) { @@ -106,29 +71,36 @@ void D3D12Shader::DisassembleDxbc(const ui::d3d12::D3D12Provider& provider, dxil_disassembly->Release(); if (dxil_disassembly_got_utf8) { if (!is_first_disassembly) { - host_disassembly_.append("\n\n"); + disassembly.append("\n\n"); } is_first_disassembly = false; - host_disassembly_.append(reinterpret_cast( + disassembly.append(reinterpret_cast( dxil_disassembly_utf8->GetStringPointer())); dxil_disassembly_utf8->Release(); } else { XELOGE("Failed to get DXIL shader {:016X} disassembly as UTF-8", - ucode_data_hash()); + shader().ucode_data_hash()); } } else { XELOGE("Failed to disassemble DXIL shader {:016X}", - ucode_data_hash()); + shader().ucode_data_hash()); } } else { XELOGE("Failed to create a blob with DXIL shader {:016X}", - ucode_data_hash()); + shader().ucode_data_hash()); CoTaskMemFree(dxil); } } else { - XELOGE("Failed to convert shader {:016X} to DXIL", ucode_data_hash()); + XELOGE("Failed to convert shader {:016X} to DXIL", + shader().ucode_data_hash()); } } + set_host_disassembly(std::move(disassembly)); +} + +Shader::Translation* D3D12Shader::CreateTranslationInstance( + 
uint32_t modification) { + return new D3D12Translation(*this, modification); } } // namespace d3d12 diff --git a/src/xenia/gpu/d3d12/d3d12_shader.h b/src/xenia/gpu/d3d12/d3d12_shader.h index c24d6a00a..384e48a8a 100644 --- a/src/xenia/gpu/d3d12/d3d12_shader.h +++ b/src/xenia/gpu/d3d12/d3d12_shader.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2018 Ben Vanik. All rights reserved. * + * Copyright 2020 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -10,106 +10,62 @@ #ifndef XENIA_GPU_D3D12_D3D12_SHADER_H_ #define XENIA_GPU_D3D12_D3D12_SHADER_H_ -#include +#include -#include "xenia/gpu/dxbc_shader_translator.h" -#include "xenia/gpu/shader.h" -#include "xenia/gpu/xenos.h" +#include "xenia/gpu/dxbc_shader.h" #include "xenia/ui/d3d12/d3d12_provider.h" namespace xe { namespace gpu { namespace d3d12 { -class D3D12Shader : public Shader { +class D3D12Shader : public DxbcShader { public: + class D3D12Translation : public DxbcTranslation { + public: + D3D12Translation(D3D12Shader& shader, uint32_t modification) + : DxbcTranslation(shader, modification) {} + + void DisassembleDxbcAndDxil(const ui::d3d12::D3D12Provider& provider, + bool disassemble_dxbc, + IDxbcConverter* dxbc_converter = nullptr, + IDxcUtils* dxc_utils = nullptr, + IDxcCompiler* dxc_compiler = nullptr); + }; + D3D12Shader(xenos::ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr, uint32_t dword_count); - void SetTexturesAndSamplers( - const DxbcShaderTranslator::TextureBinding* texture_bindings, - uint32_t texture_binding_count, - const DxbcShaderTranslator::SamplerBinding* sampler_bindings, - uint32_t sampler_binding_count); - - void 
SetForcedEarlyZShaderObject(const std::vector& shader_object) { - forced_early_z_shader_ = shader_object; - } - // Returns the shader with forced early depth/stencil set with - // SetForcedEarlyZShader after translation. If there's none (for example, - // if the shader discards pixels or writes to the depth buffer), an empty - // vector is returned. - const std::vector& GetForcedEarlyZShaderObject() const { - return forced_early_z_shader_; - } - - void DisassembleDxbc(const ui::d3d12::D3D12Provider& provider, - bool disassemble_dxbc, - IDxbcConverter* dxbc_converter = nullptr, - IDxcUtils* dxc_utils = nullptr, - IDxcCompiler* dxc_compiler = nullptr); - - static constexpr uint32_t kMaxTextureBindingIndexBits = - DxbcShaderTranslator::kMaxTextureBindingIndexBits; - static constexpr uint32_t kMaxTextureBindings = - DxbcShaderTranslator::kMaxTextureBindings; - struct TextureBinding { - uint32_t bindless_descriptor_index; - uint32_t fetch_constant; - // Stacked and 3D are separate TextureBindings, even for bindless for null - // descriptor handling simplicity. - xenos::FetchOpDimension dimension; - bool is_signed; - }; - // Safe to hash and compare with memcmp for layout hashing. 
- const TextureBinding* GetTextureBindings(uint32_t& count_out) const { - count_out = uint32_t(texture_bindings_.size()); - return texture_bindings_.data(); - } - const uint32_t GetUsedTextureMask() const { return used_texture_mask_; } - - static constexpr uint32_t kMaxSamplerBindingIndexBits = - DxbcShaderTranslator::kMaxSamplerBindingIndexBits; - static constexpr uint32_t kMaxSamplerBindings = - DxbcShaderTranslator::kMaxSamplerBindings; - struct SamplerBinding { - uint32_t bindless_descriptor_index; - uint32_t fetch_constant; - xenos::TextureFilter mag_filter; - xenos::TextureFilter min_filter; - xenos::TextureFilter mip_filter; - xenos::AnisoFilter aniso_filter; - }; - const SamplerBinding* GetSamplerBindings(uint32_t& count_out) const { - count_out = uint32_t(sampler_bindings_.size()); - return sampler_bindings_.data(); - } - - // For owning subsystems like the pipeline cache, accessors for unique + // For owning subsystem like the pipeline cache, accessors for unique // identifiers (used instead of hashes to make sure collisions can't happen) // of binding layouts used by the shader, for invalidation if a shader with an // incompatible layout was bound. size_t GetTextureBindingLayoutUserUID() const { return texture_binding_layout_user_uid_; } - void SetTextureBindingLayoutUserUID(size_t uid) { - texture_binding_layout_user_uid_ = uid; - } size_t GetSamplerBindingLayoutUserUID() const { return sampler_binding_layout_user_uid_; } + // Modifications of the same shader can be translated on different threads. + // The "set" function must only be called if "enter" returned true - these are + // set up only once. 
+ bool EnterBindingLayoutUserUIDSetup() { + return !binding_layout_user_uids_set_up_.test_and_set(); + } + void SetTextureBindingLayoutUserUID(size_t uid) { + texture_binding_layout_user_uid_ = uid; + } void SetSamplerBindingLayoutUserUID(size_t uid) { sampler_binding_layout_user_uid_ = uid; } + protected: + Translation* CreateTranslationInstance(uint32_t modification) override; + private: - std::vector texture_bindings_; - std::vector sampler_bindings_; + std::atomic_flag binding_layout_user_uids_set_up_ = ATOMIC_FLAG_INIT; size_t texture_binding_layout_user_uid_ = 0; size_t sampler_binding_layout_user_uid_ = 0; - uint32_t used_texture_mask_ = 0; - - std::vector forced_early_z_shader_; }; } // namespace d3d12 diff --git a/src/xenia/gpu/d3d12/deferred_command_list.cc b/src/xenia/gpu/d3d12/deferred_command_list.cc index eb8d8922e..e618931d4 100644 --- a/src/xenia/gpu/d3d12/deferred_command_list.cc +++ b/src/xenia/gpu/d3d12/deferred_command_list.cc @@ -221,7 +221,9 @@ void DeferredCommandList::Execute(ID3D12GraphicsCommandList* command_list, *reinterpret_cast(stream); command_list_1->SetSamplePositions( args.num_samples_per_pixel, args.num_pixels, - const_cast(args.sample_positions)); + (args.num_samples_per_pixel && args.num_pixels) + ? 
const_cast(args.sample_positions) + : nullptr); } } break; default: diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index ade180c97..0a0ab8bdd 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -16,10 +16,10 @@ #include #include #include +#include #include #include "third_party/fmt/include/fmt/format.h" -#include "third_party/xxhash/xxhash.h" #include "xenia/base/assert.h" #include "xenia/base/byte_order.h" #include "xenia/base/clock.h" @@ -29,6 +29,7 @@ #include "xenia/base/math.h" #include "xenia/base/profiling.h" #include "xenia/base/string.h" +#include "xenia/base/xxhash.h" #include "xenia/gpu/d3d12/d3d12_command_processor.h" #include "xenia/gpu/gpu_flags.h" #include "xenia/ui/d3d12/d3d12_util.h" @@ -63,19 +64,23 @@ namespace d3d12 { #include "xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_triangle_hs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_quad_hs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_triangle_hs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/float24_round_ps.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/float24_truncate_ps.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_point_list_gs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_quad_list_gs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/primitive_rectangle_list_gs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/tessellation_vs.h" -PipelineCache::PipelineCache(D3D12CommandProcessor& command_processor, - const RegisterFile& register_file, - bool bindless_resources_used, bool edram_rov_used, - uint32_t resolution_scale) +PipelineCache::PipelineCache( + D3D12CommandProcessor& command_processor, const RegisterFile& register_file, + bool bindless_resources_used, bool edram_rov_used, + flags::DepthFloat24Conversion depth_float24_conversion, + uint32_t resolution_scale) : command_processor_(command_processor), register_file_(register_file), 
bindless_resources_used_(bindless_resources_used), edram_rov_used_(edram_rov_used), + depth_float24_conversion_(depth_float24_conversion), resolution_scale_(resolution_scale) { auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); @@ -177,10 +182,10 @@ void PipelineCache::Shutdown() { void PipelineCache::ClearCache(bool shutting_down) { bool reinitialize_shader_storage = !shutting_down && storage_write_thread_ != nullptr; - std::filesystem::path shader_storage_root; + std::filesystem::path shader_storage_cache_root; uint32_t shader_storage_title_id = shader_storage_title_id_; if (reinitialize_shader_storage) { - shader_storage_root = shader_storage_root_; + shader_storage_cache_root = shader_storage_cache_root_; } ShutdownShaderStorage(); @@ -226,19 +231,19 @@ void PipelineCache::ClearCache(bool shutting_down) { delete it.second; } shaders_.clear(); + shader_storage_index_ = 0; if (reinitialize_shader_storage) { - InitializeShaderStorage(shader_storage_root, shader_storage_title_id, + InitializeShaderStorage(shader_storage_cache_root, shader_storage_title_id, false); } } void PipelineCache::InitializeShaderStorage( - const std::filesystem::path& storage_root, uint32_t title_id, - bool blocking) { + const std::filesystem::path& cache_root, uint32_t title_id, bool blocking) { ShutdownShaderStorage(); - auto shader_storage_root = storage_root / "shaders"; + auto shader_storage_root = cache_root / "shaders"; // For files that can be moved between different hosts. // Host PSO blobs - if ever added - should be stored in shaders/local/ (they // currently aren't used because because they may be not very practical - @@ -256,6 +261,90 @@ void PipelineCache::InitializeShaderStorage( } } + // Initialize the pipeline storage stream - read pipeline descriptions and + // collect used shader modifications to translate. + std::vector pipeline_stored_descriptions; + // . 
+ std::set> shader_translations_needed; + auto pipeline_storage_file_path = + shader_storage_shareable_root / + fmt::format("{:08X}.{}.d3d12.xpso", title_id, + edram_rov_used_ ? "rov" : "rtv"); + pipeline_storage_file_ = + xe::filesystem::OpenFile(pipeline_storage_file_path, "a+b"); + if (!pipeline_storage_file_) { + XELOGE( + "Failed to open the Direct3D 12 pipeline description storage file for " + "writing, persistent shader storage will be disabled: {}", + xe::path_to_utf8(pipeline_storage_file_path)); + return; + } + pipeline_storage_file_flush_needed_ = false; + // 'XEPS'. + const uint32_t pipeline_storage_magic = 0x53504558; + // 'DXRO' or 'DXRT'. + const uint32_t pipeline_storage_magic_api = + edram_rov_used_ ? 0x4F525844 : 0x54525844; + const uint32_t pipeline_storage_version_swapped = + xe::byte_swap(std::max(PipelineDescription::kVersion, + DxbcShaderTranslator::Modification::kVersion)); + struct { + uint32_t magic; + uint32_t magic_api; + uint32_t version_swapped; + uint32_t device_features; + } pipeline_storage_file_header; + if (fread(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header), + 1, pipeline_storage_file_) && + pipeline_storage_file_header.magic == pipeline_storage_magic && + pipeline_storage_file_header.magic_api == pipeline_storage_magic_api && + pipeline_storage_file_header.version_swapped == + pipeline_storage_version_swapped) { + xe::filesystem::Seek(pipeline_storage_file_, 0, SEEK_END); + int64_t pipeline_storage_told_end = + xe::filesystem::Tell(pipeline_storage_file_); + size_t pipeline_storage_told_count = + size_t(pipeline_storage_told_end >= + int64_t(sizeof(pipeline_storage_file_header)) + ? 
(uint64_t(pipeline_storage_told_end) - + sizeof(pipeline_storage_file_header)) / + sizeof(PipelineStoredDescription) + : 0); + if (pipeline_storage_told_count && + xe::filesystem::Seek(pipeline_storage_file_, + int64_t(sizeof(pipeline_storage_file_header)), + SEEK_SET)) { + pipeline_stored_descriptions.resize(pipeline_storage_told_count); + pipeline_stored_descriptions.resize( + fread(pipeline_stored_descriptions.data(), + sizeof(PipelineStoredDescription), pipeline_storage_told_count, + pipeline_storage_file_)); + size_t pipeline_storage_read_count = pipeline_stored_descriptions.size(); + for (size_t i = 0; i < pipeline_storage_read_count; ++i) { + const PipelineStoredDescription& pipeline_stored_description = + pipeline_stored_descriptions[i]; + // Validate file integrity, stop and truncate the stream if data is + // corrupted. + if (XXH3_64bits(&pipeline_stored_description.description, + sizeof(pipeline_stored_description.description)) != + pipeline_stored_description.description_hash) { + pipeline_stored_descriptions.resize(i); + break; + } + // Mark the shader modifications as needed for translation. + shader_translations_needed.emplace( + pipeline_stored_description.description.vertex_shader_hash, + pipeline_stored_description.description.vertex_shader_modification); + if (pipeline_stored_description.description.pixel_shader_hash) { + shader_translations_needed.emplace( + pipeline_stored_description.description.pixel_shader_hash, + pipeline_stored_description.description + .pixel_shader_modification); + } + } + } + } + size_t logical_processor_count = xe::threading::logical_processor_count(); if (!logical_processor_count) { // Pick some reasonable amount if couldn't determine the number of cores. 
@@ -274,8 +363,11 @@ void PipelineCache::InitializeShaderStorage( "Failed to open the guest shader storage file for writing, persistent " "shader storage will be disabled: {}", xe::path_to_utf8(shader_storage_file_path)); + fclose(pipeline_storage_file_); + pipeline_storage_file_ = nullptr; return; } + ++shader_storage_index_; shader_storage_file_flush_needed_ = false; struct { uint32_t magic; @@ -299,12 +391,12 @@ void PipelineCache::InitializeShaderStorage( // Threads overlapping file reading. std::mutex shaders_translation_thread_mutex; std::condition_variable shaders_translation_thread_cond; - std::deque> + std::deque> shaders_to_translate; size_t shader_translation_threads_busy = 0; bool shader_translation_threads_shutdown = false; std::mutex shaders_failed_to_translate_mutex; - std::vector shaders_failed_to_translate; + std::vector shaders_failed_to_translate; auto shader_translation_thread_function = [&]() { auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); DxbcShaderTranslator translator( @@ -324,7 +416,8 @@ void PipelineCache::InitializeShaderStorage( IID_PPV_ARGS(&dxc_compiler)); } for (;;) { - std::pair shader_to_translate; + std::pair + shader_to_translate; for (;;) { std::unique_lock lock(shaders_translation_thread_mutex); if (shaders_to_translate.empty()) { @@ -340,11 +433,9 @@ void PipelineCache::InitializeShaderStorage( break; } assert_not_null(shader_to_translate.second); - if (!TranslateShader( - translator, *shader_to_translate.second, - shader_to_translate.first.sq_program_cntl, dxbc_converter, - dxc_utils, dxc_compiler, - shader_to_translate.first.host_vertex_shader_type)) { + if (!TranslateShader(translator, *shader_to_translate.second, + shader_to_translate.first.sq_program_cntl, + dxbc_converter, dxc_utils, dxc_compiler)) { std::lock_guard lock(shaders_failed_to_translate_mutex); shaders_failed_to_translate.push_back(shader_to_translate.second); } @@ -373,18 +464,6 @@ void PipelineCache::InitializeShaderStorage( } 
size_t ucode_byte_count = shader_header.ucode_dword_count * sizeof(uint32_t); - if (shaders_.find(shader_header.ucode_data_hash) != shaders_.end()) { - // Already added - usually shaders aren't added without the intention of - // translating them imminently, so don't do additional checks to - // actually ensure that translation happens right now (they would cause - // a race condition with shaders currently queued for translation). - if (!xe::filesystem::Seek(shader_storage_file_, - int64_t(ucode_byte_count), SEEK_CUR)) { - break; - } - shader_storage_valid_bytes += sizeof(shader_header) + ucode_byte_count; - continue; - } ucode_dwords.resize(shader_header.ucode_dword_count); if (shader_header.ucode_dword_count && !fread(ucode_dwords.data(), ucode_byte_count, 1, @@ -392,39 +471,65 @@ void PipelineCache::InitializeShaderStorage( break; } uint64_t ucode_data_hash = - XXH64(ucode_dwords.data(), ucode_byte_count, 0); + XXH3_64bits(ucode_dwords.data(), ucode_byte_count); if (shader_header.ucode_data_hash != ucode_data_hash) { // Validation failed. break; } - D3D12Shader* shader = - new D3D12Shader(shader_header.type, ucode_data_hash, - ucode_dwords.data(), shader_header.ucode_dword_count); - shaders_.emplace(ucode_data_hash, shader); - // Create new threads if the currently existing threads can't keep up with - // file reading, but not more than the number of logical processors minus - // one. 
- size_t shader_translation_threads_needed; - { - std::lock_guard lock(shaders_translation_thread_mutex); - shader_translation_threads_needed = - std::min(shader_translation_threads_busy + - shaders_to_translate.size() + size_t(1), - logical_processor_count - size_t(1)); - } - while (shader_translation_threads.size() < - shader_translation_threads_needed) { - shader_translation_threads.push_back(xe::threading::Thread::Create( - {}, shader_translation_thread_function)); - shader_translation_threads.back()->set_name("Shader Translation"); - } - { - std::lock_guard lock(shaders_translation_thread_mutex); - shaders_to_translate.emplace_back(shader_header, shader); - } - shaders_translation_thread_cond.notify_one(); shader_storage_valid_bytes += sizeof(shader_header) + ucode_byte_count; - ++shaders_translated; + // Only add the shader if needed. + auto modification_it = shader_translations_needed.lower_bound( + std::make_pair(ucode_data_hash, uint32_t(0))); + if (modification_it == shader_translations_needed.end() || + modification_it->first != ucode_data_hash) { + continue; + } + D3D12Shader* shader = + LoadShader(shader_header.type, ucode_dwords.data(), + shader_header.ucode_dword_count, ucode_data_hash); + // Loaded from the current storage - don't write again. + shader->set_ucode_storage_index(shader_storage_index_); + // Translate all the needed modifications. 
+ for (; modification_it != shader_translations_needed.end() && + modification_it->first == ucode_data_hash; + ++modification_it) { + bool translation_is_new; + D3D12Shader::D3D12Translation* translation = + static_cast( + shader->GetOrCreateTranslation(modification_it->second, + &translation_is_new)); + if (!translation_is_new) { + // Already added - usually shaders aren't added without the intention + // of translating them imminently, so don't do additional checks to + // actually ensure that translation happens right now (they would + // cause a race condition with shaders currently queued for + // translation). + continue; + } + // Create new threads if the currently existing threads can't keep up + // with file reading, but not more than the number of logical processors + // minus one. + size_t shader_translation_threads_needed; + { + std::lock_guard lock(shaders_translation_thread_mutex); + shader_translation_threads_needed = + std::min(shader_translation_threads_busy + + shaders_to_translate.size() + size_t(1), + logical_processor_count - size_t(1)); + } + while (shader_translation_threads.size() < + shader_translation_threads_needed) { + shader_translation_threads.push_back(xe::threading::Thread::Create( + {}, shader_translation_thread_function)); + shader_translation_threads.back()->set_name("Shader Translation"); + } + { + std::lock_guard lock(shaders_translation_thread_mutex); + shaders_to_translate.emplace_back(shader_header, translation); + } + shaders_translation_thread_cond.notify_one(); + ++shaders_translated; + } } if (!shader_translation_threads.empty()) { { @@ -436,9 +541,14 @@ void PipelineCache::InitializeShaderStorage( xe::threading::Wait(shader_translation_thread.get(), false); } shader_translation_threads.clear(); - for (D3D12Shader* shader : shaders_failed_to_translate) { - shaders_.erase(shader->ucode_data_hash()); - delete shader; + for (D3D12Shader::D3D12Translation* translation : + shaders_failed_to_translate) { + D3D12Shader* shader = 
static_cast(&translation->shader()); + shader->DestroyTranslation(translation->modification()); + if (shader->translations().empty()) { + shaders_.erase(shader->ucode_data_hash()); + delete shader; + } } } XELOGGPU("Translated {} shaders from the storage in {} milliseconds", @@ -457,220 +567,177 @@ void PipelineCache::InitializeShaderStorage( shader_storage_file_); } - // 'DXRO' or 'DXRT'. - const uint32_t pipeline_storage_magic_api = - edram_rov_used_ ? 0x4F525844 : 0x54525844; + // Create the pipelines. + if (!pipeline_stored_descriptions.empty()) { + uint64_t pipeline_creation_start_ = xe::Clock::QueryHostTickCount(); - // Initialize the pipeline storage stream. - uint64_t pipeline_storage_initialization_start_ = - xe::Clock::QueryHostTickCount(); - auto pipeline_storage_file_path = - shader_storage_shareable_root / - fmt::format("{:08X}.{}.d3d12.xpso", title_id, - edram_rov_used_ ? "rov" : "rtv"); - pipeline_storage_file_ = - xe::filesystem::OpenFile(pipeline_storage_file_path, "a+b"); - if (!pipeline_storage_file_) { - XELOGE( - "Failed to open the Direct3D 12 pipeline description storage file for " - "writing, persistent shader storage will be disabled: {}", - xe::path_to_utf8(pipeline_storage_file_path)); - fclose(shader_storage_file_); - shader_storage_file_ = nullptr; - return; - } - pipeline_storage_file_flush_needed_ = false; - // 'XEPS'. 
- const uint32_t pipeline_storage_magic = 0x53504558; - struct { - uint32_t magic; - uint32_t magic_api; - uint32_t version_swapped; - } pipeline_storage_file_header; - if (fread(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header), - 1, pipeline_storage_file_) && - pipeline_storage_file_header.magic == pipeline_storage_magic && - pipeline_storage_file_header.magic_api == pipeline_storage_magic_api && - xe::byte_swap(pipeline_storage_file_header.version_swapped) == - PipelineDescription::kVersion) { - uint64_t pipeline_storage_valid_bytes = - sizeof(pipeline_storage_file_header); - // Enqueue pipeline descriptions written by previous Xenia executions until - // the end of the file or until a corrupted one is detected. - xe::filesystem::Seek(pipeline_storage_file_, 0, SEEK_END); - int64_t pipeline_storage_told_end = - xe::filesystem::Tell(pipeline_storage_file_); - size_t pipeline_storage_told_count = size_t( - pipeline_storage_told_end >= int64_t(pipeline_storage_valid_bytes) - ? (uint64_t(pipeline_storage_told_end) - - pipeline_storage_valid_bytes) / - sizeof(PipelineStoredDescription) - : 0); - if (pipeline_storage_told_count && - xe::filesystem::Seek(pipeline_storage_file_, - int64_t(pipeline_storage_valid_bytes), SEEK_SET)) { - std::vector pipeline_stored_descriptions; - pipeline_stored_descriptions.resize(pipeline_storage_told_count); - pipeline_stored_descriptions.resize( - fread(pipeline_stored_descriptions.data(), - sizeof(PipelineStoredDescription), pipeline_storage_told_count, - pipeline_storage_file_)); - if (!pipeline_stored_descriptions.empty()) { - // Launch additional creation threads to use all cores to create - // pipelines faster. Will also be using the main thread, so minus 1. 
- size_t creation_thread_original_count = creation_threads_.size(); - size_t creation_thread_needed_count = - std::max(std::min(pipeline_stored_descriptions.size(), - logical_processor_count) - - size_t(1), - creation_thread_original_count); - while (creation_threads_.size() < creation_thread_original_count) { - size_t creation_thread_index = creation_threads_.size(); - std::unique_ptr creation_thread = - xe::threading::Thread::Create( - {}, [this, creation_thread_index]() { - CreationThread(creation_thread_index); - }); - creation_thread->set_name("D3D12 Pipelines"); - creation_threads_.push_back(std::move(creation_thread)); - } - size_t pipelines_created = 0; - for (const PipelineStoredDescription& pipeline_stored_description : - pipeline_stored_descriptions) { - const PipelineDescription& pipeline_description = - pipeline_stored_description.description; - // Validate file integrity, stop and truncate the stream if data is - // corrupted. - if (XXH64(&pipeline_stored_description.description, - sizeof(pipeline_stored_description.description), - 0) != pipeline_stored_description.description_hash) { - break; - } - pipeline_storage_valid_bytes += sizeof(PipelineStoredDescription); - // Skip already known pipelines - those have already been enqueued. - auto found_range = pipelines_.equal_range( - pipeline_stored_description.description_hash); - bool pipeline_found = false; - for (auto it = found_range.first; it != found_range.second; ++it) { - Pipeline* found_pipeline = it->second; - if (!std::memcmp(&found_pipeline->description.description, - &pipeline_description, - sizeof(pipeline_description))) { - pipeline_found = true; - break; - } - } - if (pipeline_found) { - continue; - } + // Launch additional creation threads to use all cores to create + // pipelines faster. Will also be using the main thread, so minus 1. 
+ size_t creation_thread_original_count = creation_threads_.size(); + size_t creation_thread_needed_count = std::max( + std::min(pipeline_stored_descriptions.size(), logical_processor_count) - + size_t(1), + creation_thread_original_count); + while (creation_threads_.size() < creation_thread_original_count) { + size_t creation_thread_index = creation_threads_.size(); + std::unique_ptr creation_thread = + xe::threading::Thread::Create({}, [this, creation_thread_index]() { + CreationThread(creation_thread_index); + }); + creation_thread->set_name("D3D12 Pipelines"); + creation_threads_.push_back(std::move(creation_thread)); + } - PipelineRuntimeDescription pipeline_runtime_description; - auto vertex_shader_it = - shaders_.find(pipeline_description.vertex_shader_hash); - if (vertex_shader_it == shaders_.end()) { - continue; - } - pipeline_runtime_description.vertex_shader = vertex_shader_it->second; - if (!pipeline_runtime_description.vertex_shader->is_valid()) { - continue; - } - if (pipeline_description.pixel_shader_hash) { - auto pixel_shader_it = - shaders_.find(pipeline_description.pixel_shader_hash); - if (pixel_shader_it == shaders_.end()) { - continue; - } - pipeline_runtime_description.pixel_shader = pixel_shader_it->second; - if (!pipeline_runtime_description.pixel_shader->is_valid()) { - continue; - } - } else { - pipeline_runtime_description.pixel_shader = nullptr; - } - pipeline_runtime_description.root_signature = - command_processor_.GetRootSignature( - pipeline_runtime_description.vertex_shader, - pipeline_runtime_description.pixel_shader); - if (!pipeline_runtime_description.root_signature) { - continue; - } - std::memcpy(&pipeline_runtime_description.description, - &pipeline_description, sizeof(pipeline_description)); + size_t pipelines_created = 0; + for (const PipelineStoredDescription& pipeline_stored_description : + pipeline_stored_descriptions) { + const PipelineDescription& pipeline_description = + pipeline_stored_description.description; + // 
Skip already known pipelines - those have already been enqueued. + auto found_range = + pipelines_.equal_range(pipeline_stored_description.description_hash); + bool pipeline_found = false; + for (auto it = found_range.first; it != found_range.second; ++it) { + Pipeline* found_pipeline = it->second; + if (!std::memcmp(&found_pipeline->description.description, + &pipeline_description, sizeof(pipeline_description))) { + pipeline_found = true; + break; + } + } + if (pipeline_found) { + continue; + } - Pipeline* new_pipeline = new Pipeline; - new_pipeline->state = nullptr; - std::memcpy(&new_pipeline->description, &pipeline_runtime_description, - sizeof(pipeline_runtime_description)); - pipelines_.emplace(pipeline_stored_description.description_hash, - new_pipeline); - COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size()); - if (!creation_threads_.empty()) { - // Submit the pipeline for creation to any available thread. - { - std::lock_guard lock(creation_request_lock_); - creation_queue_.push_back(new_pipeline); - } - creation_request_cond_.notify_one(); - } else { - new_pipeline->state = - CreateD3D12Pipeline(pipeline_runtime_description); - } - ++pipelines_created; + PipelineRuntimeDescription pipeline_runtime_description; + auto vertex_shader_it = + shaders_.find(pipeline_description.vertex_shader_hash); + if (vertex_shader_it == shaders_.end()) { + continue; + } + D3D12Shader* vertex_shader = vertex_shader_it->second; + pipeline_runtime_description.vertex_shader = + static_cast( + vertex_shader->GetTranslation( + pipeline_description.vertex_shader_modification)); + if (!pipeline_runtime_description.vertex_shader || + !pipeline_runtime_description.vertex_shader->is_valid()) { + continue; + } + D3D12Shader* pixel_shader; + if (pipeline_description.pixel_shader_hash) { + auto pixel_shader_it = + shaders_.find(pipeline_description.pixel_shader_hash); + if (pixel_shader_it == shaders_.end()) { + continue; } - CreateQueuedPipelinesOnProcessorThread(); - if 
(creation_threads_.size() > creation_thread_original_count) { - { - std::lock_guard lock(creation_request_lock_); - creation_threads_shutdown_from_ = creation_thread_original_count; - // Assuming the queue is empty because of - // CreateQueuedPipelinesOnProcessorThread. - } - creation_request_cond_.notify_all(); - while (creation_threads_.size() > creation_thread_original_count) { - xe::threading::Wait(creation_threads_.back().get(), false); - creation_threads_.pop_back(); - } - bool await_creation_completion_event; - { - // Cleanup so additional threads can be created later again. - std::lock_guard lock(creation_request_lock_); - creation_threads_shutdown_from_ = SIZE_MAX; - // If the invocation is blocking, all the shader storage - // initialization is expected to be done before proceeding, to avoid - // latency in the command processor after the invocation. - await_creation_completion_event = - blocking && creation_threads_busy_ != 0; - if (await_creation_completion_event) { - creation_completion_event_->Reset(); - creation_completion_set_event_ = true; - } - } - if (await_creation_completion_event) { - creation_request_cond_.notify_one(); - xe::threading::Wait(creation_completion_event_.get(), false); - } + pixel_shader = pixel_shader_it->second; + pipeline_runtime_description.pixel_shader = + static_cast( + pixel_shader->GetTranslation( + pipeline_description.pixel_shader_modification)); + if (!pipeline_runtime_description.pixel_shader || + !pipeline_runtime_description.pixel_shader->is_valid()) { + continue; } - XELOGGPU( - "Created {} graphics pipelines from the storage in {} milliseconds", - pipelines_created, - (xe::Clock::QueryHostTickCount() - - pipeline_storage_initialization_start_) * - 1000 / xe::Clock::QueryHostTickFrequency()); + } else { + pixel_shader = nullptr; + pipeline_runtime_description.pixel_shader = nullptr; + } + pipeline_runtime_description.root_signature = + command_processor_.GetRootSignature( + vertex_shader, pixel_shader, + 
DxbcShaderTranslator::Modification( + pipeline_description.vertex_shader_modification) + .host_vertex_shader_type != + Shader::HostVertexShaderType::kVertex); + if (!pipeline_runtime_description.root_signature) { + continue; + } + std::memcpy(&pipeline_runtime_description.description, + &pipeline_description, sizeof(pipeline_description)); + + Pipeline* new_pipeline = new Pipeline; + new_pipeline->state = nullptr; + std::memcpy(&new_pipeline->description, &pipeline_runtime_description, + sizeof(pipeline_runtime_description)); + pipelines_.emplace(pipeline_stored_description.description_hash, + new_pipeline); + COUNT_profile_set("gpu/pipeline_cache/pipelines", pipelines_.size()); + if (!creation_threads_.empty()) { + // Submit the pipeline for creation to any available thread. + { + std::lock_guard lock(creation_request_lock_); + creation_queue_.push_back(new_pipeline); + } + creation_request_cond_.notify_one(); + } else { + new_pipeline->state = CreateD3D12Pipeline(pipeline_runtime_description); + } + ++pipelines_created; + } + + CreateQueuedPipelinesOnProcessorThread(); + if (creation_threads_.size() > creation_thread_original_count) { + { + std::lock_guard lock(creation_request_lock_); + creation_threads_shutdown_from_ = creation_thread_original_count; + // Assuming the queue is empty because of + // CreateQueuedPipelinesOnProcessorThread. + } + creation_request_cond_.notify_all(); + while (creation_threads_.size() > creation_thread_original_count) { + xe::threading::Wait(creation_threads_.back().get(), false); + creation_threads_.pop_back(); + } + bool await_creation_completion_event; + { + // Cleanup so additional threads can be created later again. + std::lock_guard lock(creation_request_lock_); + creation_threads_shutdown_from_ = SIZE_MAX; + // If the invocation is blocking, all the shader storage initialization + // is expected to be done before proceeding, to avoid latency in the + // command processor after the invocation. 
+ await_creation_completion_event = + blocking && creation_threads_busy_ != 0; + if (await_creation_completion_event) { + creation_completion_event_->Reset(); + creation_completion_set_event_ = true; + } + } + if (await_creation_completion_event) { + creation_request_cond_.notify_one(); + xe::threading::Wait(creation_completion_event_.get(), false); } } - xe::filesystem::TruncateStdioFile(pipeline_storage_file_, - pipeline_storage_valid_bytes); + + XELOGGPU( + "Created {} graphics pipelines (not including reading the " + "descriptions) from the storage in {} milliseconds", + pipelines_created, + (xe::Clock::QueryHostTickCount() - pipeline_creation_start_) * 1000 / + xe::Clock::QueryHostTickFrequency()); + // If any pipeline descriptions were corrupted (or the whole file has excess + // bytes in the end), truncate to the last valid pipeline description. + xe::filesystem::TruncateStdioFile( + pipeline_storage_file_, + uint64_t(sizeof(pipeline_storage_file_header) + + sizeof(PipelineStoredDescription) * + pipeline_stored_descriptions.size())); } else { xe::filesystem::TruncateStdioFile(pipeline_storage_file_, 0); pipeline_storage_file_header.magic = pipeline_storage_magic; pipeline_storage_file_header.magic_api = pipeline_storage_magic_api; pipeline_storage_file_header.version_swapped = - xe::byte_swap(PipelineDescription::kVersion); + pipeline_storage_version_swapped; + // Reserved for future (for Vulkan) - host device features affecting legal + // pipeline descriptions. + pipeline_storage_file_header.device_features = 0; fwrite(&pipeline_storage_file_header, sizeof(pipeline_storage_file_header), 1, pipeline_storage_file_); } - shader_storage_root_ = storage_root; + shader_storage_cache_root_ = cache_root; shader_storage_title_id_ = title_id; // Start the storage writing thread. 
@@ -706,7 +773,7 @@ void PipelineCache::ShutdownShaderStorage() { shader_storage_file_flush_needed_ = false; } - shader_storage_root_.clear(); + shader_storage_cache_root_.clear(); shader_storage_title_id_ = 0; } @@ -757,11 +824,17 @@ bool PipelineCache::IsCreatingPipelines() { } D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type, - uint32_t guest_address, const uint32_t* host_address, uint32_t dword_count) { // Hash the input memory and lookup the shader. - uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0); + return LoadShader(shader_type, host_address, dword_count, + XXH3_64bits(host_address, dword_count * sizeof(uint32_t))); +} + +D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type, + const uint32_t* host_address, + uint32_t dword_count, + uint64_t data_hash) { auto it = shaders_.find(data_hash); if (it != shaders_.end()) { // Shader has been previously loaded. @@ -774,12 +847,64 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type, D3D12Shader* shader = new D3D12Shader(shader_type, data_hash, host_address, dword_count); shaders_.emplace(data_hash, shader); + if (!cvars::dump_shaders.empty()) { + shader->DumpUcodeBinary(cvars::dump_shaders); + } return shader; } -Shader::HostVertexShaderType PipelineCache::GetHostVertexShaderTypeIfValid() - const { +bool PipelineCache::GetCurrentShaderModifications( + DxbcShaderTranslator::Modification& vertex_shader_modification_out, + DxbcShaderTranslator::Modification& pixel_shader_modification_out) const { + Shader::HostVertexShaderType host_vertex_shader_type = + GetCurrentHostVertexShaderTypeIfValid(); + if (host_vertex_shader_type == Shader::HostVertexShaderType(-1)) { + return false; + } + vertex_shader_modification_out = DxbcShaderTranslator::Modification( + shader_translator_->GetDefaultModification(xenos::ShaderType::kVertex, + host_vertex_shader_type)); + DxbcShaderTranslator::Modification pixel_shader_modification( + 
shader_translator_->GetDefaultModification(xenos::ShaderType::kPixel)); + if (!edram_rov_used_) { + const auto& regs = register_file_; + using DepthStencilMode = + DxbcShaderTranslator::Modification::DepthStencilMode; + if ((depth_float24_conversion_ == + flags::DepthFloat24Conversion::kOnOutputTruncating || + depth_float24_conversion_ == + flags::DepthFloat24Conversion::kOnOutputRounding) && + regs.Get().z_enable && + regs.Get().depth_format == + xenos::DepthRenderTargetFormat::kD24FS8) { + pixel_shader_modification.depth_stencil_mode = + depth_float24_conversion_ == + flags::DepthFloat24Conversion::kOnOutputTruncating + ? DepthStencilMode::kFloat24Truncating + : DepthStencilMode::kFloat24Rounding; + } else { + // Hint to enable early depth/stencil writing if possible - whether it + // will actually take effect depends on the shader itself, it's not known + // before translation. + auto rb_colorcontrol = regs.Get(); + if ((!rb_colorcontrol.alpha_test_enable || + rb_colorcontrol.alpha_func == xenos::CompareFunction::kAlways) && + !rb_colorcontrol.alpha_to_mask_enable) { + pixel_shader_modification.depth_stencil_mode = + DepthStencilMode::kEarlyHint; + } else { + pixel_shader_modification.depth_stencil_mode = + DepthStencilMode::kNoModifiers; + } + } + } + pixel_shader_modification_out = pixel_shader_modification; + return true; +} + +Shader::HostVertexShaderType +PipelineCache::GetCurrentHostVertexShaderTypeIfValid() const { // If the values this functions returns are changed, INVALIDATE THE SHADER // STORAGE (increase kVersion for BOTH shaders and pipelines)! 
The exception // is when the function originally returned "unsupported", but started to @@ -855,8 +980,8 @@ Shader::HostVertexShaderType PipelineCache::GetHostVertexShaderTypeIfValid() } bool PipelineCache::EnsureShadersTranslated( - D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, - Shader::HostVertexShaderType host_vertex_shader_type) { + D3D12Shader::D3D12Translation* vertex_shader, + D3D12Shader::D3D12Translation* pixel_shader) { const auto& regs = register_file_; auto sq_program_cntl = regs.Get(); @@ -869,18 +994,19 @@ bool PipelineCache::EnsureShadersTranslated( if (!vertex_shader->is_translated()) { if (!TranslateShader(*shader_translator_, *vertex_shader, sq_program_cntl, - dxbc_converter_, dxc_utils_, dxc_compiler_, - host_vertex_shader_type)) { + dxbc_converter_, dxc_utils_, dxc_compiler_)) { XELOGE("Failed to translate the vertex shader!"); return false; } - if (shader_storage_file_) { + if (shader_storage_file_ && vertex_shader->shader().ucode_storage_index() != + shader_storage_index_) { + vertex_shader->shader().set_ucode_storage_index(shader_storage_index_); assert_not_null(storage_write_thread_); shader_storage_file_flush_needed_ = true; { std::lock_guard lock(storage_write_request_lock_); storage_write_shader_queue_.push_back( - std::make_pair(vertex_shader, sq_program_cntl)); + std::make_pair(&vertex_shader->shader(), sq_program_cntl)); } storage_write_request_cond_.notify_all(); } @@ -892,13 +1018,15 @@ bool PipelineCache::EnsureShadersTranslated( XELOGE("Failed to translate the pixel shader!"); return false; } - if (shader_storage_file_) { + if (shader_storage_file_ && + pixel_shader->shader().ucode_storage_index() != shader_storage_index_) { + pixel_shader->shader().set_ucode_storage_index(shader_storage_index_); assert_not_null(storage_write_thread_); shader_storage_file_flush_needed_ = true; { std::lock_guard lock(storage_write_request_lock_); storage_write_shader_queue_.push_back( - std::make_pair(pixel_shader, sq_program_cntl)); + 
std::make_pair(&pixel_shader->shader(), sq_program_cntl)); } storage_write_request_cond_.notify_all(); } @@ -908,9 +1036,9 @@ bool PipelineCache::EnsureShadersTranslated( } bool PipelineCache::ConfigurePipeline( - D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, + D3D12Shader::D3D12Translation* vertex_shader, + D3D12Shader::D3D12Translation* pixel_shader, xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format, - bool early_z, const RenderTargetCache::PipelineRenderTarget render_targets[5], void** pipeline_handle_out, ID3D12RootSignature** root_signature_out) { #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES @@ -922,7 +1050,7 @@ bool PipelineCache::ConfigurePipeline( PipelineRuntimeDescription runtime_description; if (!GetCurrentStateDescription(vertex_shader, pixel_shader, primitive_type, - index_format, early_z, render_targets, + index_format, render_targets, runtime_description)) { return false; } @@ -937,7 +1065,7 @@ bool PipelineCache::ConfigurePipeline( } // Find an existing pipeline in the cache. 
- uint64_t hash = XXH64(&description, sizeof(description), 0); + uint64_t hash = XXH3_64bits(&description, sizeof(description)); auto found_range = pipelines_.equal_range(hash); for (auto it = found_range.first; it != found_range.second; ++it) { Pipeline* found_pipeline = it->second; @@ -950,9 +1078,7 @@ bool PipelineCache::ConfigurePipeline( } } - if (!EnsureShadersTranslated( - vertex_shader, pixel_shader, - Shader::HostVertexShaderType(description.host_vertex_shader_type))) { + if (!EnsureShadersTranslated(vertex_shader, pixel_shader)) { return false; } @@ -995,14 +1121,17 @@ bool PipelineCache::ConfigurePipeline( return true; } -bool PipelineCache::TranslateShader( - DxbcShaderTranslator& translator, D3D12Shader& shader, - reg::SQ_PROGRAM_CNTL cntl, IDxbcConverter* dxbc_converter, - IDxcUtils* dxc_utils, IDxcCompiler* dxc_compiler, - Shader::HostVertexShaderType host_vertex_shader_type) { +bool PipelineCache::TranslateShader(DxbcShaderTranslator& translator, + D3D12Shader::D3D12Translation& translation, + reg::SQ_PROGRAM_CNTL cntl, + IDxbcConverter* dxbc_converter, + IDxcUtils* dxc_utils, + IDxcCompiler* dxc_compiler) { + D3D12Shader& shader = static_cast(translation.shader()); + // Perform translation. // If this fails the shader will be marked as invalid and ignored later. 
- if (!translator.Translate(&shader, cntl, host_vertex_shader_type)) { + if (!translator.Translate(translation, cntl)) { XELOGE("Shader {:016X} translation failed; marking as ignored", shader.ucode_data_hash()); return false; @@ -1010,7 +1139,8 @@ bool PipelineCache::TranslateShader( const char* host_shader_type; if (shader.type() == xenos::ShaderType::kVertex) { - switch (shader.host_vertex_shader_type()) { + DxbcShaderTranslator::Modification modification(translation.modification()); + switch (modification.host_vertex_shader_type) { case Shader::HostVertexShaderType::kLineDomainCPIndexed: host_shader_type = "control-point-indexed line domain"; break; @@ -1039,169 +1169,156 @@ bool PipelineCache::TranslateShader( shader.ucode_dword_count() * 4, shader.ucode_data_hash(), shader.ucode_disassembly().c_str()); - // Set up texture and sampler bindings. - uint32_t texture_binding_count; - const DxbcShaderTranslator::TextureBinding* translator_texture_bindings = - translator.GetTextureBindings(texture_binding_count); - uint32_t sampler_binding_count; - const DxbcShaderTranslator::SamplerBinding* sampler_bindings = - translator.GetSamplerBindings(sampler_binding_count); - shader.SetTexturesAndSamplers(translator_texture_bindings, - texture_binding_count, sampler_bindings, - sampler_binding_count); - assert_false(bindless_resources_used_ && - texture_binding_count + sampler_binding_count > - D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 4); - // Get hashable texture bindings, without translator-specific info. - const D3D12Shader::TextureBinding* texture_bindings = - shader.GetTextureBindings(texture_binding_count); - size_t texture_binding_layout_bytes = - texture_binding_count * sizeof(*texture_bindings); - uint64_t texture_binding_layout_hash = 0; - if (texture_binding_count) { - texture_binding_layout_hash = - XXH64(texture_bindings, texture_binding_layout_bytes, 0); - } - uint32_t bindless_sampler_count = - bindless_resources_used_ ? 
sampler_binding_count : 0; - uint64_t bindless_sampler_layout_hash = 0; - if (bindless_sampler_count) { - XXH64_state_t hash_state; - XXH64_reset(&hash_state, 0); - for (uint32_t i = 0; i < bindless_sampler_count; ++i) { - XXH64_update(&hash_state, &sampler_bindings[i].bindless_descriptor_index, - sizeof(sampler_bindings[i].bindless_descriptor_index)); - } - bindless_sampler_layout_hash = XXH64_digest(&hash_state); - } - // Obtain the unique IDs of binding layouts if there are any texture bindings - // or bindless samplers, for invalidation in the command processor. - size_t texture_binding_layout_uid = kLayoutUIDEmpty; - // Use sampler count for the bindful case because it's the only thing that - // must be the same for layouts to be compatible in this case - // (instruction-specified parameters are used as overrides for actual - // samplers). - static_assert( - kLayoutUIDEmpty == 0, - "Empty layout UID is assumed to be 0 because for bindful samplers, the " - "UID is their count"); - size_t sampler_binding_layout_uid = bindless_resources_used_ - ? kLayoutUIDEmpty - : size_t(sampler_binding_count); - if (texture_binding_count || bindless_sampler_count) { - std::lock_guard layouts_mutex_(layouts_mutex_); + // Set up texture and sampler binding layouts. 
+ if (shader.EnterBindingLayoutUserUIDSetup()) { + uint32_t texture_binding_count; + const D3D12Shader::TextureBinding* texture_bindings = + shader.GetTextureBindings(texture_binding_count); + uint32_t sampler_binding_count; + const D3D12Shader::SamplerBinding* sampler_bindings = + shader.GetSamplerBindings(sampler_binding_count); + assert_false(bindless_resources_used_ && + texture_binding_count + sampler_binding_count > + D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 4); + size_t texture_binding_layout_bytes = + texture_binding_count * sizeof(*texture_bindings); + uint64_t texture_binding_layout_hash = 0; if (texture_binding_count) { - auto found_range = - texture_binding_layout_map_.equal_range(texture_binding_layout_hash); - for (auto it = found_range.first; it != found_range.second; ++it) { - if (it->second.vector_span_length == texture_binding_count && - !std::memcmp( - texture_binding_layouts_.data() + it->second.vector_span_offset, - texture_bindings, texture_binding_layout_bytes)) { - texture_binding_layout_uid = it->second.uid; - break; - } - } - if (texture_binding_layout_uid == kLayoutUIDEmpty) { - static_assert( - kLayoutUIDEmpty == 0, - "Layout UID is size + 1 because it's assumed that 0 is the UID for " - "an empty layout"); - texture_binding_layout_uid = texture_binding_layout_map_.size() + 1; - LayoutUID new_uid; - new_uid.uid = texture_binding_layout_uid; - new_uid.vector_span_offset = texture_binding_layouts_.size(); - new_uid.vector_span_length = texture_binding_count; - texture_binding_layouts_.resize(new_uid.vector_span_offset + - texture_binding_count); - std::memcpy( - texture_binding_layouts_.data() + new_uid.vector_span_offset, - texture_bindings, texture_binding_layout_bytes); - texture_binding_layout_map_.emplace(texture_binding_layout_hash, - new_uid); - } + texture_binding_layout_hash = + XXH3_64bits(texture_bindings, texture_binding_layout_bytes); } + uint32_t bindless_sampler_count = + bindless_resources_used_ ? 
sampler_binding_count : 0; + uint64_t bindless_sampler_layout_hash = 0; if (bindless_sampler_count) { - auto found_range = - bindless_sampler_layout_map_.equal_range(sampler_binding_layout_uid); - for (auto it = found_range.first; it != found_range.second; ++it) { - if (it->second.vector_span_length != bindless_sampler_count) { - continue; - } - sampler_binding_layout_uid = it->second.uid; - const uint32_t* vector_bindless_sampler_layout = - bindless_sampler_layouts_.data() + it->second.vector_span_offset; - for (uint32_t i = 0; i < bindless_sampler_count; ++i) { - if (vector_bindless_sampler_layout[i] != - sampler_bindings[i].bindless_descriptor_index) { - sampler_binding_layout_uid = kLayoutUIDEmpty; + XXH3_state_t hash_state; + XXH3_64bits_reset(&hash_state); + for (uint32_t i = 0; i < bindless_sampler_count; ++i) { + XXH3_64bits_update( + &hash_state, &sampler_bindings[i].bindless_descriptor_index, + sizeof(sampler_bindings[i].bindless_descriptor_index)); + } + bindless_sampler_layout_hash = XXH3_64bits_digest(&hash_state); + } + // Obtain the unique IDs of binding layouts if there are any texture + // bindings or bindless samplers, for invalidation in the command processor. + size_t texture_binding_layout_uid = kLayoutUIDEmpty; + // Use sampler count for the bindful case because it's the only thing that + // must be the same for layouts to be compatible in this case + // (instruction-specified parameters are used as overrides for actual + // samplers). + static_assert( + kLayoutUIDEmpty == 0, + "Empty layout UID is assumed to be 0 because for bindful samplers, the " + "UID is their count"); + size_t sampler_binding_layout_uid = bindless_resources_used_ + ? 
kLayoutUIDEmpty + : size_t(sampler_binding_count); + if (texture_binding_count || bindless_sampler_count) { + std::lock_guard layouts_mutex_(layouts_mutex_); + if (texture_binding_count) { + auto found_range = texture_binding_layout_map_.equal_range( + texture_binding_layout_hash); + for (auto it = found_range.first; it != found_range.second; ++it) { + if (it->second.vector_span_length == texture_binding_count && + !std::memcmp(texture_binding_layouts_.data() + + it->second.vector_span_offset, + texture_bindings, texture_binding_layout_bytes)) { + texture_binding_layout_uid = it->second.uid; break; } } - if (sampler_binding_layout_uid != kLayoutUIDEmpty) { - break; + if (texture_binding_layout_uid == kLayoutUIDEmpty) { + static_assert( + kLayoutUIDEmpty == 0, + "Layout UID is size + 1 because it's assumed that 0 is the UID " + "for an empty layout"); + texture_binding_layout_uid = texture_binding_layout_map_.size() + 1; + LayoutUID new_uid; + new_uid.uid = texture_binding_layout_uid; + new_uid.vector_span_offset = texture_binding_layouts_.size(); + new_uid.vector_span_length = texture_binding_count; + texture_binding_layouts_.resize(new_uid.vector_span_offset + + texture_binding_count); + std::memcpy( + texture_binding_layouts_.data() + new_uid.vector_span_offset, + texture_bindings, texture_binding_layout_bytes); + texture_binding_layout_map_.emplace(texture_binding_layout_hash, + new_uid); } } - if (sampler_binding_layout_uid == kLayoutUIDEmpty) { - sampler_binding_layout_uid = bindless_sampler_layout_map_.size(); - LayoutUID new_uid; - static_assert( - kLayoutUIDEmpty == 0, - "Layout UID is size + 1 because it's assumed that 0 is the UID for " - "an empty layout"); - new_uid.uid = sampler_binding_layout_uid + 1; - new_uid.vector_span_offset = bindless_sampler_layouts_.size(); - new_uid.vector_span_length = sampler_binding_count; - bindless_sampler_layouts_.resize(new_uid.vector_span_offset + - sampler_binding_count); - uint32_t* vector_bindless_sampler_layout = 
- bindless_sampler_layouts_.data() + new_uid.vector_span_offset; - for (uint32_t i = 0; i < bindless_sampler_count; ++i) { - vector_bindless_sampler_layout[i] = - sampler_bindings[i].bindless_descriptor_index; + if (bindless_sampler_count) { + auto found_range = bindless_sampler_layout_map_.equal_range( + sampler_binding_layout_uid); + for (auto it = found_range.first; it != found_range.second; ++it) { + if (it->second.vector_span_length != bindless_sampler_count) { + continue; + } + sampler_binding_layout_uid = it->second.uid; + const uint32_t* vector_bindless_sampler_layout = + bindless_sampler_layouts_.data() + it->second.vector_span_offset; + for (uint32_t i = 0; i < bindless_sampler_count; ++i) { + if (vector_bindless_sampler_layout[i] != + sampler_bindings[i].bindless_descriptor_index) { + sampler_binding_layout_uid = kLayoutUIDEmpty; + break; + } + } + if (sampler_binding_layout_uid != kLayoutUIDEmpty) { + break; + } + } + if (sampler_binding_layout_uid == kLayoutUIDEmpty) { + sampler_binding_layout_uid = bindless_sampler_layout_map_.size(); + LayoutUID new_uid; + static_assert( + kLayoutUIDEmpty == 0, + "Layout UID is size + 1 because it's assumed that 0 is the UID " + "for an empty layout"); + new_uid.uid = sampler_binding_layout_uid + 1; + new_uid.vector_span_offset = bindless_sampler_layouts_.size(); + new_uid.vector_span_length = sampler_binding_count; + bindless_sampler_layouts_.resize(new_uid.vector_span_offset + + sampler_binding_count); + uint32_t* vector_bindless_sampler_layout = + bindless_sampler_layouts_.data() + new_uid.vector_span_offset; + for (uint32_t i = 0; i < bindless_sampler_count; ++i) { + vector_bindless_sampler_layout[i] = + sampler_bindings[i].bindless_descriptor_index; + } + bindless_sampler_layout_map_.emplace(bindless_sampler_layout_hash, + new_uid); } - bindless_sampler_layout_map_.emplace(bindless_sampler_layout_hash, - new_uid); } } - } - shader.SetTextureBindingLayoutUserUID(texture_binding_layout_uid); - 
shader.SetSamplerBindingLayoutUserUID(sampler_binding_layout_uid); - - // Create a version of the shader with early depth/stencil forced by Xenia - // itself when it's safe to do so or when EARLY_Z_ENABLE is set in - // RB_DEPTHCONTROL. - if (shader.type() == xenos::ShaderType::kPixel && !edram_rov_used_ && - !shader.writes_depth()) { - shader.SetForcedEarlyZShaderObject( - std::move(DxbcShaderTranslator::ForceEarlyDepthStencil( - shader.translated_binary().data()))); + shader.SetTextureBindingLayoutUserUID(texture_binding_layout_uid); + shader.SetSamplerBindingLayoutUserUID(sampler_binding_layout_uid); } // Disassemble the shader for dumping. auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); if (cvars::d3d12_dxbc_disasm_dxilconv) { - shader.DisassembleDxbc(provider, cvars::d3d12_dxbc_disasm, dxbc_converter, - dxc_utils, dxc_compiler); + translation.DisassembleDxbcAndDxil(provider, cvars::d3d12_dxbc_disasm, + dxbc_converter, dxc_utils, dxc_compiler); } else { - shader.DisassembleDxbc(provider, cvars::d3d12_dxbc_disasm); + translation.DisassembleDxbcAndDxil(provider, cvars::d3d12_dxbc_disasm); } // Dump shader files if desired. if (!cvars::dump_shaders.empty()) { - shader.Dump(cvars::dump_shaders, - (shader.type() == xenos::ShaderType::kPixel) - ? (edram_rov_used_ ? "d3d12_rov" : "d3d12_rtv") - : "d3d12"); + translation.Dump(cvars::dump_shaders, + (shader.type() == xenos::ShaderType::kPixel) + ? (edram_rov_used_ ? 
"d3d12_rov" : "d3d12_rtv") + : "d3d12"); } - return shader.is_valid(); + return translation.is_valid(); } bool PipelineCache::GetCurrentStateDescription( - D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, + D3D12Shader::D3D12Translation* vertex_shader, + D3D12Shader::D3D12Translation* pixel_shader, xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format, - bool early_z, const RenderTargetCache::PipelineRenderTarget render_targets[5], PipelineRuntimeDescription& runtime_description_out) { PipelineDescription& description_out = runtime_description_out.description; @@ -1212,19 +1329,30 @@ bool PipelineCache::GetCurrentStateDescription( // Initialize all unused fields to zero for comparison/hashing. std::memset(&runtime_description_out, 0, sizeof(runtime_description_out)); + bool tessellated = + DxbcShaderTranslator::Modification(vertex_shader->modification()) + .host_vertex_shader_type != Shader::HostVertexShaderType::kVertex; + // Root signature. - runtime_description_out.root_signature = - command_processor_.GetRootSignature(vertex_shader, pixel_shader); + runtime_description_out.root_signature = command_processor_.GetRootSignature( + static_cast(&vertex_shader->shader()), + pixel_shader ? static_cast(&pixel_shader->shader()) + : nullptr, + tessellated); if (runtime_description_out.root_signature == nullptr) { return false; } // Shaders. 
runtime_description_out.vertex_shader = vertex_shader; - description_out.vertex_shader_hash = vertex_shader->ucode_data_hash(); + description_out.vertex_shader_hash = + vertex_shader->shader().ucode_data_hash(); + description_out.vertex_shader_modification = vertex_shader->modification(); if (pixel_shader) { runtime_description_out.pixel_shader = pixel_shader; - description_out.pixel_shader_hash = pixel_shader->ucode_data_hash(); + description_out.pixel_shader_hash = + pixel_shader->shader().ucode_data_hash(); + description_out.pixel_shader_modification = pixel_shader->modification(); } // Index buffer strip cut value. @@ -1239,13 +1367,10 @@ bool PipelineCache::GetCurrentStateDescription( } // Host vertex shader type and primitive topology. - Shader::HostVertexShaderType host_vertex_shader_type = - GetHostVertexShaderTypeIfValid(); - if (host_vertex_shader_type == Shader::HostVertexShaderType(-1)) { - return false; - } - description_out.host_vertex_shader_type = host_vertex_shader_type; - if (host_vertex_shader_type == Shader::HostVertexShaderType::kVertex) { + if (tessellated) { + description_out.primitive_topology_type_or_tessellation_mode = + uint32_t(regs.Get().tess_mode); + } else { switch (primitive_type) { case xenos::PrimitiveType::kPointList: description_out.primitive_topology_type_or_tessellation_mode = @@ -1280,14 +1405,10 @@ bool PipelineCache::GetCurrentStateDescription( description_out.geometry_shader = PipelineGeometryShader::kNone; break; } - } else { - description_out.primitive_topology_type_or_tessellation_mode = - uint32_t(regs.Get().tess_mode); } - bool primitive_polygonal = xenos::IsPrimitivePolygonal( - host_vertex_shader_type != Shader::HostVertexShaderType::kVertex, - primitive_type); + bool primitive_polygonal = + xenos::IsPrimitivePolygonal(tessellated, primitive_type); // Rasterizer state. 
// Because Direct3D 12 doesn't support per-side fill mode and depth bias, the @@ -1386,8 +1507,7 @@ bool PipelineCache::GetCurrentStateDescription( description_out.depth_bias_slope_scaled = poly_offset_scale * (1.0f / 16.0f); } - if (cvars::d3d12_tessellation_wireframe && - host_vertex_shader_type != Shader::HostVertexShaderType::kVertex) { + if (tessellated && cvars::d3d12_tessellation_wireframe) { description_out.fill_mode_wireframe = 1; } description_out.depth_clip = !regs.Get().clip_disable; @@ -1453,13 +1573,11 @@ bool PipelineCache::GetCurrentStateDescription( } else { description_out.depth_func = xenos::CompareFunction::kAlways; } - if (early_z) { - description_out.force_early_z = 1; - } // Render targets and blending state. 32 because of 0x1F mask, for safety // (all unknown to zero). - uint32_t color_mask = command_processor_.GetCurrentColorMask(pixel_shader); + uint32_t color_mask = command_processor_.GetCurrentColorMask( + pixel_shader ? &pixel_shader->shader() : nullptr); static const PipelineBlendFactor kBlendFactorMap[32] = { /* 0 */ PipelineBlendFactor::kZero, /* 1 */ PipelineBlendFactor::kOne, @@ -1550,11 +1668,11 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline( if (runtime_description.pixel_shader != nullptr) { XELOGGPU("Creating graphics pipeline with VS {:016X}, PS {:016X}", - runtime_description.vertex_shader->ucode_data_hash(), - runtime_description.pixel_shader->ucode_data_hash()); + runtime_description.vertex_shader->shader().ucode_data_hash(), + runtime_description.pixel_shader->shader().ucode_data_hash()); } else { XELOGGPU("Creating graphics pipeline with VS {:016X}", - runtime_description.vertex_shader->ucode_data_hash()); + runtime_description.vertex_shader->shader().ucode_data_hash()); } D3D12_GRAPHICS_PIPELINE_STATE_DESC state_desc; @@ -1580,21 +1698,14 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline( // Primitive topology, vertex, hull, domain and geometry shaders. 
if (!runtime_description.vertex_shader->is_translated()) { XELOGE("Vertex shader {:016X} not translated", - runtime_description.vertex_shader->ucode_data_hash()); + runtime_description.vertex_shader->shader().ucode_data_hash()); assert_always(); return nullptr; } Shader::HostVertexShaderType host_vertex_shader_type = - description.host_vertex_shader_type; - if (runtime_description.vertex_shader->host_vertex_shader_type() != - host_vertex_shader_type) { - XELOGE( - "Vertex shader {:016X} translated into the wrong host shader " - "type", - runtime_description.vertex_shader->ucode_data_hash()); - assert_always(); - return nullptr; - } + DxbcShaderTranslator::Modification( + runtime_description.vertex_shader->modification()) + .host_vertex_shader_type; if (host_vertex_shader_type == Shader::HostVertexShaderType::kVertex) { state_desc.VS.pShaderBytecode = runtime_description.vertex_shader->translated_binary().data(); @@ -1704,24 +1815,34 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline( if (runtime_description.pixel_shader != nullptr) { if (!runtime_description.pixel_shader->is_translated()) { XELOGE("Pixel shader {:016X} not translated", - runtime_description.pixel_shader->ucode_data_hash()); + runtime_description.pixel_shader->shader().ucode_data_hash()); assert_always(); return nullptr; } - const auto& forced_early_z_shader = - runtime_description.pixel_shader->GetForcedEarlyZShaderObject(); - if (description.force_early_z && forced_early_z_shader.size() != 0) { - state_desc.PS.pShaderBytecode = forced_early_z_shader.data(); - state_desc.PS.BytecodeLength = forced_early_z_shader.size(); - } else { - state_desc.PS.pShaderBytecode = - runtime_description.pixel_shader->translated_binary().data(); - state_desc.PS.BytecodeLength = - runtime_description.pixel_shader->translated_binary().size(); - } + state_desc.PS.pShaderBytecode = + runtime_description.pixel_shader->translated_binary().data(); + state_desc.PS.BytecodeLength = + 
runtime_description.pixel_shader->translated_binary().size(); } else if (edram_rov_used_) { state_desc.PS.pShaderBytecode = depth_only_pixel_shader_.data(); state_desc.PS.BytecodeLength = depth_only_pixel_shader_.size(); + } else { + if ((description.depth_func != xenos::CompareFunction::kAlways || + description.depth_write) && + description.depth_format == xenos::DepthRenderTargetFormat::kD24FS8) { + switch (depth_float24_conversion_) { + case flags::DepthFloat24Conversion::kOnOutputTruncating: + state_desc.PS.pShaderBytecode = float24_truncate_ps; + state_desc.PS.BytecodeLength = sizeof(float24_truncate_ps); + break; + case flags::DepthFloat24Conversion::kOnOutputRounding: + state_desc.PS.pShaderBytecode = float24_round_ps; + state_desc.PS.BytecodeLength = sizeof(float24_round_ps); + break; + default: + break; + } + } } // Rasterizer state. @@ -1810,9 +1931,6 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline( state_desc.DSVFormat = RenderTargetCache::GetDepthDXGIFormat(description.depth_format); } - // TODO(Triang3l): EARLY_Z_ENABLE (needs to be enabled in shaders, but alpha - // test is dynamic - should be enabled anyway if there's no alpha test, - // discarding and depth output). // Render targets and blending. 
state_desc.BlendState.IndependentBlendEnable = TRUE; @@ -1874,22 +1992,24 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline( IID_PPV_ARGS(&state)))) { if (runtime_description.pixel_shader != nullptr) { XELOGE("Failed to create graphics pipeline with VS {:016X}, PS {:016X}", - runtime_description.vertex_shader->ucode_data_hash(), - runtime_description.pixel_shader->ucode_data_hash()); + runtime_description.vertex_shader->shader().ucode_data_hash(), + runtime_description.pixel_shader->shader().ucode_data_hash()); } else { XELOGE("Failed to create graphics pipeline with VS {:016X}", - runtime_description.vertex_shader->ucode_data_hash()); + runtime_description.vertex_shader->shader().ucode_data_hash()); } return nullptr; } std::wstring name; if (runtime_description.pixel_shader != nullptr) { - name = fmt::format(L"VS {:016X}, PS {:016X}", - runtime_description.vertex_shader->ucode_data_hash(), - runtime_description.pixel_shader->ucode_data_hash()); + name = fmt::format( + L"VS {:016X}, PS {:016X}", + runtime_description.vertex_shader->shader().ucode_data_hash(), + runtime_description.pixel_shader->shader().ucode_data_hash()); } else { - name = fmt::format(L"VS {:016X}", - runtime_description.vertex_shader->ucode_data_hash()); + name = fmt::format( + L"VS {:016X}", + runtime_description.vertex_shader->shader().ucode_data_hash()); } state->SetName(name.c_str()); return state; @@ -1954,7 +2074,6 @@ void PipelineCache::StorageWriteThread() { shader_header.ucode_data_hash = shader->ucode_data_hash(); shader_header.ucode_dword_count = shader->ucode_dword_count(); shader_header.type = shader->type(); - shader_header.host_vertex_shader_type = shader->host_vertex_shader_type(); shader_header.sq_program_cntl = shader_pair.second; assert_not_null(shader_storage_file_); fwrite(&shader_header, sizeof(shader_header), 1, shader_storage_file_); diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index 8159416d0..fe867c82a 100644 --- 
a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -27,6 +27,7 @@ #include "xenia/gpu/d3d12/d3d12_shader.h" #include "xenia/gpu/d3d12/render_target_cache.h" #include "xenia/gpu/dxbc_shader_translator.h" +#include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/register_file.h" #include "xenia/gpu/xenos.h" #include "xenia/ui/d3d12/d3d12_api.h" @@ -43,36 +44,39 @@ class PipelineCache { PipelineCache(D3D12CommandProcessor& command_processor, const RegisterFile& register_file, bool bindless_resources_used, - bool edram_rov_used, uint32_t resolution_scale); + bool edram_rov_used, + flags::DepthFloat24Conversion depth_float24_conversion, + uint32_t resolution_scale); ~PipelineCache(); bool Initialize(); void Shutdown(); void ClearCache(bool shutting_down = false); - void InitializeShaderStorage(const std::filesystem::path& storage_root, + void InitializeShaderStorage(const std::filesystem::path& cache_root, uint32_t title_id, bool blocking); void ShutdownShaderStorage(); void EndSubmission(); bool IsCreatingPipelines(); - D3D12Shader* LoadShader(xenos::ShaderType shader_type, uint32_t guest_address, + D3D12Shader* LoadShader(xenos::ShaderType shader_type, const uint32_t* host_address, uint32_t dword_count); - // Returns the host vertex shader type for the current draw if it's valid and - // supported, or Shader::HostVertexShaderType(-1) if not. - Shader::HostVertexShaderType GetHostVertexShaderTypeIfValid() const; + // Retrieves the shader modifications for the current state, and returns + // whether they are valid. + bool GetCurrentShaderModifications( + DxbcShaderTranslator::Modification& vertex_shader_modification_out, + DxbcShaderTranslator::Modification& pixel_shader_modification_out) const; // Translates shaders if needed, also making shader info up to date. 
- bool EnsureShadersTranslated( - D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, - Shader::HostVertexShaderType host_vertex_shader_type); + bool EnsureShadersTranslated(D3D12Shader::D3D12Translation* vertex_shader, + D3D12Shader::D3D12Translation* pixel_shader); bool ConfigurePipeline( - D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, + D3D12Shader::D3D12Translation* vertex_shader, + D3D12Shader::D3D12Translation* pixel_shader, xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format, - bool early_z, const RenderTargetCache::PipelineRenderTarget render_targets[5], void** pipeline_handle_out, ID3D12RootSignature** root_signature_out); @@ -86,13 +90,12 @@ class PipelineCache { XEPACKEDSTRUCT(ShaderStoredHeader, { uint64_t ucode_data_hash; - uint32_t ucode_dword_count : 16; + uint32_t ucode_dword_count : 31; xenos::ShaderType type : 1; - Shader::HostVertexShaderType host_vertex_shader_type : 3; reg::SQ_PROGRAM_CNTL sq_program_cntl; - static constexpr uint32_t kVersion = 0x20200405; + static constexpr uint32_t kVersion = 0x20201207; }); // Update PipelineDescription::kVersion if any of the Pipeline* enums are @@ -170,28 +173,28 @@ class PipelineCache { uint64_t vertex_shader_hash; // 0 if drawing without a pixel shader. uint64_t pixel_shader_hash; + uint32_t vertex_shader_modification; + uint32_t pixel_shader_modification; int32_t depth_bias; float depth_bias_slope_scaled; - PipelineStripCutIndex strip_cut_index : 2; // 2 - Shader::HostVertexShaderType host_vertex_shader_type : 3; // 5 + PipelineStripCutIndex strip_cut_index : 2; // 2 // PipelinePrimitiveTopologyType for a vertex shader. // xenos::TessellationMode for a domain shader. - uint32_t primitive_topology_type_or_tessellation_mode : 2; // 7 + uint32_t primitive_topology_type_or_tessellation_mode : 2; // 4 // Zero for non-kVertex host_vertex_shader_type. 
- PipelineGeometryShader geometry_shader : 2; // 9 - uint32_t fill_mode_wireframe : 1; // 10 - PipelineCullMode cull_mode : 2; // 12 - uint32_t front_counter_clockwise : 1; // 13 - uint32_t depth_clip : 1; // 14 - uint32_t rov_msaa : 1; // 15 - xenos::DepthRenderTargetFormat depth_format : 1; // 16 - xenos::CompareFunction depth_func : 3; // 19 - uint32_t depth_write : 1; // 20 - uint32_t stencil_enable : 1; // 21 - uint32_t stencil_read_mask : 8; // 29 - uint32_t force_early_z : 1; // 30 + PipelineGeometryShader geometry_shader : 2; // 6 + uint32_t fill_mode_wireframe : 1; // 7 + PipelineCullMode cull_mode : 2; // 9 + uint32_t front_counter_clockwise : 1; // 10 + uint32_t depth_clip : 1; // 11 + uint32_t rov_msaa : 1; // 12 + xenos::DepthRenderTargetFormat depth_format : 1; // 13 + xenos::CompareFunction depth_func : 3; // 16 + uint32_t depth_write : 1; // 17 + uint32_t stencil_enable : 1; // 18 + uint32_t stencil_read_mask : 8; // 26 uint32_t stencil_write_mask : 8; // 8 xenos::StencilOp stencil_front_fail_op : 3; // 11 @@ -205,7 +208,7 @@ class PipelineCache { PipelineRenderTarget render_targets[4]; - static constexpr uint32_t kVersion = 0x20200405; + static constexpr uint32_t kVersion = 0x20201207; }); XEPACKEDSTRUCT(PipelineStoredDescription, { @@ -215,24 +218,31 @@ class PipelineCache { struct PipelineRuntimeDescription { ID3D12RootSignature* root_signature; - D3D12Shader* vertex_shader; - D3D12Shader* pixel_shader; + D3D12Shader::D3D12Translation* vertex_shader; + D3D12Shader::D3D12Translation* pixel_shader; PipelineDescription description; }; + // Returns the host vertex shader type for the current draw if it's valid and + // supported, or Shader::HostVertexShaderType(-1) if not. + Shader::HostVertexShaderType GetCurrentHostVertexShaderTypeIfValid() const; + + D3D12Shader* LoadShader(xenos::ShaderType shader_type, + const uint32_t* host_address, uint32_t dword_count, + uint64_t data_hash); + // Can be called from multiple threads. 
- bool TranslateShader(DxbcShaderTranslator& translator, D3D12Shader& shader, + bool TranslateShader(DxbcShaderTranslator& translator, + D3D12Shader::D3D12Translation& translation, reg::SQ_PROGRAM_CNTL cntl, IDxbcConverter* dxbc_converter = nullptr, IDxcUtils* dxc_utils = nullptr, - IDxcCompiler* dxc_compiler = nullptr, - Shader::HostVertexShaderType host_vertex_shader_type = - Shader::HostVertexShaderType::kVertex); + IDxcCompiler* dxc_compiler = nullptr); bool GetCurrentStateDescription( - D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, + D3D12Shader::D3D12Translation* vertex_shader, + D3D12Shader::D3D12Translation* pixel_shader, xenos::PrimitiveType primitive_type, xenos::IndexFormat index_format, - bool early_z, const RenderTargetCache::PipelineRenderTarget render_targets[5], PipelineRuntimeDescription& runtime_description_out); @@ -243,6 +253,8 @@ class PipelineCache { const RegisterFile& register_file_; bool bindless_resources_used_; bool edram_rov_used_; + // 20e4 depth conversion mode to use for non-ROV output. + flags::DepthFloat24Conversion depth_float24_conversion_; uint32_t resolution_scale_; // Reusable shader translator. @@ -267,7 +279,7 @@ class PipelineCache { // Texture binding layouts of different shaders, for obtaining layout UIDs. std::vector texture_binding_layouts_; // Map of texture binding layouts used by shaders, for obtaining UIDs. Keys - // are XXH64 hashes of layouts, values need manual collision resolution using + // are XXH3 hashes of layouts, values need manual collision resolution using // layout_vector_offset:layout_length of texture_binding_layouts_. std::unordered_multimap> @@ -275,7 +287,7 @@ class PipelineCache { // Bindless sampler indices of different shaders, for obtaining layout UIDs. // For bindful, sampler count is used as the UID instead. std::vector bindless_sampler_layouts_; - // Keys are XXH64 hashes of used bindless sampler indices. + // Keys are XXH3 hashes of used bindless sampler indices. 
std::unordered_multimap> bindless_sampler_layout_map_; @@ -300,11 +312,14 @@ class PipelineCache { Pipeline* current_pipeline_ = nullptr; // Currently open shader storage path. - std::filesystem::path shader_storage_root_; + std::filesystem::path shader_storage_cache_root_; uint32_t shader_storage_title_id_ = 0; // Shader storage output stream, for preload in the next emulator runs. FILE* shader_storage_file_ = nullptr; + // For only writing shaders to the currently open storage once, incremented + // when switching the storage. + uint32_t shader_storage_index_ = 0; bool shader_storage_file_flush_needed_ = false; // Pipeline storage output stream, for preload in the next emulator runs. diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index e80aba714..77d54e032 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -40,11 +40,13 @@ namespace d3d12 { #include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_32bpp_cs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_64bpp_cs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_color_7e3_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float24and32_cs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float_cs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_unorm_cs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_32bpp_cs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_64bpp_cs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_color_7e3_cs.h" +#include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float24and32_cs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float_cs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_unorm_cs.h" #include "xenia/gpu/shaders/bytecode/d3d12_5_1/resolve_clear_32bpp_2xres_cs.h" @@ -87,6 +89,12 @@ const 
RenderTargetCache::EdramLoadStoreModeInfo {edram_load_depth_float_cs, sizeof(edram_load_depth_float_cs), L"EDRAM Load Float Depth", edram_store_depth_float_cs, sizeof(edram_store_depth_float_cs), L"EDRAM Store Float Depth"}, + {edram_load_depth_float24and32_cs, + sizeof(edram_load_depth_float24and32_cs), + L"EDRAM Load 24-bit & 32-bit Float Depth", + edram_store_depth_float24and32_cs, + sizeof(edram_store_depth_float24and32_cs), + L"EDRAM Store 24-bit & 32-bit Float Depth"}, }; const std::pair @@ -126,6 +134,8 @@ RenderTargetCache::RenderTargetCache(D3D12CommandProcessor& command_processor, RenderTargetCache::~RenderTargetCache() { Shutdown(); } bool RenderTargetCache::Initialize(const TextureCache& texture_cache) { + depth_float24_conversion_ = flags::GetDepthFloat24Conversion(); + // EDRAM buffer size depends on this. resolution_scale_2x_ = texture_cache.IsResolutionScale2X(); assert_false(resolution_scale_2x_ && !edram_rov_used_); @@ -420,7 +430,8 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) { return false; } resolve_clear_64bpp_pipeline_->SetName(L"Resolve Clear 64bpp"); - if (!edram_rov_used_) { + if (!edram_rov_used_ && + depth_float24_conversion_ == flags::DepthFloat24Conversion::kOnCopy) { assert_false(resolution_scale_2x_); resolve_clear_depth_24_32_pipeline_ = ui::d3d12::util::CreateComputePipeline( @@ -434,7 +445,7 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) { Shutdown(); return false; } - resolve_clear_64bpp_pipeline_->SetName( + resolve_clear_depth_24_32_pipeline_->SetName( L"Resolve Clear 24-bit & 32-bit Depth"); } @@ -1266,10 +1277,12 @@ bool RenderTargetCache::Resolve(const Memory& memory, if (clear_depth) { // Also clear the host 32-bit floating-point depth used for loaing and // storing 24-bit floating-point depth at full precision. 
- bool clear_float32_depth = - !edram_rov_used_ && xenos::DepthRenderTargetFormat( - resolve_info.depth_edram_info.format) == - xenos::DepthRenderTargetFormat::kD24FS8; + bool clear_float32_depth = !edram_rov_used_ && + depth_float24_conversion_ == + flags::DepthFloat24Conversion::kOnCopy && + xenos::DepthRenderTargetFormat( + resolve_info.depth_edram_info.format) == + xenos::DepthRenderTargetFormat::kD24FS8; draw_util::ResolveClearShaderConstants depth_clear_constants; resolve_info.GetDepthClearShaderConstants(clear_float32_depth, depth_clear_constants); @@ -1558,7 +1571,8 @@ void RenderTargetCache::RestoreEdramSnapshot(const void* snapshot) { uint32_t RenderTargetCache::GetEdramBufferSize() const { uint32_t size = xenos::kEdramSizeBytes; - if (!edram_rov_used_) { + if (!edram_rov_used_ && + depth_float24_conversion_ == flags::DepthFloat24Conversion::kOnCopy) { // Two 10 MB pages, one containing color and integer depth data, another // with 32-bit float depth when 20e4 depth is used to allow for multipass // drawing without precision loss in case of EDRAM store/load. @@ -1831,12 +1845,15 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget( } RenderTargetCache::EdramLoadStoreMode RenderTargetCache::GetLoadStoreMode( - bool is_depth, uint32_t format) { + bool is_depth, uint32_t format) const { if (is_depth) { - return xenos::DepthRenderTargetFormat(format) == - xenos::DepthRenderTargetFormat::kD24FS8 - ? EdramLoadStoreMode::kDepthFloat - : EdramLoadStoreMode::kDepthUnorm; + if (xenos::DepthRenderTargetFormat(format) == + xenos::DepthRenderTargetFormat::kD24FS8) { + return depth_float24_conversion_ == flags::DepthFloat24Conversion::kOnCopy + ? 
EdramLoadStoreMode::kDepthFloat24And32 + : EdramLoadStoreMode::kDepthFloat; + } + return EdramLoadStoreMode::kDepthUnorm; } xenos::ColorRenderTargetFormat color_format = xenos::ColorRenderTargetFormat(format); diff --git a/src/xenia/gpu/d3d12/render_target_cache.h b/src/xenia/gpu/d3d12/render_target_cache.h index 5069b3cb7..2f71c13c8 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.h +++ b/src/xenia/gpu/d3d12/render_target_cache.h @@ -18,6 +18,7 @@ #include "xenia/gpu/d3d12/d3d12_shared_memory.h" #include "xenia/gpu/d3d12/texture_cache.h" #include "xenia/gpu/draw_util.h" +#include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/register_file.h" #include "xenia/gpu/trace_writer.h" #include "xenia/gpu/xenos.h" @@ -259,6 +260,10 @@ class RenderTargetCache { void Shutdown(); void ClearCache(); + flags::DepthFloat24Conversion depth_float24_conversion() const { + return depth_float24_conversion_; + } + void CompletedSubmissionUpdated(); void BeginSubmission(); void EndFrame(); @@ -318,6 +323,7 @@ class RenderTargetCache { kColor7e3, kDepthUnorm, kDepthFloat, + kDepthFloat24And32, kCount }; @@ -424,7 +430,7 @@ class RenderTargetCache { uint32_t instance); #endif - static EdramLoadStoreMode GetLoadStoreMode(bool is_depth, uint32_t format); + EdramLoadStoreMode GetLoadStoreMode(bool is_depth, uint32_t format) const; // Must be in a frame to call. Stores the dirty areas of the currently bound // render targets and marks them as clean. @@ -442,6 +448,9 @@ class RenderTargetCache { bool bindless_resources_used_; bool edram_rov_used_; + // 20e4 depth conversion mode to use for non-ROV output. + flags::DepthFloat24Conversion depth_float24_conversion_; + // Whether 1 guest pixel is rendered as 2x2 host pixels (currently only // supported with ROV). 
bool resolution_scale_2x_ = false; diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index b5851ba57..a32a8b02d 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -9,8 +9,6 @@ #include "xenia/gpu/d3d12/texture_cache.h" -#include "third_party/xxhash/xxhash.h" - #include #include #include @@ -21,6 +19,7 @@ #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/base/profiling.h" +#include "xenia/base/xxhash.h" #include "xenia/gpu/d3d12/d3d12_command_processor.h" #include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/texture_info.h" diff --git a/src/xenia/gpu/draw_util.cc b/src/xenia/gpu/draw_util.cc index 29072c3e8..a9d9fff92 100644 --- a/src/xenia/gpu/draw_util.cc +++ b/src/xenia/gpu/draw_util.cc @@ -114,6 +114,7 @@ int32_t FloatToD3D11Fixed16p8(float f32) { void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x, float pixel_size_y, bool origin_bottom_left, float x_max, float y_max, bool allow_reverse_z, + bool convert_z_to_float24, ViewportInfo& viewport_info_out) { assert_true(pixel_size_x >= 1.0f); assert_true(pixel_size_y >= 1.0f); @@ -227,6 +228,7 @@ void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x, ndc_offset_y = 0.0f; } } else { + viewport_top = 0.0f; viewport_height = std::min( float(xenos::kTexture2DCubeMaxWidthHeight) * pixel_size_y, y_max); ndc_scale_y = (2.0f * pixel_size_y) / viewport_height; @@ -269,6 +271,17 @@ void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x, ndc_scale_z = -ndc_scale_z; ndc_offset_z = 1.0f - ndc_offset_z; } + if (convert_z_to_float24 && regs.Get().z_enable && + regs.Get().depth_format == + xenos::DepthRenderTargetFormat::kD24FS8) { + // Need to adjust the bounds that the resulting depth values will be clamped + // to after the pixel shader. 
Preferring adding some error to interpolated Z + // instead if conversion can't be done exactly, without modifying clipping + // bounds by adjusting Z in vertex shaders, as that may cause polygons + // placed explicitly at Z = 0 or Z = W to be clipped. + viewport_z_min = xenos::Float20e4To32(xenos::Float32To20e4(viewport_z_min)); + viewport_z_max = xenos::Float20e4To32(xenos::Float32To20e4(viewport_z_max)); + } viewport_info_out.left = viewport_left; viewport_info_out.top = viewport_top; diff --git a/src/xenia/gpu/draw_util.h b/src/xenia/gpu/draw_util.h index 1a9798aeb..c47640a20 100644 --- a/src/xenia/gpu/draw_util.h +++ b/src/xenia/gpu/draw_util.h @@ -53,6 +53,7 @@ struct ViewportInfo { void GetHostViewportInfo(const RegisterFile& regs, float pixel_size_x, float pixel_size_y, bool origin_bottom_left, float x_max, float y_max, bool allow_reverse_z, + bool convert_z_to_float24, ViewportInfo& viewport_info_out); struct Scissor { diff --git a/src/xenia/gpu/dxbc_shader.cc b/src/xenia/gpu/dxbc_shader.cc new file mode 100644 index 000000000..144308d57 --- /dev/null +++ b/src/xenia/gpu/dxbc_shader.cc @@ -0,0 +1,27 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/dxbc_shader.h" + +#include + +namespace xe { +namespace gpu { + +DxbcShader::DxbcShader(xenos::ShaderType shader_type, uint64_t data_hash, + const uint32_t* dword_ptr, uint32_t dword_count) + : Shader(shader_type, data_hash, dword_ptr, dword_count) {} + +Shader::Translation* DxbcShader::CreateTranslationInstance( + uint32_t modification) { + return new DxbcTranslation(*this, modification); +} + +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/dxbc_shader.h b/src/xenia/gpu/dxbc_shader.h new file mode 100644 index 000000000..49439a2a6 --- /dev/null +++ b/src/xenia/gpu/dxbc_shader.h @@ -0,0 +1,83 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_DXBC_SHADER_H_ +#define XENIA_GPU_DXBC_SHADER_H_ + +#include + +#include "xenia/gpu/dxbc_shader_translator.h" +#include "xenia/gpu/shader.h" +#include "xenia/gpu/xenos.h" + +namespace xe { +namespace gpu { + +class DxbcShader : public Shader { + public: + class DxbcTranslation : public Translation { + public: + DxbcTranslation(DxbcShader& shader, uint32_t modification) + : Translation(shader, modification) {} + }; + + DxbcShader(xenos::ShaderType shader_type, uint64_t data_hash, + const uint32_t* dword_ptr, uint32_t dword_count); + + static constexpr uint32_t kMaxTextureBindingIndexBits = + DxbcShaderTranslator::kMaxTextureBindingIndexBits; + static constexpr uint32_t kMaxTextureBindings = + DxbcShaderTranslator::kMaxTextureBindings; + struct TextureBinding { + uint32_t bindless_descriptor_index; + uint32_t fetch_constant; + // Stacked and 3D are separate TextureBindings, even for bindless for null + // descriptor handling simplicity. + xenos::FetchOpDimension dimension; + bool is_signed; + }; + // Safe to hash and compare with memcmp for layout hashing. 
+ const TextureBinding* GetTextureBindings(uint32_t& count_out) const { + count_out = uint32_t(texture_bindings_.size()); + return texture_bindings_.data(); + } + const uint32_t GetUsedTextureMask() const { return used_texture_mask_; } + + static constexpr uint32_t kMaxSamplerBindingIndexBits = + DxbcShaderTranslator::kMaxSamplerBindingIndexBits; + static constexpr uint32_t kMaxSamplerBindings = + DxbcShaderTranslator::kMaxSamplerBindings; + struct SamplerBinding { + uint32_t bindless_descriptor_index; + uint32_t fetch_constant; + xenos::TextureFilter mag_filter; + xenos::TextureFilter min_filter; + xenos::TextureFilter mip_filter; + xenos::AnisoFilter aniso_filter; + }; + const SamplerBinding* GetSamplerBindings(uint32_t& count_out) const { + count_out = uint32_t(sampler_bindings_.size()); + return sampler_bindings_.data(); + } + + protected: + Translation* CreateTranslationInstance(uint32_t modification) override; + + private: + friend class DxbcShaderTranslator; + + std::vector texture_bindings_; + std::vector sampler_bindings_; + uint32_t used_texture_mask_ = 0; +}; + +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_DXBC_SHADER_H_ diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index a08cafd5e..865fbd77e 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -19,6 +19,7 @@ #include "xenia/base/assert.h" #include "xenia/base/cvar.h" #include "xenia/base/math.h" +#include "xenia/gpu/dxbc_shader.h" DEFINE_bool(dxbc_switch, true, "Use switch rather than if for flow control. Turning this off or " @@ -76,64 +77,31 @@ DxbcShaderTranslator::DxbcShaderTranslator(uint32_t vendor_id, } DxbcShaderTranslator::~DxbcShaderTranslator() = default; -std::vector DxbcShaderTranslator::ForceEarlyDepthStencil( - const uint8_t* shader) { - const uint32_t* old_shader = reinterpret_cast(shader); - - // To return something anyway even if patching fails. 
- std::vector new_shader; - uint32_t shader_size_bytes = old_shader[6]; - new_shader.resize(shader_size_bytes); - std::memcpy(new_shader.data(), shader, shader_size_bytes); - - // Find the SHEX chunk. - uint32_t chunk_count = old_shader[7]; - for (uint32_t i = 0; i < chunk_count; ++i) { - uint32_t chunk_offset_bytes = old_shader[8 + i]; - const uint32_t* chunk = old_shader + chunk_offset_bytes / sizeof(uint32_t); - if (chunk[0] != 'XEHS') { - continue; - } - // Find dcl_globalFlags and patch it. - uint32_t code_size_dwords = chunk[3]; - chunk += 4; - for (uint32_t j = 0; j < code_size_dwords;) { - uint32_t opcode_token = chunk[j]; - uint32_t opcode = DECODE_D3D10_SB_OPCODE_TYPE(opcode_token); - if (opcode == D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS) { - opcode_token |= D3D11_SB_GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL; - std::memcpy(new_shader.data() + - (chunk_offset_bytes + (4 + j) * sizeof(uint32_t)), - &opcode_token, sizeof(uint32_t)); - // Recalculate the checksum since the shader was modified. 
- CalculateDXBCChecksum( - reinterpret_cast(new_shader.data()), - shader_size_bytes, - reinterpret_cast(new_shader.data() + - sizeof(uint32_t))); - break; - } - if (opcode == D3D10_SB_OPCODE_CUSTOMDATA) { - j += chunk[j + 1]; - } else { - j += DECODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(opcode_token); - } - } - break; - } - - return std::move(new_shader); -} - std::vector DxbcShaderTranslator::CreateDepthOnlyPixelShader() { - Reset(); + Reset(xenos::ShaderType::kPixel); is_depth_only_pixel_shader_ = true; StartTranslation(); return std::move(CompleteTranslation()); } -void DxbcShaderTranslator::Reset() { - ShaderTranslator::Reset(); +uint32_t DxbcShaderTranslator::GetDefaultModification( + xenos::ShaderType shader_type, + Shader::HostVertexShaderType host_vertex_shader_type) const { + Modification shader_modification; + switch (shader_type) { + case xenos::ShaderType::kVertex: + shader_modification.host_vertex_shader_type = host_vertex_shader_type; + break; + case xenos::ShaderType::kPixel: + shader_modification.depth_stencil_mode = + Modification::DepthStencilMode::kNoModifiers; + break; + } + return shader_modification.value; +} + +void DxbcShaderTranslator::Reset(xenos::ShaderType shader_type) { + ShaderTranslator::Reset(shader_type); shader_code_.clear(); @@ -152,7 +120,7 @@ void DxbcShaderTranslator::Reset() { in_domain_location_used_ = 0; in_primitive_id_used_ = false; in_control_point_index_used_ = false; - in_position_xy_used_ = false; + in_position_used_ = 0; in_front_face_used_ = false; system_temp_count_current_ = 0; @@ -457,7 +425,9 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() { // Remember that x# are only accessible via mov load or store - use a // temporary variable if need to do any computations! 
- switch (host_vertex_shader_type()) { + Shader::HostVertexShaderType host_vertex_shader_type = + GetDxbcShaderModification().host_vertex_shader_type; + switch (host_vertex_shader_type) { case Shader::HostVertexShaderType::kVertex: StartVertexShader_LoadVertexIndex(); break; @@ -618,7 +588,7 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() { default: // TODO(Triang3l): Support line and non-adaptive quad patches. - assert_unhandled_case(host_vertex_shader_type()); + assert_unhandled_case(host_vertex_shader_type); EmitTranslationError( "Unsupported host vertex shader type in StartVertexOrDomainShader"); break; @@ -720,7 +690,7 @@ void DxbcShaderTranslator::StartPixelShader() { // faceness as X sign bit. Using Z as scratch register now. if (edram_rov_used_) { // Get XY address of the current host pixel as float. - in_position_xy_used_ = true; + in_position_used_ |= 0b0011; DxbcOpRoundZ(DxbcDest::R(param_gen_temp, 0b0011), DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition))); // Revert resolution scale - after truncating, so if the pixel position @@ -744,7 +714,7 @@ void DxbcShaderTranslator::StartPixelShader() { } else { // Get XY address of the current SSAA sample by converting // SV_Position.xy to an integer. - in_position_xy_used_ = true; + in_position_used_ |= 0b0011; DxbcOpFToU(DxbcDest::R(param_gen_temp, 0b0011), DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition))); // Undo SSAA that is used instead of MSAA - since it's used as a @@ -870,7 +840,7 @@ void DxbcShaderTranslator::StartPixelShader() { void DxbcShaderTranslator::StartTranslation() { // Allocate global system temporary registers that may also be used in the // epilogue. 
- if (IsDxbcVertexOrDomainShader()) { + if (is_vertex_shader()) { system_temp_position_ = PushSystemTemp(0b1111); system_temp_point_size_edge_flag_kill_vertex_ = PushSystemTemp(0b0100); // Set the point size to a negative value to tell the geometry shader that @@ -879,20 +849,21 @@ void DxbcShaderTranslator::StartTranslation() { DxbcOpMov( DxbcDest::R(system_temp_point_size_edge_flag_kill_vertex_, 0b0001), DxbcSrc::LF(-1.0f)); - } else if (IsDxbcPixelShader()) { + } else if (is_pixel_shader()) { if (edram_rov_used_) { // Will be initialized unconditionally. system_temp_rov_params_ = PushSystemTemp(); - if (ROV_IsDepthStencilEarly() || writes_depth()) { - // If the shader doesn't write to oDepth, each component will be written - // to if depth/stencil is enabled and the respective sample is covered - - // so need to initialize now because the first writes will be - // conditional. If the shader writes to oDepth, this is oDepth of the - // shader, written by the guest code, so initialize because assumptions - // can't be made about the integrity of the guest code. - system_temp_rov_depth_stencil_ = - PushSystemTemp(writes_depth() ? 0b0001 : 0b1111); - } + } + if (IsDepthStencilSystemTempUsed()) { + // If the shader doesn't write to oDepth, and ROV is used, each + // component will be written to if depth/stencil is enabled and the + // respective sample is covered - so need to initialize now because the + // first writes will be conditional. + // If the shader writes to oDepth, this is oDepth of the shader, written + // by the guest code, so initialize because assumptions can't be made + // about the integrity of the guest code. + system_temp_depth_stencil_ = + PushSystemTemp(writes_depth() ? 
0b0001 : 0b1111); } for (uint32_t i = 0; i < 4; ++i) { if (writes_color_target(i)) { @@ -942,7 +913,7 @@ void DxbcShaderTranslator::StartTranslation() { // Zero general-purpose registers to prevent crashes when the game // references them after only initializing them conditionally. - for (uint32_t i = IsDxbcPixelShader() ? xenos::kMaxInterpolators : 0; + for (uint32_t i = is_pixel_shader() ? xenos::kMaxInterpolators : 0; i < register_count(); ++i) { DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, i) : DxbcDest::R(i), @@ -951,9 +922,9 @@ void DxbcShaderTranslator::StartTranslation() { } // Write stage-specific prologue. - if (IsDxbcVertexOrDomainShader()) { + if (is_vertex_shader()) { StartVertexOrDomainShader(); - } else if (IsDxbcPixelShader()) { + } else if (is_pixel_shader()) { StartPixelShader(); } @@ -1168,31 +1139,31 @@ void DxbcShaderTranslator::CompleteShaderCode() { } // Write stage-specific epilogue. - if (IsDxbcVertexOrDomainShader()) { + if (is_vertex_shader()) { CompleteVertexOrDomainShader(); - } else if (IsDxbcPixelShader()) { + } else if (is_pixel_shader()) { CompletePixelShader(); } // Return from `main`. DxbcOpRet(); - if (IsDxbcVertexOrDomainShader()) { + if (is_vertex_shader()) { // Release system_temp_position_ and // system_temp_point_size_edge_flag_kill_vertex_. PopSystemTemp(2); - } else if (IsDxbcPixelShader()) { + } else if (is_pixel_shader()) { // Release system_temps_color_. for (int32_t i = 3; i >= 0; --i) { if (writes_color_target(i)) { PopSystemTemp(); } } + if (IsDepthStencilSystemTempUsed()) { + // Release system_temp_depth_stencil_. + PopSystemTemp(); + } if (edram_rov_used_) { - if (ROV_IsDepthStencilEarly() || writes_depth()) { - // Release system_temp_rov_depth_stencil_. - PopSystemTemp(); - } // Release system_temp_rov_params_. 
PopSystemTemp(); } @@ -1303,6 +1274,44 @@ std::vector DxbcShaderTranslator::CompleteTranslation() { return shader_object_bytes; } +void DxbcShaderTranslator::PostTranslation( + Shader::Translation& translation, bool setup_shader_post_translation_info) { + if (setup_shader_post_translation_info) { + DxbcShader* dxbc_shader = dynamic_cast(&translation.shader()); + if (dxbc_shader) { + dxbc_shader->texture_bindings_.clear(); + dxbc_shader->texture_bindings_.reserve(texture_bindings_.size()); + dxbc_shader->used_texture_mask_ = 0; + for (const TextureBinding& translator_binding : texture_bindings_) { + DxbcShader::TextureBinding& shader_binding = + dxbc_shader->texture_bindings_.emplace_back(); + // For a stable hash. + std::memset(&shader_binding, 0, sizeof(shader_binding)); + shader_binding.bindless_descriptor_index = + translator_binding.bindless_descriptor_index; + shader_binding.fetch_constant = translator_binding.fetch_constant; + shader_binding.dimension = translator_binding.dimension; + shader_binding.is_signed = translator_binding.is_signed; + dxbc_shader->used_texture_mask_ |= 1u + << translator_binding.fetch_constant; + } + dxbc_shader->sampler_bindings_.clear(); + dxbc_shader->sampler_bindings_.reserve(sampler_bindings_.size()); + for (const SamplerBinding& translator_binding : sampler_bindings_) { + DxbcShader::SamplerBinding& shader_binding = + dxbc_shader->sampler_bindings_.emplace_back(); + shader_binding.bindless_descriptor_index = + translator_binding.bindless_descriptor_index; + shader_binding.fetch_constant = translator_binding.fetch_constant; + shader_binding.mag_filter = translator_binding.mag_filter; + shader_binding.min_filter = translator_binding.min_filter; + shader_binding.mip_filter = translator_binding.mip_filter; + shader_binding.aniso_filter = translator_binding.aniso_filter; + } + } + } +} + void DxbcShaderTranslator::EmitInstructionDisassembly() { if (!emit_source_map_) { return; @@ -1527,19 +1536,20 @@ void 
DxbcShaderTranslator::StoreResult(const InstructionResult& result, } break; case InstructionStorageTarget::kDepth: - // Writes X to scalar oDepth or to X of system_temp_rov_depth_stencil_, no + // Writes X to scalar oDepth or to X of system_temp_depth_stencil_, no // additional swizzling needed. assert_true(used_write_mask == 0b0001); assert_true(writes_depth()); - if (edram_rov_used_) { - dest = DxbcDest::R(system_temp_rov_depth_stencil_); + if (IsDepthStencilSystemTempUsed()) { + dest = DxbcDest::R(system_temp_depth_stencil_); } else { dest = DxbcDest::ODepth(); } - // Depth outside [0, 1] is not safe for use with the ROV code. Though 20e4 - // float depth can store values below 2, it's a very unusual case. - // Direct3D 10+ SV_Depth, however, can accept any values, including - // specials, when the depth buffer is floating-point. + // Depth outside [0, 1] is not safe for use with the ROV code and with + // 20e4-as-32 conversion. Though 20e4 float depth can store values between + // 1 and 2, it's a very unusual case. Direct3D 10+ SV_Depth, however, can + // accept any values, including specials, when the depth buffer is + // floating-point; but depth is clamped to the viewport bounds anyway. is_clamped = true; break; } @@ -2094,7 +2104,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { // ds_5_1 shader_object_.push_back(0x44530501u); } else { - assert_true(IsDxbcPixelShader()); + assert_true(is_pixel_shader()); // ps_5_1 shader_object_.push_back(0xFFFF0501u); } @@ -2765,7 +2775,7 @@ void DxbcShaderTranslator::WriteInputSignature() { control_point_index.semantic_name = semantic_offset; } semantic_offset += AppendString(shader_object_, "XEVERTEXID"); - } else if (IsDxbcPixelShader()) { + } else if (is_pixel_shader()) { // Written dynamically, so assume it's always used if it can be written to // any interpolator register. 
bool param_gen_used = !is_depth_only_pixel_shader_ && register_count() != 0; @@ -2843,7 +2853,7 @@ void DxbcShaderTranslator::WriteInputSignature() { position.component_type = DxbcSignatureRegisterComponentType::kFloat32; position.register_index = uint32_t(InOutRegister::kPSInPosition); position.mask = 0b1111; - position.always_reads_mask = in_position_xy_used_ ? 0b0011 : 0b0000; + position.always_reads_mask = in_position_used_; } // Is front face (SV_IsFrontFace). @@ -2927,7 +2937,9 @@ void DxbcShaderTranslator::WritePatchConstantSignature() { DxbcName tess_factor_edge_system_value = DxbcName::kUndefined; uint32_t tess_factor_inside_count = 0; DxbcName tess_factor_inside_system_value = DxbcName::kUndefined; - switch (host_vertex_shader_type()) { + Shader::HostVertexShaderType host_vertex_shader_type = + GetDxbcShaderModification().host_vertex_shader_type; + switch (host_vertex_shader_type) { case Shader::HostVertexShaderType::kTriangleDomainCPIndexed: case Shader::HostVertexShaderType::kTriangleDomainPatchIndexed: tess_factor_edge_count = 3; @@ -2944,7 +2956,7 @@ void DxbcShaderTranslator::WritePatchConstantSignature() { break; default: // TODO(Triang3l): Support line patches. - assert_unhandled_case(host_vertex_shader_type()); + assert_unhandled_case(host_vertex_shader_type); EmitTranslationError( "Unsupported host vertex shader type in WritePatchConstantSignature"); } @@ -3033,7 +3045,7 @@ void DxbcShaderTranslator::WriteOutputSignature() { constexpr size_t kParameterDwords = sizeof(DxbcSignatureParameter) / sizeof(uint32_t); - if (IsDxbcVertexOrDomainShader()) { + if (is_vertex_shader()) { // Intepolators (TEXCOORD#). 
size_t interpolator_position = shader_object_.size(); shader_object_.resize(shader_object_.size() + @@ -3195,7 +3207,7 @@ void DxbcShaderTranslator::WriteOutputSignature() { cull_distance.semantic_name = semantic_offset; } semantic_offset += AppendString(shader_object_, "SV_CullDistance"); - } else if (IsDxbcPixelShader()) { + } else if (is_pixel_shader()) { if (!edram_rov_used_) { // Color render targets (SV_Target#). size_t target_position = SIZE_MAX; @@ -3217,9 +3229,11 @@ void DxbcShaderTranslator::WriteOutputSignature() { } } - // Depth (SV_Depth). + // Depth (SV_Depth or SV_DepthLessEqual). + Modification::DepthStencilMode depth_stencil_mode = + GetDxbcShaderModification().depth_stencil_mode; size_t depth_position = SIZE_MAX; - if (writes_depth()) { + if (writes_depth() || DSV_IsWritingFloat24Depth()) { depth_position = shader_object_.size(); shader_object_.resize(shader_object_.size() + kParameterDwords); ++parameter_count; @@ -3253,7 +3267,15 @@ void DxbcShaderTranslator::WriteOutputSignature() { depth_position); depth.semantic_name = semantic_offset; } - semantic_offset += AppendString(shader_object_, "SV_Depth"); + const char* depth_semantic_name; + if (!writes_depth() && + GetDxbcShaderModification().depth_stencil_mode == + Modification::DepthStencilMode::kFloat24Truncating) { + depth_semantic_name = "SV_DepthLessEqual"; + } else { + depth_semantic_name = "SV_Depth"; + } + semantic_offset += AppendString(shader_object_, depth_semantic_name); } } } @@ -3276,7 +3298,7 @@ void DxbcShaderTranslator::WriteShaderCode() { } else if (IsDxbcDomainShader()) { shader_type = D3D11_SB_DOMAIN_SHADER; } else { - assert_true(IsDxbcPixelShader()); + assert_true(is_pixel_shader()); shader_type = D3D10_SB_PIXEL_SHADER; } shader_object_.push_back( @@ -3296,12 +3318,14 @@ void DxbcShaderTranslator::WriteShaderCode() { // Inputs/outputs have 1D-indexed operands with a component mask and a // register index. 
+ Modification shader_modification = GetDxbcShaderModification(); + if (IsDxbcDomainShader()) { // Not using control point data since Xenos only has a vertex shader acting // as both vertex shader and domain shader. stat_.c_control_points = 3; stat_.tessellator_domain = DxbcTessellatorDomain::kTriangle; - switch (host_vertex_shader_type()) { + switch (shader_modification.host_vertex_shader_type) { case Shader::HostVertexShaderType::kTriangleDomainCPIndexed: case Shader::HostVertexShaderType::kTriangleDomainPatchIndexed: stat_.c_control_points = 3; @@ -3314,7 +3338,7 @@ void DxbcShaderTranslator::WriteShaderCode() { break; default: // TODO(Triang3l): Support line patches. - assert_unhandled_case(host_vertex_shader_type()); + assert_unhandled_case(shader_modification.host_vertex_shader_type); EmitTranslationError( "Unsupported host vertex shader type in WriteShaderCode"); } @@ -3330,11 +3354,17 @@ void DxbcShaderTranslator::WriteShaderCode() { } // Don't allow refactoring when converting to native code to maintain position - // invariance (needed even in pixel shaders for oDepth invariance). Also this - // dcl will be modified by ForceEarlyDepthStencil. - shader_object_.push_back( + // invariance (needed even in pixel shaders for oDepth invariance). + uint32_t global_flags_opcode = ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1); + if (is_pixel_shader() && + GetDxbcShaderModification().depth_stencil_mode == + Modification::DepthStencilMode::kEarlyHint && + !edram_rov_used_ && CanWriteZEarly()) { + global_flags_opcode |= D3D11_SB_GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL; + } + shader_object_.push_back(global_flags_opcode); // Constant buffers, from most frequenly accessed to least frequently accessed // (the order is a hint to the driver according to the DXBC header). 
@@ -3560,7 +3590,7 @@ void DxbcShaderTranslator::WriteShaderCode() { } // Inputs and outputs. - if (IsDxbcVertexOrDomainShader()) { + if (is_vertex_shader()) { if (IsDxbcDomainShader()) { if (in_domain_location_used_) { // Domain location input. @@ -3584,7 +3614,7 @@ void DxbcShaderTranslator::WriteShaderCode() { if (in_control_point_index_used_) { // Control point indices as float input. uint32_t control_point_array_size; - switch (host_vertex_shader_type()) { + switch (shader_modification.host_vertex_shader_type) { case Shader::HostVertexShaderType::kTriangleDomainCPIndexed: control_point_array_size = 3; break; @@ -3593,7 +3623,7 @@ void DxbcShaderTranslator::WriteShaderCode() { break; default: // TODO(Triang3l): Support line patches. - assert_unhandled_case(host_vertex_shader_type()); + assert_unhandled_case(shader_modification.host_vertex_shader_type); EmitTranslationError( "Unsupported host vertex shader type in " "StartVertexOrDomainShader"); @@ -3683,7 +3713,8 @@ void DxbcShaderTranslator::WriteShaderCode() { uint32_t(InOutRegister::kVSDSOutClipDistance45AndCullDistance)); shader_object_.push_back(ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_CULL_DISTANCE)); ++stat_.dcl_count; - } else if (IsDxbcPixelShader()) { + } else if (is_pixel_shader()) { + bool is_writing_float24_depth = DSV_IsWritingFloat24Depth(); // Interpolator input. if (!is_depth_only_pixel_shader_) { uint32_t interpolator_count = @@ -3725,16 +3756,26 @@ void DxbcShaderTranslator::WriteShaderCode() { shader_object_.push_back(uint32_t(InOutRegister::kPSInClipSpaceZW)); ++stat_.dcl_count; } - if (in_position_xy_used_) { - // Position input (only XY needed for ps_param_gen, and the ROV depth code - // calculates the depth from clip space Z and W). + if (in_position_used_) { + // Position input (XY needed for ps_param_gen, Z needed for non-ROV + // float24 conversion; the ROV depth code calculates the depth the from + // clip space Z and W with pull-mode per-sample interpolation instead). 
+ // At the cost of possibility of MSAA with pixel-rate shading, need + // per-sample depth - otherwise intersections cannot be antialiased, and + // with SV_DepthLessEqual, per-sample (or centroid, but this isn't + // applicable here) position is mandatory. However, with depth output, on + // the guest, there's only one depth value for the whole pixel. + D3D10_SB_INTERPOLATION_MODE position_interpolation_mode = + is_writing_float24_depth && !writes_depth() + ? D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE + : D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE; shader_object_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_PS_SIV) | ENCODE_D3D10_SB_INPUT_INTERPOLATION_MODE( - D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE) | + position_interpolation_mode) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4)); - shader_object_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INPUT, 0b0011, 1)); + shader_object_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_INPUT, in_position_used_, 1)); shader_object_.push_back(uint32_t(InOutRegister::kPSInPosition)); shader_object_.push_back(ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_POSITION)); ++stat_.dcl_count; @@ -3778,12 +3819,19 @@ void DxbcShaderTranslator::WriteShaderCode() { } } // Depth output. 
- if (writes_depth()) { + if (is_writing_float24_depth || writes_depth()) { + D3D10_SB_OPERAND_TYPE depth_operand_type; + if (!writes_depth() && + GetDxbcShaderModification().depth_stencil_mode == + Modification::DepthStencilMode::kFloat24Truncating) { + depth_operand_type = D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL; + } else { + depth_operand_type = D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH; + } shader_object_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(2)); - shader_object_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH, 0)); + shader_object_.push_back(EncodeScalarOperand(depth_operand_type, 0)); ++stat_.dcl_count; } } diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 9edc40b56..1e9891771 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -102,6 +102,51 @@ class DxbcShaderTranslator : public ShaderTranslator { bool edram_rov_used, bool force_emit_source_map = false); ~DxbcShaderTranslator() override; + union Modification { + // If anything in this is structure is changed in a way not compatible with + // the previous layout, invalidate the pipeline storages by increasing this + // version number (0xYYYYMMDD)! + static constexpr uint32_t kVersion = 0x20201203; + + enum class DepthStencilMode : uint32_t { + kNoModifiers, + // [earlydepthstencil] - enable if alpha test and alpha to coverage are + // disabled; ignored if anything in the shader blocks early Z writing + // (which is not known before translation, so this will be set anyway). 
+ kEarlyHint, + // Converting the depth to the closest 32-bit float representable exactly + // as a 20e4 float, to support invariance in cases when the guest + // reuploads a previously resolved depth buffer to the EDRAM, rounding + // towards zero (which contradicts the rounding used by the Direct3D 9 + // reference rasterizer, but allows SV_DepthLessEqual to be used to allow + // slightly coarse early Z culling; also truncating regardless of whether + // the shader writes depth and thus always uses SV_Depth, for + // consistency). MSAA is limited - depth must be per-sample + // (SV_DepthLessEqual also explicitly requires sample or centroid position + // interpolation), thus the sampler has to run at sample frequency even if + // the device supports stencil loading and thus true non-ROV MSAA via + // SV_StencilRef. + // Fixed-function viewport depth bounds must be snapped to float24 for + // clamping purposes. + kFloat24Truncating, + // Similar to kFloat24Truncating, but rounding to the nearest even, + // however, always using SV_Depth rather than SV_DepthLessEqual because + // rounding up results in a bigger value. Same viewport usage rules apply. + kFloat24Rounding, + }; + + struct { + // VS - pipeline stage and input configuration. + Shader::HostVertexShaderType host_vertex_shader_type + : Shader::kHostVertexShaderTypeBitCount; + // PS, non-ROV - depth / stencil output mode. + DepthStencilMode depth_stencil_mode : 2; + }; + uint32_t value = 0; + + Modification(uint32_t modification_value = 0) : value(modification_value) {} + }; + // Constant buffer bindings in space 0. enum class CbufferRegister { kSystemConstants, @@ -144,12 +189,14 @@ class DxbcShaderTranslator : public ShaderTranslator { kSysFlag_ROVStencilTest_Shift, // If the depth/stencil test has failed, but resulted in a stencil value // that is different than the one currently in the depth buffer, write it - // anyway and don't run the shader (to check if the sample may be discarded - // some way). 
This, however, also results in depth/stencil testing done - // entirely early even when it passes to prevent writing in divergent places - // in the shader. When the shader can kill, this must be set only for - // RB_DEPTHCONTROL EARLY_Z_ENABLE, not for alpha test/alpha to coverage - // disabled. + // anyway and don't run the rest of the shader (to check if the sample may + // be discarded some way) - use when alpha test and alpha to coverage are + // disabled. Ignored by the shader if not applicable to it (like if it has + // kill instructions or writes the depth output). + // TODO(Triang3l): Investigate replacement with an alpha-to-mask flag, + // checking `(flags & (alpha test | alpha to mask)) == (always | disabled)`, + // taking into account the potential relation with occlusion queries (but + // should be safe at least temporarily). kSysFlag_ROVDepthStencilEarlyWrite_Shift, kSysFlag_Count, @@ -238,15 +285,15 @@ class DxbcShaderTranslator : public ShaderTranslator { // EDRAM address calculation. uint32_t sample_count_log2[2]; float alpha_test_reference; + // If alpha to mask is disabled, the entire alpha_to_mask value must be 0. + // If alpha to mask is enabled, bits 0:7 are sample offsets, and bit 8 must + // be 1. uint32_t alpha_to_mask; float color_exp_bias[4]; uint32_t color_output_map[4]; - // If alpha to mask is disabled, the entire alpha_to_mask value must be 0. - // If alpha to mask is enabled, bits 0:7 are sample offsets, and bit 8 must - // be 1. uint32_t edram_resolution_square_scale; uint32_t edram_pitch_tiles; union { @@ -358,12 +405,6 @@ class DxbcShaderTranslator : public ShaderTranslator { bool is_signed; std::string name; }; - // The first binding returned is at t[SRVMainRegister::kBindfulTexturesStart] - // of space SRVSpace::kMain. 
- const TextureBinding* GetTextureBindings(uint32_t& count_out) const { - count_out = uint32_t(texture_bindings_.size()); - return texture_bindings_.data(); - } // Arbitrary limit - there can't be more than 2048 in a shader-visible // descriptor heap, though some older hardware (tier 1 resource binding - @@ -385,16 +426,6 @@ class DxbcShaderTranslator : public ShaderTranslator { xenos::AnisoFilter aniso_filter; std::string name; }; - const SamplerBinding* GetSamplerBindings(uint32_t& count_out) const { - count_out = uint32_t(sampler_bindings_.size()); - return sampler_bindings_.data(); - } - - // Returns the number of texture SRV and sampler offsets that need to be - // passed via a constant buffer to the shader. - uint32_t GetBindlessResourceCount() const { - return uint32_t(texture_bindings_.size() + sampler_bindings_.size()); - } // Unordered access view bindings in space 0. enum class UAVRegister { @@ -402,10 +433,6 @@ class DxbcShaderTranslator : public ShaderTranslator { kEdram, }; - // Creates a copy of the shader with early depth/stencil testing forced, - // overriding that alpha testing is used in the shader. - static std::vector ForceEarlyDepthStencil(const uint8_t* shader); - // Returns the format with internal flags for passing via the // edram_rt_format_flags system constant. static constexpr uint32_t ROV_AddColorFormatFlags( @@ -440,16 +467,22 @@ class DxbcShaderTranslator : public ShaderTranslator { float& clamp_alpha_high, uint32_t& keep_mask_low, uint32_t& keep_mask_high); + uint32_t GetDefaultModification( + xenos::ShaderType shader_type, + Shader::HostVertexShaderType host_vertex_shader_type = + Shader::HostVertexShaderType::kVertex) const override; + // Creates a special pixel shader without color outputs - this resets the // state of the translator. 
std::vector CreateDepthOnlyPixelShader(); protected: - void Reset() override; + void Reset(xenos::ShaderType shader_type) override; void StartTranslation() override; - std::vector CompleteTranslation() override; + void PostTranslation(Shader::Translation& translation, + bool setup_shader_post_translation_info) override; void ProcessLabel(uint32_t cf_index) override; @@ -650,6 +683,7 @@ class DxbcShaderTranslator : public ShaderTranslator { kInputDomainPoint = 28, kUnorderedAccessView = 30, kInputCoverageMask = 35, + kOutputDepthLessEqual = 39, }; // D3D10_SB_OPERAND_INDEX_DIMENSION @@ -689,6 +723,7 @@ class DxbcShaderTranslator : public ShaderTranslator { return DxbcOperandDimension::kNoData; case DxbcOperandType::kInputPrimitiveID: case DxbcOperandType::kOutputDepth: + case DxbcOperandType::kOutputDepthLessEqual: return DxbcOperandDimension::kScalar; case DxbcOperandType::kInputCoverageMask: return dest_in_dcl ? DxbcOperandDimension::kScalar @@ -860,6 +895,9 @@ class DxbcShaderTranslator : public ShaderTranslator { return DxbcDest(DxbcOperandType::kUnorderedAccessView, write_mask, index_1d, index_2d); } + static DxbcDest ODepthLE() { + return DxbcDest(DxbcOperandType::kOutputDepthLessEqual, 0b0001); + } uint32_t GetMask() const { switch (GetDimension()) { @@ -2145,21 +2183,19 @@ class DxbcShaderTranslator : public ShaderTranslator { (index_representation_1 << 25) | (index_representation_2 << 28); } - // Use these instead of is_vertex_shader/is_pixel_shader because they don't - // take is_depth_only_pixel_shader_ into account. 
- inline bool IsDxbcVertexOrDomainShader() const { - return !is_depth_only_pixel_shader_ && is_vertex_shader(); + Modification GetDxbcShaderModification() const { + return Modification(modification()); } - inline bool IsDxbcVertexShader() const { - return IsDxbcVertexOrDomainShader() && - host_vertex_shader_type() == Shader::HostVertexShaderType::kVertex; + + bool IsDxbcVertexShader() const { + return is_vertex_shader() && + GetDxbcShaderModification().host_vertex_shader_type == + Shader::HostVertexShaderType::kVertex; } - inline bool IsDxbcDomainShader() const { - return IsDxbcVertexOrDomainShader() && - host_vertex_shader_type() != Shader::HostVertexShaderType::kVertex; - } - inline bool IsDxbcPixelShader() const { - return is_depth_only_pixel_shader_ || is_pixel_shader(); + bool IsDxbcDomainShader() const { + return is_vertex_shader() && + GetDxbcShaderModification().host_vertex_shader_type != + Shader::HostVertexShaderType::kVertex; } // Whether to use switch-case rather than if (pc >= label) for control flow. @@ -2181,10 +2217,37 @@ class DxbcShaderTranslator : public ShaderTranslator { uint32_t piece_temp_component, uint32_t accumulator_temp, uint32_t accumulator_temp_component); + // Converts the depth value externally clamped to the representable [0, 2) + // range to 20e4 floating point, with zeros in bits 24:31, rounding to the + // nearest even. Source and destination may be the same, temporary must be + // different than both. + void PreClampedDepthTo20e4(uint32_t d24_temp, uint32_t d24_temp_component, + uint32_t d32_temp, uint32_t d32_temp_component, + uint32_t temp_temp, uint32_t temp_temp_component); + bool IsDepthStencilSystemTempUsed() const { + // See system_temp_depth_stencil_ documentation for explanation of cases. + if (edram_rov_used_) { + return writes_depth() || ROV_IsDepthStencilEarly(); + } + return writes_depth() && DSV_IsWritingFloat24Depth(); + } + // Whether the current non-ROV pixel shader should convert the depth to 20e4. 
+ bool DSV_IsWritingFloat24Depth() const { + if (edram_rov_used_) { + return false; + } + Modification::DepthStencilMode depth_stencil_mode = + GetDxbcShaderModification().depth_stencil_mode; + return depth_stencil_mode == + Modification::DepthStencilMode::kFloat24Truncating || + depth_stencil_mode == + Modification::DepthStencilMode::kFloat24Rounding; + } // Whether it's possible and worth skipping running the translated shader for // 2x2 quads. bool ROV_IsDepthStencilEarly() const { - return !is_depth_only_pixel_shader_ && !writes_depth(); + return !is_depth_only_pixel_shader_ && !writes_depth() && + memexport_stream_constants().empty(); } // Converts the depth value to 24-bit (storing the result in bits 0:23 and // zeros in 24:31, not creating room for stencil - since this may be involved @@ -2197,8 +2260,8 @@ class DxbcShaderTranslator : public ShaderTranslator { // Does all the depth/stencil-related things, including or not including // writing based on whether it's late, or on whether it's safe to do it early. // Updates system_temp_rov_params_ result and coverage if allowed and safe, - // updates system_temp_rov_depth_stencil_, and if early and the coverage is - // empty for all pixels in the 2x2 quad and safe to return early (stencil is + // updates system_temp_depth_stencil_, and if early and the coverage is empty + // for all pixels in the 2x2 quad and safe to return early (stencil is // unchanged or known that it's safe not to await kills/alphatest/AtoC), // returns from the shader. void ROV_DepthStencilTest(); @@ -2248,6 +2311,7 @@ class DxbcShaderTranslator : public ShaderTranslator { // Discards the SSAA sample if it's masked out by alpha to coverage. void CompletePixelShader_WriteToRTVs_AlphaToMask(); void CompletePixelShader_WriteToRTVs(); + void CompletePixelShader_DSV_DepthTo24Bit(); // Masks the sample away from system_temp_rov_params_.x if it's not covered. // threshold_offset and temp.temp_component can be the same if needed. 
void CompletePixelShader_ROV_AlphaToMaskSample( @@ -2333,6 +2397,11 @@ class DxbcShaderTranslator : public ShaderTranslator { xenos::TextureFilter min_filter, xenos::TextureFilter mip_filter, xenos::AnisoFilter aniso_filter); + // Returns the number of texture SRV and sampler offsets that need to be + // passed via a constant buffer to the shader. + uint32_t GetBindlessResourceCount() const { + return uint32_t(texture_bindings_.size() + sampler_bindings_.size()); + } // Marks fetch constants as used by the DXBC shader and returns DxbcSrc // for the words 01 (pair 0), 23 (pair 1) or 45 (pair 2) of the texture fetch // constant. @@ -2364,7 +2433,7 @@ class DxbcShaderTranslator : public ShaderTranslator { static uint32_t AppendString(std::vector& dest, const char* source); // Returns the length of a string as if it was appended to a DWORD stream, in // bytes. - static inline uint32_t GetStringLength(const char* source) { + static uint32_t GetStringLength(const char* source) { return uint32_t(xe::align(std::strlen(source) + 1, sizeof(uint32_t))); } @@ -2479,8 +2548,8 @@ class DxbcShaderTranslator : public ShaderTranslator { bool in_primitive_id_used_; // Whether InOutRegister::kDSInControlPointIndex has been used in the shader. bool in_control_point_index_used_; - // Whether the XY of the pixel position has been used in the pixel shader. - bool in_position_xy_used_; + // Mask of the pixel/sample position actually used in the pixel shader. + uint32_t in_position_used_; // Whether the faceness has been used in the pixel shader. bool in_front_face_used_; @@ -2518,15 +2587,14 @@ class DxbcShaderTranslator : public ShaderTranslator { // W - Base-relative resolution-scaled EDRAM offset for 64bpp color data, in // dwords. uint32_t system_temp_rov_params_; - // ROV only - new depth/stencil data. 4 VGPRs when not writing to oDepth, 1 - // VGPR when writing to oDepth. 
Not used in the depth-only pixel shader (or, - // more formally, if neither early depth-stencil nor oDepth are used) because - // it always calculates and writes in the same place. - // When not writing to oDepth: New per-sample depth/stencil values, generated - // during early depth/stencil test (actual writing checks coverage bits). - // When writing to oDepth: X also used to hold the depth written by the - // shader, later used as a temporary during depth/stencil testing. - uint32_t system_temp_rov_depth_stencil_; + // Two purposes: + // - When writing to oDepth, and either using ROV or converting the depth to + // float24: X also used to hold the depth written by the shader, + // later used as a temporary during depth/stencil testing. + // - Otherwise, when using ROV output with ROV_IsDepthStencilEarly being true: + // New per-sample depth/stencil values, generated during early depth/stencil + // test (actual writing checks coverage bits). + uint32_t system_temp_depth_stencil_; // Up to 4 color outputs in pixel shaders (because of exponent bias, alpha // test and remapping, and also for ROV writing). uint32_t system_temps_color_[4]; @@ -2587,6 +2655,8 @@ class DxbcShaderTranslator : public ShaderTranslator { uint32_t srv_index_bindless_textures_3d_; uint32_t srv_index_bindless_textures_cube_; + // The first binding is at t[SRVMainRegister::kBindfulTexturesStart] of space + // SRVSpace::kMain. std::vector texture_bindings_; std::unordered_map texture_bindings_for_bindful_srv_indices_; diff --git a/src/xenia/gpu/dxbc_shader_translator_fetch.cc b/src/xenia/gpu/dxbc_shader_translator_fetch.cc index 76eed4d10..b4813b381 100644 --- a/src/xenia/gpu/dxbc_shader_translator_fetch.cc +++ b/src/xenia/gpu/dxbc_shader_translator_fetch.cc @@ -677,7 +677,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // Whether to use gradients (implicit or explicit) for LOD calculation. 
bool use_computed_lod = instr.attributes.use_computed_lod && - (IsDxbcPixelShader() || instr.attributes.use_register_gradients); + (is_pixel_shader() || instr.attributes.use_register_gradients); if (instr.opcode == FetchOpcode::kGetTextureComputedLod && (!use_computed_lod || instr.attributes.use_register_gradients)) { assert_always(); diff --git a/src/xenia/gpu/dxbc_shader_translator_memexport.cc b/src/xenia/gpu/dxbc_shader_translator_memexport.cc index d20cb11bf..5f3d47bc0 100644 --- a/src/xenia/gpu/dxbc_shader_translator_memexport.cc +++ b/src/xenia/gpu/dxbc_shader_translator_memexport.cc @@ -106,7 +106,7 @@ void DxbcShaderTranslator::ExportToMemory() { kSysConst_Flags_Vec) .Select(kSysConst_Flags_Comp), DxbcSrc::LU(kSysFlag_SharedMemoryIsUAV)); - if (IsDxbcPixelShader()) { + if (is_pixel_shader()) { // Disable memexport in pixel shaders with supersampling since VPOS is // ambiguous. if (edram_rov_used_) { diff --git a/src/xenia/gpu/dxbc_shader_translator_om.cc b/src/xenia/gpu/dxbc_shader_translator_om.cc index 24963008f..ea79b737c 100644 --- a/src/xenia/gpu/dxbc_shader_translator_om.cc +++ b/src/xenia/gpu/dxbc_shader_translator_om.cc @@ -167,7 +167,7 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() { // bigger) to integer to system_temp_rov_params_.zw. // system_temp_rov_params_.z = X host pixel position as uint // system_temp_rov_params_.w = Y host pixel position as uint - in_position_xy_used_ = true; + in_position_used_ |= 0b0011; DxbcOpFToU(DxbcDest::R(system_temp_rov_params_, 0b1100), DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition), 0b01000000)); // Revert the resolution scale to convert the position to guest pixels. @@ -315,7 +315,7 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() { // Add host pixel offsets. 
// system_temp_rov_params_.y = scaled 32bpp depth/stencil address // system_temp_rov_params_.z = scaled 32bpp color offset if needed - in_position_xy_used_ = true; + in_position_used_ |= 0b0011; for (uint32_t i = 0; i < 2; ++i) { // Convert a position component to integer. DxbcOpFToU(DxbcDest::R(system_temp_rov_params_, 0b0001), @@ -417,23 +417,50 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // With early depth/stencil, depth/stencil writing may be deferred to the // end of the shader to prevent writing in case something (like alpha test, // which is dynamic GPU state) discards the pixel. So, write directly to the - // persistent register, system_temp_rov_depth_stencil_, instead of a local + // persistent register, system_temp_depth_stencil_, instead of a local // temporary register. DxbcDest sample_depth_stencil_dest( - depth_stencil_early - ? DxbcDest::R(system_temp_rov_depth_stencil_, 1 << i) - : temp_x_dest); + depth_stencil_early ? DxbcDest::R(system_temp_depth_stencil_, 1 << i) + : temp_x_dest); DxbcSrc sample_depth_stencil_src( - depth_stencil_early - ? DxbcSrc::R(system_temp_rov_depth_stencil_).Select(i) - : temp_x_src); + depth_stencil_early ? DxbcSrc::R(system_temp_depth_stencil_).Select(i) + : temp_x_src); if (!i) { if (writes_depth()) { + // Clamp oDepth to the lower viewport depth bound (depth clamp happens + // after the pixel shader in the pipeline, at least on Direct3D 11 and + // Vulkan, thus applies to the shader's depth output too). + system_constants_used_ |= 1ull << kSysConst_EdramDepthRange_Index; + DxbcOpMax(DxbcDest::R(system_temp_depth_stencil_, 0b0001), + DxbcSrc::R(system_temp_depth_stencil_, DxbcSrc::kXXXX), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramDepthRange_Vec) + .Select(kSysConst_EdramDepthRangeOffset_Comp)); + // Calculate the upper Z range bound to temp.x for clamping after + // biasing. 
+ // temp.x = viewport maximum depth + system_constants_used_ |= 1ull << kSysConst_EdramDepthRange_Index; + DxbcOpAdd(temp_x_dest, + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramDepthRange_Vec) + .Select(kSysConst_EdramDepthRangeOffset_Comp), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramDepthRange_Vec) + .Select(kSysConst_EdramDepthRangeScale_Comp)); + // Clamp oDepth to the upper viewport depth bound (already not above 1, + // but saturate for total safety). + // temp.x = free + DxbcOpMin(DxbcDest::R(system_temp_depth_stencil_, 0b0001), + DxbcSrc::R(system_temp_depth_stencil_, DxbcSrc::kXXXX), + temp_x_src, true); // Convert the shader-generated depth to 24-bit, using temp.x as // temporary. - ROV_DepthTo24Bit(system_temp_rov_depth_stencil_, 0, - system_temp_rov_depth_stencil_, 0, temp, 0); + ROV_DepthTo24Bit(system_temp_depth_stencil_, 0, + system_temp_depth_stencil_, 0, temp, 0); } else { // Load the first sample's Z*W and W to temp.xy - need this regardless // of coverage for polygon offset. @@ -529,14 +556,14 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { } // Get if the current sample is covered to temp.w. - // temp.x = first sample's viewport space Z or 24-bit oDepth + // temp.x = first sample's viewport space Z if not writing to oDepth // temp.y = polygon offset if not writing to oDepth // temp.z = viewport maximum depth if not writing to oDepth // temp.w = coverage of the current sample DxbcOpAnd(temp_w_dest, DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), DxbcSrc::LU(1 << i)); // Check if the current sample is covered. Release 1 VGPR. 
- // temp.x = first sample's viewport space Z or 24-bit oDepth + // temp.x = first sample's viewport space Z if not writing to oDepth // temp.y = polygon offset if not writing to oDepth // temp.z = viewport maximum depth if not writing to oDepth // temp.w = free @@ -546,7 +573,7 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // Copy the 24-bit depth common to all samples to sample_depth_stencil. // temp.x = shader-generated 24-bit depth DxbcOpMov(sample_depth_stencil_dest, - DxbcSrc::R(system_temp_rov_depth_stencil_, DxbcSrc::kXXXX)); + DxbcSrc::R(system_temp_depth_stencil_, DxbcSrc::kXXXX)); } else { if (i) { // Sample's depth precalculated for sample 0 (for slope-scaled depth @@ -997,51 +1024,60 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // temp.z = viewport maximum depth if not writing to oDepth // temp.w = whether depth/stencil has been modified DxbcOpINE(temp_w_dest, sample_depth_stencil_src, temp_w_src); - // Check if need to write. - // temp.x? = resulting sample depth/stencil - // temp.y = polygon offset if not writing to oDepth - // temp.z = viewport maximum depth if not writing to oDepth - // temp.w = free - DxbcOpIf(true, temp_w_src); - { - if (depth_stencil_early) { - // Get if early depth/stencil write is enabled to temp.w. - // temp.w = whether early depth/stencil write is enabled - system_constants_used_ |= 1ull << kSysConst_Flags_Index; - DxbcOpAnd(temp_w_dest, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_Flags_Vec) - .Select(kSysConst_Flags_Comp), - DxbcSrc::LU(kSysFlag_ROVDepthStencilEarlyWrite)); - // Check if need to write early. - // temp.w = free - DxbcOpIf(true, temp_w_src); - } - // Write the new depth/stencil. 
- if (uav_index_edram_ == kBindingIndexUnallocated) { - uav_index_edram_ = uav_count_++; - } - DxbcOpStoreUAVTyped( - DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1, - sample_depth_stencil_src); - if (depth_stencil_early) { - // Need to still run the shader to know whether to write the - // depth/stencil value. - DxbcOpElse(); - // Set sample bit out of bits 4:7 of system_temp_rov_params_.x if need - // to write later (after checking if the sample is not discarded by a - // kill instruction, alphatest or alpha-to-coverage). - DxbcOpOr(DxbcDest::R(system_temp_rov_params_, 0b0001), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LU(1 << (4 + i))); - // Close the early depth/stencil check. - DxbcOpEndIf(); + if (depth_stencil_early && !CanWriteZEarly()) { + // Set the sample bit in bits 4:7 of system_temp_rov_params_.x - always + // need to write late in this shader, as it may do something like + // explicitly killing pixels. + DxbcOpBFI(DxbcDest::R(system_temp_rov_params_, 0b0001), DxbcSrc::LU(1), + DxbcSrc::LU(4 + i), temp_w_src, + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX)); + } else { + // Check if need to write. + // temp.x? = resulting sample depth/stencil + // temp.y = polygon offset if not writing to oDepth + // temp.z = viewport maximum depth if not writing to oDepth + // temp.w = free + DxbcOpIf(true, temp_w_src); + { + if (depth_stencil_early) { + // Get if early depth/stencil write is enabled to temp.w. + // temp.w = whether early depth/stencil write is enabled + system_constants_used_ |= 1ull << kSysConst_Flags_Index; + DxbcOpAnd(temp_w_dest, + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_Flags_Vec) + .Select(kSysConst_Flags_Comp), + DxbcSrc::LU(kSysFlag_ROVDepthStencilEarlyWrite)); + // Check if need to write early. 
+ // temp.w = free + DxbcOpIf(true, temp_w_src); + } + // Write the new depth/stencil. + if (uav_index_edram_ == kBindingIndexUnallocated) { + uav_index_edram_ = uav_count_++; + } + DxbcOpStoreUAVTyped( + DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1, + sample_depth_stencil_src); + if (depth_stencil_early) { + // Need to still run the shader to know whether to write the + // depth/stencil value. + DxbcOpElse(); + // Set the sample bit in bits 4:7 of system_temp_rov_params_.x if need + // to write later (after checking if the sample is not discarded by a + // kill instruction, alphatest or alpha-to-coverage). + DxbcOpOr(DxbcDest::R(system_temp_rov_params_, 0b0001), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), + DxbcSrc::LU(1 << (4 + i))); + // Close the early depth/stencil check. + DxbcOpEndIf(); + } } + // Close the write check. + DxbcOpEndIf(); } - // Close the write check. - DxbcOpEndIf(); // Release sample_temp. PopSystemTemp(); @@ -1720,7 +1756,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs_AlphaToMask() { // Convert SSAA sample position to integer to temp.xy (not caring about the // resolution scale because it's not supported anywhere on the RTV output // path). - in_position_xy_used_ = true; + in_position_used_ |= 0b0011; DxbcOpFToU(DxbcDest::R(temp, 0b0011), DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition))); @@ -1913,6 +1949,139 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() { PopSystemTemp(2); } +void DxbcShaderTranslator::CompletePixelShader_DSV_DepthTo24Bit() { + if (!DSV_IsWritingFloat24Depth()) { + return; + } + + uint32_t temp; + if (writes_depth()) { + // The depth is already written to system_temp_depth_stencil_.x and clamped + // to 0...1 with NaNs dropped (saturating in StoreResult); yzw are free. 
+ temp = system_temp_depth_stencil_; + } else { + // Need a temporary variable; copy the sample's depth input to it and + // saturate it (in Direct3D 11, depth is clamped to the viewport bounds + // after the pixel shader, and SV_Position.z contains the unclamped depth, + // which may be outside the viewport's depth range if it's biased); though + // it will be clamped to the viewport bounds anyway, but to be able to make + // the assumption of it being clamped while working with the bit + // representation. + temp = PushSystemTemp(); + in_position_used_ |= 0b0100; + DxbcOpMov( + DxbcDest::R(temp, 0b0001), + DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition), DxbcSrc::kZZZZ), + true); + } + + DxbcDest temp_x_dest(DxbcDest::R(temp, 0b0001)); + DxbcSrc temp_x_src(DxbcSrc::R(temp, DxbcSrc::kXXXX)); + DxbcDest temp_y_dest(DxbcDest::R(temp, 0b0010)); + DxbcSrc temp_y_src(DxbcSrc::R(temp, DxbcSrc::kYYYY)); + + if (GetDxbcShaderModification().depth_stencil_mode == + Modification::DepthStencilMode::kFloat24Truncating) { + // Simplified conversion, always less than or equal to the original value - + // just drop the lower bits. + // The float32 exponent bias is 127. + // After saturating, the exponent range is -127...0. + // The smallest normalized 20e4 exponent is -14 - should drop 3 mantissa + // bits at -14 or above. + // The smallest denormalized 20e4 number is -34 - should drop 23 mantissa + // bits at -34. + // Anything smaller than 2^-34 becomes 0. + DxbcDest truncate_dest(writes_depth() ? DxbcDest::ODepth() + : DxbcDest::ODepthLE()); + // Check if the number is representable as a float24 after truncation - the + // exponent is at least -34. + DxbcOpUGE(temp_y_dest, temp_x_src, DxbcSrc::LU(0x2E800000)); + DxbcOpIf(true, temp_y_src); + { + // Extract the biased float32 exponent to temp.y. + // temp.y = 113+ at exponent -14+. + // temp.y = 93 at exponent -34. 
+ DxbcOpUBFE(temp_y_dest, DxbcSrc::LU(8), DxbcSrc::LU(23), temp_x_src); + // Convert exponent to the unclamped number of bits to truncate. + // 116 - 113 = 3. + // 116 - 93 = 23. + // temp.y = 3+ at exponent -14+. + // temp.y = 23 at exponent -34. + DxbcOpIAdd(temp_y_dest, DxbcSrc::LI(116), -temp_y_src); + // Clamp the truncated bit count to drop 3 bits of any normal number. + // Exponents below -34 are handled separately. + // temp.y = 3 at exponent -14. + // temp.y = 23 at exponent -34. + DxbcOpIMax(temp_y_dest, temp_y_src, DxbcSrc::LI(3)); + // Truncate the mantissa - fill the low bits with zeros. + DxbcOpBFI(truncate_dest, temp_y_src, DxbcSrc::LU(0), DxbcSrc::LU(0), + temp_x_src); + } + // The number is not representable as float24 after truncation - zero. + DxbcOpElse(); + DxbcOpMov(truncate_dest, DxbcSrc::LF(0.0f)); + // Close the non-zero result check. + DxbcOpEndIf(); + } else { + // Properly convert to 20e4, with rounding to the nearest even. + PreClampedDepthTo20e4(temp, 0, temp, 0, temp, 1); + // Convert back to float32. + // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp + // Unpack the exponent to temp.y. + DxbcOpUShR(temp_y_dest, temp_x_src, DxbcSrc::LU(20)); + // Unpack the mantissa to temp.x. + DxbcOpAnd(temp_x_dest, temp_x_src, DxbcSrc::LU(0xFFFFF)); + // Check if the number is denormalized. + DxbcOpIf(false, temp_y_src); + { + // Check if the number is non-zero (if the mantissa isn't zero - the + // exponent is known to be zero at this point). + DxbcOpIf(true, temp_x_src); + { + // Normalize the mantissa. + // Note that HLSL firstbithigh(x) is compiled to DXBC like: + // `x ? 31 - firstbit_hi(x) : -1` + // (returns the index from the LSB, not the MSB, but -1 for zero too). 
+ // temp.y = firstbit_hi(mantissa) + DxbcOpFirstBitHi(temp_y_dest, temp_x_src); + // temp.y = 20 - firstbithigh(mantissa) + // Or: + // temp.y = 20 - (31 - firstbit_hi(mantissa)) + DxbcOpIAdd(temp_y_dest, temp_y_src, DxbcSrc::LI(20 - 31)); + // mantissa = mantissa << (20 - firstbithigh(mantissa)) + // AND 0xFFFFF not needed after this - BFI will do it. + DxbcOpIShL(temp_x_dest, temp_x_src, temp_y_src); + // Get the normalized exponent. + // exponent = 1 - (20 - firstbithigh(mantissa)) + DxbcOpIAdd(temp_y_dest, DxbcSrc::LI(1), -temp_y_src); + } + // The number is zero. + DxbcOpElse(); + { + // Set the unbiased exponent to -112 for zero - 112 will be added later, + // resulting in zero float32. + DxbcOpMov(temp_y_dest, DxbcSrc::LI(-112)); + } + // Close the non-zero check. + DxbcOpEndIf(); + } + // Close the denormal check. + DxbcOpEndIf(); + // Bias the exponent and move it to the correct location in float32 to + // temp.y. + DxbcOpIMAd(temp_y_dest, temp_y_src, DxbcSrc::LI(1 << 23), + DxbcSrc::LI(112 << 23)); + // Combine the mantissa and the exponent into the result. + DxbcOpBFI(DxbcDest::ODepth(), DxbcSrc::LU(20), DxbcSrc::LU(3), temp_x_src, + temp_y_src); + } + + if (!writes_depth()) { + // Release temp. + PopSystemTemp(); + } +} + void DxbcShaderTranslator::CompletePixelShader_ROV_AlphaToMaskSample( uint32_t sample_index, float threshold_base, DxbcSrc threshold_offset, float threshold_offset_scale, uint32_t temp, uint32_t temp_component) { @@ -1957,7 +2126,7 @@ void DxbcShaderTranslator::CompletePixelShader_ROV_AlphaToMask() { // floating-point. With resolution scaling, still using host pixels, to // preserve the idea of dithering. // temp.x = alpha to coverage offset as float 0.0...3.0. 
- in_position_xy_used_ = true; + in_position_used_ |= 0b0011; DxbcOpFToU(DxbcDest::R(temp, 0b0011), DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition))); DxbcOpAnd(DxbcDest::R(temp, 0b0010), DxbcSrc::R(temp, DxbcSrc::kYYYY), @@ -2067,7 +2236,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { DxbcOpStoreUAVTyped( DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)), DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1, - DxbcSrc::R(system_temp_rov_depth_stencil_).Select(i)); + DxbcSrc::R(system_temp_depth_stencil_).Select(i)); } // Close the write check. DxbcOpEndIf(); @@ -3059,15 +3228,16 @@ void DxbcShaderTranslator::CompletePixelShader() { CompletePixelShader_WriteToROV(); } else { CompletePixelShader_WriteToRTVs(); + CompletePixelShader_DSV_DepthTo24Bit(); } } -void DxbcShaderTranslator::ROV_DepthTo24Bit(uint32_t d24_temp, - uint32_t d24_temp_component, - uint32_t d32_temp, - uint32_t d32_temp_component, - uint32_t temp_temp, - uint32_t temp_temp_component) { +void DxbcShaderTranslator::PreClampedDepthTo20e4(uint32_t d24_temp, + uint32_t d24_temp_component, + uint32_t d32_temp, + uint32_t d32_temp_component, + uint32_t temp_temp, + uint32_t temp_temp_component) { assert_true(temp_temp != d24_temp || temp_temp_component != d24_temp_component); assert_true(temp_temp != d32_temp || @@ -3079,68 +3249,83 @@ void DxbcShaderTranslator::ROV_DepthTo24Bit(uint32_t d24_temp, DxbcDest temp_dest(DxbcDest::R(temp_temp, 1 << temp_temp_component)); DxbcSrc temp_src(DxbcSrc::R(temp_temp).Select(temp_temp_component)); + // CFloat24 from d3dref9.dll. + // Assuming the depth is already clamped to [0, 2) (in all places, the depth + // is written with the saturate flag set). + + // Check if the number is too small to be represented as normalized 20e4. + // temp = f32 < 2^-14 + DxbcOpULT(temp_dest, d32_src, DxbcSrc::LU(0x38800000)); + // Handle denormalized numbers separately. 
+ DxbcOpIf(true, temp_src); + { + // temp = f32 >> 23 + DxbcOpUShR(temp_dest, d32_src, DxbcSrc::LU(23)); + // temp = 113 - (f32 >> 23) + DxbcOpIAdd(temp_dest, DxbcSrc::LI(113), -temp_src); + // Don't allow the shift to overflow, since in DXBC the lower 5 bits of the + // shift amount are used (otherwise 0 becomes 8). + // temp = min(113 - (f32 >> 23), 24) + DxbcOpUMin(temp_dest, temp_src, DxbcSrc::LU(24)); + // biased_f32 = (f32 & 0x7FFFFF) | 0x800000 + DxbcOpBFI(d24_dest, DxbcSrc::LU(9), DxbcSrc::LU(23), DxbcSrc::LU(1), + d32_src); + // biased_f32 = ((f32 & 0x7FFFFF) | 0x800000) >> min(113 - (f32 >> 23), 24) + DxbcOpUShR(d24_dest, d24_src, temp_src); + } + // Not denormalized? + DxbcOpElse(); + { + // Bias the exponent. + // biased_f32 = f32 + (-112 << 23) + // (left shift of a negative value is undefined behavior) + DxbcOpIAdd(d24_dest, d32_src, DxbcSrc::LU(0xC8000000u)); + } + // Close the denormal check. + DxbcOpEndIf(); + // Build the 20e4 number. + // temp = (biased_f32 >> 3) & 1 + DxbcOpUBFE(temp_dest, DxbcSrc::LU(1), DxbcSrc::LU(3), d24_src); + // f24 = biased_f32 + 3 + DxbcOpIAdd(d24_dest, d24_src, DxbcSrc::LU(3)); + // f24 = biased_f32 + 3 + ((biased_f32 >> 3) & 1) + DxbcOpIAdd(d24_dest, d24_src, temp_src); + // f24 = ((biased_f32 + 3 + ((biased_f32 >> 3) & 1)) >> 3) & 0xFFFFFF + DxbcOpUBFE(d24_dest, DxbcSrc::LU(24), DxbcSrc::LU(3), d24_src); +} + +void DxbcShaderTranslator::ROV_DepthTo24Bit(uint32_t d24_temp, + uint32_t d24_temp_component, + uint32_t d32_temp, + uint32_t d32_temp_component, + uint32_t temp_temp, + uint32_t temp_temp_component) { + assert_true(temp_temp != d32_temp || + temp_temp_component != d32_temp_component); + // Source and destination may be the same. 
+ system_constants_used_ |= 1ull << kSysConst_Flags_Index; - DxbcOpAnd(temp_dest, + DxbcOpAnd(DxbcDest::R(temp_temp, 1 << temp_temp_component), DxbcSrc::CB(cbuffer_index_system_constants_, uint32_t(CbufferRegister::kSystemConstants), kSysConst_Flags_Vec) .Select(kSysConst_Flags_Comp), DxbcSrc::LU(kSysFlag_ROVDepthFloat24)); // Convert according to the format. - DxbcOpIf(true, temp_src); + DxbcOpIf(true, DxbcSrc::R(temp_temp).Select(temp_temp_component)); { - // 20e4 conversion, using 1 VGPR. - // CFloat24 from d3dref9.dll. - // Assuming the depth is already clamped to [0, 2) (in all places, the depth - // is written with the saturate flag set). - - // Check if the number is too small to be represented as normalized 20e4. - // temp = f32 < 2^-14 - DxbcOpULT(temp_dest, d32_src, DxbcSrc::LU(0x38800000)); - // Handle denormalized numbers separately. - DxbcOpIf(true, temp_src); - { - // temp = f32 >> 23 - DxbcOpUShR(temp_dest, d32_src, DxbcSrc::LU(23)); - // temp = 113 - (f32 >> 23) - DxbcOpIAdd(temp_dest, DxbcSrc::LI(113), -temp_src); - // Don't allow the shift to overflow, since in DXBC the lower 5 bits of - // the shift amount are used (otherwise 0 becomes 8). - // temp = min(113 - (f32 >> 23), 24) - DxbcOpUMin(temp_dest, temp_src, DxbcSrc::LU(24)); - // biased_f32 = (f32 & 0x7FFFFF) | 0x800000 - DxbcOpBFI(d24_dest, DxbcSrc::LU(9), DxbcSrc::LU(23), DxbcSrc::LU(1), - d32_src); - // biased_f32 = - // ((f32 & 0x7FFFFF) | 0x800000) >> min(113 - (f32 >> 23), 24) - DxbcOpUShR(d24_dest, d24_src, temp_src); - } - // Not denormalized? - DxbcOpElse(); - { - // Bias the exponent. - // biased_f32 = f32 + (-112 << 23) - // (left shift of a negative value is undefined behavior) - DxbcOpIAdd(d24_dest, d32_src, DxbcSrc::LU(0xC8000000u)); - } - // Close the denormal check. - DxbcOpEndIf(); - // Build the 20e4 number. 
- // temp = (biased_f32 >> 3) & 1 - DxbcOpUBFE(temp_dest, DxbcSrc::LU(1), DxbcSrc::LU(3), d24_src); - // f24 = biased_f32 + 3 - DxbcOpIAdd(d24_dest, d24_src, DxbcSrc::LU(3)); - // f24 = biased_f32 + 3 + ((biased_f32 >> 3) & 1) - DxbcOpIAdd(d24_dest, d24_src, temp_src); - // f24 = ((biased_f32 + 3 + ((biased_f32 >> 3) & 1)) >> 3) & 0xFFFFFF - DxbcOpUBFE(d24_dest, DxbcSrc::LU(24), DxbcSrc::LU(3), d24_src); + // 20e4 conversion. + PreClampedDepthTo20e4(d24_temp, d24_temp_component, d32_temp, + d32_temp_component, temp_temp, temp_temp_component); } DxbcOpElse(); { // Unorm24 conversion. - + DxbcDest d24_dest(DxbcDest::R(d24_temp, 1 << d24_temp_component)); + DxbcSrc d24_src(DxbcSrc::R(d24_temp).Select(d24_temp_component)); // Multiply by float(0xFFFFFF). - DxbcOpMul(d24_dest, d32_src, DxbcSrc::LF(16777215.0f)); + DxbcOpMul(d24_dest, DxbcSrc::R(d32_temp).Select(d32_temp_component), + DxbcSrc::LF(16777215.0f)); // Round to the nearest even integer. This seems to be the correct way: // rounding towards zero gives 0xFF instead of 0x100 in clear shaders in, // for instance, Halo 3, but other clear shaders in it are also broken if diff --git a/src/xenia/gpu/gpu_flags.cc b/src/xenia/gpu/gpu_flags.cc index 5f73fd3c2..07eff0bc8 100644 --- a/src/xenia/gpu/gpu_flags.cc +++ b/src/xenia/gpu/gpu_flags.cc @@ -40,9 +40,63 @@ DEFINE_bool( "be fully covered when MSAA is used with fullscreen passes.", "GPU"); +DEFINE_string( + depth_float24_conversion, "", + "Method for converting 32-bit Z values to 20e4 floating point when using " + "host depth buffers without native 20e4 support (when not using rasterizer-" + "ordered views / fragment shader interlocks to perform depth testing " + "manually).\n" + "Use: [any, on_copy, truncate, round]\n" + " on_copy:\n" + " Do depth testing at host precision, converting when copying between " + "host depth buffers and the EDRAM buffer to support reinterpretation, " + "maintaining two copies, in both host and 20e4 formats, for reloading data " + "to 
host depth buffers when it wasn't overwritten.\n" + " + Highest performance, allows early depth test and writing.\n" + " + Host MSAA is possible with pixel-rate shading where supported.\n" + " - EDRAM > RAM > EDRAM depth buffer round trip done in certain games " + "(such as GTA IV) destroys precision irreparably, causing artifacts if " + "another rendering pass is done after the EDRAM reupload.\n" + " truncate:\n" + " Convert to 20e4 directly in pixel shaders, always rounding down.\n" + " + Good performance, conservative early depth test is possible.\n" + " + No precision loss when anything changes in the storage of the depth " + "buffer, EDRAM > RAM > EDRAM copying preserves precision.\n" + " - Rounding mode is incorrect, sometimes giving results smaller than " + "they should be - may cause inaccuracy especially in edge cases when the " + "game wants to write an exact value.\n" + " - Host MSAA is only possible at SSAA speed, with per-sample shading.\n" + " round:\n" + " Convert to 20e4 directly in pixel shaders, correctly rounding to the " + "nearest even.\n" + " + Highest accuracy.\n" + " - Significantly limited performance, early depth test is not possible.\n" + " - Host MSAA is only possible at SSAA speed, with per-sample shading.\n" + " Any other value:\n" + " Choose what is considered the most optimal (currently \"on_copy\").", + "GPU"); + DEFINE_int32(query_occlusion_fake_sample_count, 1000, "If set to -1 no sample counts are written, games may hang. Else, " "the sample count of every tile will be incremented on every " "EVENT_WRITE_ZPD by this number. 
Setting this to 0 means " "everything is reported as occluded.", "GPU"); + +namespace xe { +namespace gpu { +namespace flags { + +DepthFloat24Conversion GetDepthFloat24Conversion() { + if (cvars::depth_float24_conversion == "truncate") { + return DepthFloat24Conversion::kOnOutputTruncating; + } + if (cvars::depth_float24_conversion == "round") { + return DepthFloat24Conversion::kOnOutputRounding; + } + return DepthFloat24Conversion::kOnCopy; +} + +} // namespace flags +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/gpu_flags.h b/src/xenia/gpu/gpu_flags.h index 5ae64b76e..2405dc23c 100644 --- a/src/xenia/gpu/gpu_flags.h +++ b/src/xenia/gpu/gpu_flags.h @@ -22,6 +22,69 @@ DECLARE_bool(gpu_allow_invalid_fetch_constants); DECLARE_bool(half_pixel_offset); +DECLARE_string(depth_float24_conversion); + DECLARE_int32(query_occlusion_fake_sample_count); +namespace xe { +namespace gpu { +namespace flags { + +enum class DepthFloat24Conversion { + // Doing depth test at the host precision, converting to 20e4 to support + // reinterpretation, but keeping a separate EDRAM view containing depth values + // in the host format. When copying from the EDRAM buffer to host depth + // buffers, writing the stored host pixel if stored_f24 == to_f24(stored_host) + // (otherwise it was overwritten by something else, like clearing, or a color + // buffer; this is inexact though, and will incorrectly load pixels that were + // overwritten by something else in the EDRAM, but turned out to have the same + // value on the guest as before - an outdated host-precision value will be + // loaded in these cases instead). + // + // EDRAM > RAM, then reusing the EDRAM region for something else > EDRAM round + // trip destroys precision beyond repair. + // + // Full host early Z and MSAA with pixel-rate shading are supported. 
+ kOnCopy, + // Converting the depth to the closest host value representable exactly as a + // 20e4 float in pixel shaders, to support invariance in cases when the guest + // reuploads a previously resolved depth buffer to the EDRAM, rounding towards + // zero (which contradicts the rounding used by the Direct3D 9 reference + // rasterizer, but allows less-than-or-equal pixel shader depth output to be + // used to preserve most of early Z culling when the game is using reversed + // depth, which is the usual way of doing depth testing on the Xbox 360 and of + // utilizing the advantages of a floating-point encoding). + // + // With MSAA, pixel shaders must run at sample frequency - otherwise, if the + // depth is the same for the entire pixel, intersections of polygons cannot be + // antialiased. + // + // Important usage note: When using this mode, bounds of the fixed-function + // viewport must be converted to and back from float24 too (preferably using + // correct rounding to the nearest even, to reduce the error already caused by + // truncation rather than to amplify it). This ensures that clamping to the + // viewport bounds, which happens after the pixel shader even if it overwrites + // the resulting depth, is never done to a value not representable as float24 + // (for example, if the minimum Z is a number too small to be represented as + // float24, but not zero, it won't be possible to write what should become + // 0x000000 to the depth buffer). Note that this may add some error to the + // depth values from the rasterizer; however, modifying Z in the vertex shader + // to make interpolated depth values would cause clipping to be done to + // different bounds, which may be more undesirable, especially in cases when Z + // is explicitly set to a value like 0 or W (in such cases, the adjusted + // polygon may go outside 0...W in clip space and disappear). 
+ kOnOutputTruncating, + // Similar to kOnOutputTruncating, but rounding to the nearest even, more + // correctly, however, because the resulting depth can be bigger than the + // original host value, early depth testing can't be used at all. Same + // viewport usage rules apply. + kOnOutputRounding, +}; + +DepthFloat24Conversion GetDepthFloat24Conversion(); + +} // namespace flags +} // namespace gpu +} // namespace xe + #endif // XENIA_GPU_GPU_FLAGS_H_ diff --git a/src/xenia/gpu/graphics_system.cc b/src/xenia/gpu/graphics_system.cc index c9b608e9e..519625c95 100644 --- a/src/xenia/gpu/graphics_system.cc +++ b/src/xenia/gpu/graphics_system.cc @@ -277,8 +277,7 @@ void GraphicsSystem::ClearCaches() { } void GraphicsSystem::InitializeShaderStorage( - const std::filesystem::path& storage_root, uint32_t title_id, - bool blocking) { + const std::filesystem::path& cache_root, uint32_t title_id, bool blocking) { if (!cvars::store_shaders) { return; } @@ -286,21 +285,18 @@ void GraphicsSystem::InitializeShaderStorage( if (command_processor_->is_paused()) { // Safe to run on any thread while the command processor is paused, no // race condition. 
- command_processor_->InitializeShaderStorage(storage_root, title_id, true); + command_processor_->InitializeShaderStorage(cache_root, title_id, true); } else { xe::threading::Fence fence; - command_processor_->CallInThread( - [this, storage_root, title_id, &fence]() { - command_processor_->InitializeShaderStorage(storage_root, title_id, - true); - fence.Signal(); - }); + command_processor_->CallInThread([this, cache_root, title_id, &fence]() { + command_processor_->InitializeShaderStorage(cache_root, title_id, true); + fence.Signal(); + }); fence.Wait(); } } else { - command_processor_->CallInThread([this, storage_root, title_id]() { - command_processor_->InitializeShaderStorage(storage_root, title_id, - false); + command_processor_->CallInThread([this, cache_root, title_id]() { + command_processor_->InitializeShaderStorage(cache_root, title_id, false); }); } } diff --git a/src/xenia/gpu/graphics_system.h b/src/xenia/gpu/graphics_system.h index 47a4d3f7b..148206af2 100644 --- a/src/xenia/gpu/graphics_system.h +++ b/src/xenia/gpu/graphics_system.h @@ -63,7 +63,7 @@ class GraphicsSystem { virtual void ClearCaches(); - void InitializeShaderStorage(const std::filesystem::path& storage_root, + void InitializeShaderStorage(const std::filesystem::path& cache_root, uint32_t title_id, bool blocking); void RequestFrameTrace(); diff --git a/src/xenia/gpu/registers.h b/src/xenia/gpu/registers.h index dd1a7dfc2..07986b169 100644 --- a/src/xenia/gpu/registers.h +++ b/src/xenia/gpu/registers.h @@ -254,15 +254,15 @@ union PA_SU_SC_MODE_CNTL { uint32_t msaa_enable : 1; // +15 uint32_t vtx_window_offset_enable : 1; // +16 // LINE_STIPPLE_ENABLE was added on Adreno. 
- uint32_t : 2; // +17 - uint32_t provoking_vtx_last : 1; // +19 - uint32_t persp_corr_dis : 1; // +20 - uint32_t multi_prim_ib_ena : 1; // +21 - uint32_t : 1; // +22 - uint32_t quad_order_enable : 1; // +23 + uint32_t : 2; // +17 + uint32_t provoking_vtx_last : 1; // +19 + uint32_t persp_corr_dis : 1; // +20 + uint32_t multi_prim_ib_ena : 1; // +21 + uint32_t : 1; // +22 + uint32_t quad_order_enable : 1; // +23 + uint32_t sc_one_quad_per_clock : 1; // +24 // WAIT_RB_IDLE_ALL_TRI and WAIT_RB_IDLE_FIRST_TRI_NEW_STATE were added on // Adreno. - // TODO(Triang3l): Find SC_ONE_QUAD_PER_CLOCK offset. }; uint32_t value; static constexpr Register register_index = XE_GPU_REG_PA_SU_SC_MODE_CNTL; @@ -298,7 +298,7 @@ union PA_SC_VIZ_QUERY { // discard geometry after test (but use for testing) uint32_t kill_pix_post_hi_z : 1; // +7 // not used with d3d - uint32_t kill_pix_detail_mask : 1; // +8 + uint32_t kill_pix_post_detail_mask : 1; // +8 }; uint32_t value; static constexpr Register register_index = XE_GPU_REG_PA_SC_VIZ_QUERY; diff --git a/src/xenia/gpu/sampler_info.cc b/src/xenia/gpu/sampler_info.cc index 916be887f..025dcd3fe 100644 --- a/src/xenia/gpu/sampler_info.cc +++ b/src/xenia/gpu/sampler_info.cc @@ -12,7 +12,7 @@ #include #include -#include "third_party/xxhash/xxhash.h" +#include "xenia/base/xxhash.h" namespace xe { namespace gpu { @@ -51,7 +51,7 @@ bool SamplerInfo::Prepare(const xenos::xe_gpu_texture_fetch_t& fetch, } uint64_t SamplerInfo::hash() const { - return XXH64(this, sizeof(SamplerInfo), 0); + return XXH3_64bits(this, sizeof(SamplerInfo)); } } // namespace gpu diff --git a/src/xenia/gpu/shader.cc b/src/xenia/gpu/shader.cc index 931b728da..6df03fb81 100644 --- a/src/xenia/gpu/shader.cc +++ b/src/xenia/gpu/shader.cc @@ -31,9 +31,13 @@ Shader::Shader(xenos::ShaderType shader_type, uint64_t ucode_data_hash, xe::copy_and_swap(ucode_data_.data(), ucode_dwords, ucode_dword_count); } -Shader::~Shader() = default; +Shader::~Shader() { + for (auto it : 
translations_) { + delete it.second; + } +} -std::string Shader::GetTranslatedBinaryString() const { +std::string Shader::Translation::GetTranslatedBinaryString() const { std::string result; result.resize(translated_binary_.size()); std::memcpy(const_cast(result.data()), translated_binary_.data(), @@ -41,36 +45,24 @@ std::string Shader::GetTranslatedBinaryString() const { return result; } -std::pair Shader::Dump( +std::filesystem::path Shader::Translation::Dump( const std::filesystem::path& base_path, const char* path_prefix) { + std::filesystem::path path = base_path; // Ensure target path exists. - auto target_path = base_path; - if (!target_path.empty()) { - target_path = std::filesystem::absolute(target_path); - std::filesystem::create_directories(target_path); + if (!path.empty()) { + path = std::filesystem::absolute(path); + std::filesystem::create_directories(path); } - - auto base_name = - fmt::format("shader_{}_{:016X}", path_prefix, ucode_data_hash_); - - std::string txt_name, bin_name; - if (shader_type_ == xenos::ShaderType::kVertex) { - txt_name = base_name + ".vert"; - bin_name = base_name + ".bin.vert"; - } else { - txt_name = base_name + ".frag"; - bin_name = base_name + ".bin.frag"; - } - - std::filesystem::path txt_path, bin_path; - txt_path = base_path / txt_name; - bin_path = base_path / bin_name; - - FILE* f = filesystem::OpenFile(txt_path, "wb"); + path = path / + fmt::format( + "shader_{:016X}_{:08X}.{}.{}", shader().ucode_data_hash(), + modification(), path_prefix, + shader().type() == xenos::ShaderType::kVertex ? 
"vert" : "frag"); + FILE* f = filesystem::OpenFile(path, "wb"); if (f) { fwrite(translated_binary_.data(), 1, translated_binary_.size(), f); fprintf(f, "\n\n"); - auto ucode_disasm_ptr = ucode_disassembly().c_str(); + auto ucode_disasm_ptr = shader().ucode_disassembly().c_str(); while (*ucode_disasm_ptr) { auto line_end = std::strchr(ucode_disasm_ptr, '\n'); fprintf(f, "// "); @@ -83,14 +75,58 @@ std::pair Shader::Dump( } fclose(f); } + return std::move(path); +} - f = filesystem::OpenFile(bin_path, "wb"); +Shader::Translation* Shader::GetOrCreateTranslation(uint32_t modification, + bool* is_new) { + auto it = translations_.find(modification); + if (it != translations_.end()) { + if (is_new) { + *is_new = false; + } + return it->second; + } + Translation* translation = CreateTranslationInstance(modification); + translations_.emplace(modification, translation); + if (is_new) { + *is_new = true; + } + return translation; +} + +void Shader::DestroyTranslation(uint32_t modification) { + auto it = translations_.find(modification); + if (it == translations_.end()) { + return; + } + delete it->second; + translations_.erase(it); +} + +std::filesystem::path Shader::DumpUcodeBinary( + const std::filesystem::path& base_path) { + // Ensure target path exists. + std::filesystem::path path = base_path; + if (!path.empty()) { + path = std::filesystem::absolute(path); + std::filesystem::create_directories(path); + } + path = path / + fmt::format("shader_{:016X}.ucode.bin.{}", ucode_data_hash(), + type() == xenos::ShaderType::kVertex ? "vert" : "frag"); + + FILE* f = filesystem::OpenFile(path, "wb"); if (f) { - fwrite(ucode_data_.data(), 4, ucode_data_.size(), f); + fwrite(ucode_data().data(), 4, ucode_data().size(), f); fclose(f); } + return std::move(path); +} - return {std::move(txt_path), std::move(bin_path)}; +Shader::Translation* Shader::CreateTranslationInstance(uint32_t modification) { + // Default implementation for simple cases like ucode disassembly. 
+ return new Translation(*this, modification); } } // namespace gpu diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index 23998c307..0d45371d8 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -11,8 +11,12 @@ #define XENIA_GPU_SHADER_H_ #include +#include +#include #include #include +#include +#include #include #include "xenia/base/math.h" @@ -591,6 +595,8 @@ struct ParsedAluInstruction { class Shader { public: + // Type of the vertex shader in a D3D11-like rendering pipeline - shader + // interface depends on in, so it must be known at translation time. // If values are changed, INVALIDATE SHADER STORAGES (increase their version // constexpr) where those are stored! And check bit count where this is // packed. This is : uint32_t for simplicity of packing in bit fields. @@ -603,6 +609,8 @@ class Shader { kQuadDomainCPIndexed, kQuadDomainPatchIndexed, }; + // For packing HostVertexShaderType in bit fields. + static constexpr uint32_t kHostVertexShaderTypeBitCount = 3; struct Error { bool is_fatal = false; @@ -683,6 +691,70 @@ class Shader { } }; + class Translation { + public: + virtual ~Translation() {} + + Shader& shader() const { return shader_; } + + // Translator-specific modification bits. + uint32_t modification() const { return modification_; } + + // True if the shader was translated and prepared without error. + bool is_valid() const { return is_valid_; } + + // True if the shader has already been translated. + bool is_translated() const { return is_translated_; } + + // Errors that occurred during translation. + const std::vector& errors() const { return errors_; } + + // Translated shader binary (or text). + const std::vector& translated_binary() const { + return translated_binary_; + } + + // Gets the translated shader binary as a string. + // This is only valid if it is actually text. + std::string GetTranslatedBinaryString() const; + + // Disassembly of the translated from the host graphics layer. 
+ // May be empty if the host does not support disassembly. + const std::string& host_disassembly() const { return host_disassembly_; } + + // In case disassembly depends on the GPU backend, for setting it + // externally. + void set_host_disassembly(std::string disassembly) { + host_disassembly_ = std::move(disassembly); + } + + // For dumping after translation. Dumps the shader's disassembled microcode, + // translated code, and, if available, translated disassembly, to a file in + // the given path based on ucode hash. Returns the name of the written file. + std::filesystem::path Dump(const std::filesystem::path& base_path, + const char* path_prefix); + + protected: + Translation(Shader& shader, uint32_t modification) + : shader_(shader), modification_(modification) {} + + // If there was some failure during preparation on the implementation side. + void MakeInvalid() { is_valid_ = false; } + + private: + friend class Shader; + friend class ShaderTranslator; + + Shader& shader_; + uint32_t modification_; + + bool is_valid_ = false; + bool is_translated_ = false; + std::vector errors_; + std::vector translated_binary_; + std::string host_disassembly_; + }; + Shader(xenos::ShaderType shader_type, uint64_t ucode_data_hash, const uint32_t* ucode_dwords, size_t ucode_dword_count); virtual ~Shader(); @@ -690,19 +762,30 @@ class Shader { // Whether the shader is identified as a vertex or pixel shader. xenos::ShaderType type() const { return shader_type_; } - // If this is a vertex shader, and it has been translated, type of the shader - // in a D3D11-like rendering pipeline - shader interface depends on in, so it - // must be known at translation time. - HostVertexShaderType host_vertex_shader_type() const { - return host_vertex_shader_type_; - } - // Microcode dwords in host endianness. 
const std::vector& ucode_data() const { return ucode_data_; } uint64_t ucode_data_hash() const { return ucode_data_hash_; } const uint32_t* ucode_dwords() const { return ucode_data_.data(); } size_t ucode_dword_count() const { return ucode_data_.size(); } + // Host translations with the specified modification bits. Not thread-safe + // with respect to translation creation/destruction. + const std::unordered_map& translations() const { + return translations_; + } + Translation* GetTranslation(uint32_t modification) const { + auto it = translations_.find(modification); + if (it != translations_.cend()) { + return it->second; + } + return nullptr; + } + Translation* GetOrCreateTranslation(uint32_t modification, + bool* is_new = nullptr); + // For shader storage loading, to remove a modification in case of translation + // failure. Not thread-safe. + void DestroyTranslation(uint32_t modification); + // All vertex bindings used in the shader. // Valid for vertex shaders only. const std::vector& vertex_bindings() const { @@ -733,73 +816,55 @@ class Shader { // True if the shader overrides the pixel depth. bool writes_depth() const { return writes_depth_; } - // True if Xenia can automatically enable early depth/stencil for the pixel - // shader when RB_DEPTHCONTROL EARLY_Z_ENABLE is not set, provided alpha - // testing and alpha to coverage are disabled. - bool implicit_early_z_allowed() const { return implicit_early_z_allowed_; } - - // True if the shader was translated and prepared without error. - bool is_valid() const { return is_valid_; } - - // True if the shader has already been translated. - bool is_translated() const { return is_translated_; } - - // Errors that occurred during translation. - const std::vector& errors() const { return errors_; } + // True if the current shader has any `kill` instructions. + bool kills_pixels() const { return kills_pixels_; } // Microcode disassembly in D3D format. 
const std::string& ucode_disassembly() const { return ucode_disassembly_; } - // Translated shader binary (or text). - const std::vector& translated_binary() const { - return translated_binary_; + // An externally managed identifier of the shader storage the microcode of the + // shader was last written to, or was loaded from, to only write the shader + // microcode to the storage once. UINT32_MAX by default. + uint32_t ucode_storage_index() const { return ucode_storage_index_; } + void set_ucode_storage_index(uint32_t storage_index) { + ucode_storage_index_ = storage_index; } - // Gets the translated shader binary as a string. - // This is only valid if it is actually text. - std::string GetTranslatedBinaryString() const; - - // Disassembly of the translated from the host graphics layer. - // May be empty if the host does not support disassembly. - const std::string& host_disassembly() const { return host_disassembly_; } - // A lot of errors that occurred during preparation of the host shader. - const std::string& host_error_log() const { return host_error_log_; } - // Host binary that can be saved and reused across runs. - // May be empty if the host does not support saving binaries. - const std::vector& host_binary() const { return host_binary_; } - - // Dumps the shader to a file in the given path based on ucode hash. - // Both the ucode binary and disassembled and translated shader will be - // written. - // Returns the filename of the shader and the binary. - std::pair Dump( - const std::filesystem::path& base_path, const char* path_prefix); + // Dumps the shader's microcode binary to a file in the given path based on + // ucode hash. Returns the name of the written file. Can be called at any + // time, doesn't require the shader to be translated. 
+ std::filesystem::path DumpUcodeBinary(const std::filesystem::path& base_path); protected: friend class ShaderTranslator; + virtual Translation* CreateTranslationInstance(uint32_t modification); + xenos::ShaderType shader_type_; - HostVertexShaderType host_vertex_shader_type_ = HostVertexShaderType::kVertex; std::vector ucode_data_; uint64_t ucode_data_hash_; + // Modification bits -> translation. + std::unordered_map translations_; + + // Whether setup of the post-translation parameters (listed below, plus those + // specific to the implementation) has been initiated, by any thread. If + // translation is performed on multiple threads, only one thread must be + // setting this up (other threads would write the same data anyway). + std::atomic_flag post_translation_info_set_up_ = ATOMIC_FLAG_INIT; + + // Initialized after the first successful translation (these don't depend on + // the host-side modification bits). + std::string ucode_disassembly_; std::vector vertex_bindings_; std::vector texture_bindings_; ConstantRegisterMap constant_register_map_ = {0}; bool writes_color_targets_[4] = {false, false, false, false}; bool writes_depth_ = false; - bool implicit_early_z_allowed_ = true; + bool kills_pixels_ = false; std::vector memexport_stream_constants_; - bool is_valid_ = false; - bool is_translated_ = false; - std::vector errors_; - - std::string ucode_disassembly_; - std::vector translated_binary_; - std::string host_disassembly_; - std::string host_error_log_; - std::vector host_binary_; + uint32_t ucode_storage_index_ = UINT32_MAX; }; } // namespace gpu diff --git a/src/xenia/gpu/shader_compiler_main.cc b/src/xenia/gpu/shader_compiler_main.cc index a9a2ed609..809499e12 100644 --- a/src/xenia/gpu/shader_compiler_main.cc +++ b/src/xenia/gpu/shader_compiler_main.cc @@ -144,11 +144,15 @@ int shader_compiler_main(const std::vector& args) { Shader::HostVertexShaderType::kQuadDomainPatchIndexed; } } + uint32_t modification = + 
translator->GetDefaultModification(shader_type, host_vertex_shader_type); - translator->Translate(shader.get(), host_vertex_shader_type); + Shader::Translation* translation = + shader->GetOrCreateTranslation(modification); + translator->Translate(*translation); - const void* source_data = shader->translated_binary().data(); - size_t source_data_size = shader->translated_binary().size(); + const void* source_data = translation->translated_binary().data(); + size_t source_data_size = translation->translated_binary().size(); std::string spirv_disasm; if (cvars::shader_output_type == "spirvtext") { diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index f2bf35bf5..44f033378 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -1,4 +1,3 @@ -#include "shader_translator.h" /** ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * @@ -14,6 +13,7 @@ #include #include +#include "xenia/base/assert.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" @@ -46,7 +46,9 @@ ShaderTranslator::ShaderTranslator() = default; ShaderTranslator::~ShaderTranslator() = default; -void ShaderTranslator::Reset() { +void ShaderTranslator::Reset(xenos::ShaderType shader_type) { + shader_type_ = shader_type; + modification_ = GetDefaultModification(shader_type); errors_.clear(); ucode_disasm_buffer_.Reset(); ucode_disasm_line_number_ = 0; @@ -64,37 +66,37 @@ void ShaderTranslator::Reset() { writes_color_targets_[i] = false; } writes_depth_ = false; - implicit_early_z_allowed_ = true; + kills_pixels_ = false; memexport_alloc_count_ = 0; memexport_eA_written_ = 0; std::memset(&memexport_eM_written_, 0, sizeof(memexport_eM_written_)); memexport_stream_constants_.clear(); } -bool ShaderTranslator::Translate( - Shader* shader, reg::SQ_PROGRAM_CNTL cntl, - Shader::HostVertexShaderType host_vertex_shader_type) { - Reset(); - uint32_t cntl_num_reg = 
shader->type() == xenos::ShaderType::kVertex +bool ShaderTranslator::Translate(Shader::Translation& translation, + reg::SQ_PROGRAM_CNTL cntl) { + xenos::ShaderType shader_type = translation.shader().type(); + Reset(shader_type); + uint32_t cntl_num_reg = shader_type == xenos::ShaderType::kVertex ? cntl.vs_num_reg : cntl.ps_num_reg; register_count_ = (cntl_num_reg & 0x80) ? 0 : (cntl_num_reg + 1); - return TranslateInternal(shader, host_vertex_shader_type); + return TranslateInternal(translation); } -bool ShaderTranslator::Translate( - Shader* shader, Shader::HostVertexShaderType host_vertex_shader_type) { - Reset(); - return TranslateInternal(shader, host_vertex_shader_type); +bool ShaderTranslator::Translate(Shader::Translation& translation) { + Reset(translation.shader().type()); + return TranslateInternal(translation); } -bool ShaderTranslator::TranslateInternal( - Shader* shader, Shader::HostVertexShaderType host_vertex_shader_type) { - shader_type_ = shader->type(); - host_vertex_shader_type_ = host_vertex_shader_type; - ucode_dwords_ = shader->ucode_dwords(); - ucode_dword_count_ = shader->ucode_dword_count(); +bool ShaderTranslator::TranslateInternal(Shader::Translation& translation) { + Shader& shader = translation.shader(); + assert_true(shader_type_ == shader.type()); + shader_type_ = shader.type(); + ucode_dwords_ = shader.ucode_dwords(); + ucode_dword_count_ = shader.ucode_dword_count(); + modification_ = translation.modification(); // Control flow instructions come paired in blocks of 3 dwords and all are // listed at the top of the ucode. @@ -147,12 +149,6 @@ bool ShaderTranslator::TranslateInternal( if (memexport_eA_written_ == 0) { memexport_stream_constants_.clear(); } - if (!memexport_stream_constants_.empty()) { - // TODO(Triang3l): Investigate what happens to memexport when the pixel - // fails the depth/stencil test, but in Direct3D 11 UAV writes disable early - // depth/stencil. 
- implicit_early_z_allowed_ = false; - } StartTranslation(); @@ -187,35 +183,44 @@ bool ShaderTranslator::TranslateInternal( ++cf_index; } - shader->errors_ = std::move(errors_); - shader->translated_binary_ = CompleteTranslation(); - shader->ucode_disassembly_ = ucode_disasm_buffer_.to_string(); - shader->host_vertex_shader_type_ = host_vertex_shader_type_; - shader->vertex_bindings_ = std::move(vertex_bindings_); - shader->texture_bindings_ = std::move(texture_bindings_); - shader->constant_register_map_ = std::move(constant_register_map_); - for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) { - shader->writes_color_targets_[i] = writes_color_targets_[i]; - } - shader->writes_depth_ = writes_depth_; - shader->implicit_early_z_allowed_ = implicit_early_z_allowed_; - shader->memexport_stream_constants_.clear(); - for (uint32_t memexport_stream_constant : memexport_stream_constants_) { - shader->memexport_stream_constants_.push_back(memexport_stream_constant); - } + translation.errors_ = std::move(errors_); + translation.translated_binary_ = CompleteTranslation(); + translation.is_translated_ = true; - shader->is_valid_ = true; - shader->is_translated_ = true; - for (const auto& error : shader->errors_) { + bool is_valid = true; + for (const auto& error : translation.errors_) { if (error.is_fatal) { - shader->is_valid_ = false; + is_valid = false; break; } } + translation.is_valid_ = is_valid; - PostTranslation(shader); + // Setup info that doesn't depend on the modification only once. 
+ bool setup_shader_post_translation_info = + is_valid && !shader.post_translation_info_set_up_.test_and_set(); + if (setup_shader_post_translation_info) { + shader.ucode_disassembly_ = ucode_disasm_buffer_.to_string(); + shader.vertex_bindings_ = std::move(vertex_bindings_); + shader.texture_bindings_ = std::move(texture_bindings_); + shader.constant_register_map_ = std::move(constant_register_map_); + for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) { + shader.writes_color_targets_[i] = writes_color_targets_[i]; + } + shader.writes_depth_ = writes_depth_; + shader.kills_pixels_ = kills_pixels_; + shader.memexport_stream_constants_.clear(); + shader.memexport_stream_constants_.reserve( + memexport_stream_constants_.size()); + shader.memexport_stream_constants_.insert( + shader.memexport_stream_constants_.cend(), + memexport_stream_constants_.cbegin(), + memexport_stream_constants_.cend()); + } + PostTranslation(translation, setup_shader_post_translation_info); - return shader->is_valid_; + // In case is_valid_ is modified by PostTranslation, reload. 
+ return translation.is_valid_; } void ShaderTranslator::MarkUcodeInstruction(uint32_t dword_offset) { @@ -338,14 +343,9 @@ void ShaderTranslator::GatherInstructionInformation( ParsedAluInstruction instr; ParseAluInstruction(op, instr); - const auto& vector_opcode_info = - alu_vector_opcode_infos_[uint32_t(op.vector_opcode())]; - implicit_early_z_allowed_ &= - !vector_opcode_info.disable_implicit_early_z; - const auto& scalar_opcode_info = - alu_scalar_opcode_infos_[uint32_t(op.scalar_opcode())]; - implicit_early_z_allowed_ &= - !scalar_opcode_info.disable_implicit_early_z; + kills_pixels_ = kills_pixels_ || + ucode::AluVectorOpcodeIsKill(op.vector_opcode()) || + ucode::AluScalarOpcodeIsKill(op.scalar_opcode()); if (instr.vector_and_constant_result.storage_target != InstructionStorageTarget::kRegister || @@ -403,7 +403,6 @@ void ShaderTranslator::GatherInstructionInformation( break; case InstructionStorageTarget::kDepth: writes_depth_ = true; - implicit_early_z_allowed_ = false; break; default: break; @@ -1077,91 +1076,91 @@ uint32_t ParsedTextureFetchInstruction::GetNonZeroResultComponents() const { const ShaderTranslator::AluOpcodeInfo ShaderTranslator::alu_vector_opcode_infos_[0x20] = { - {"add", 2, 4, false}, // 0 - {"mul", 2, 4, false}, // 1 - {"max", 2, 4, false}, // 2 - {"min", 2, 4, false}, // 3 - {"seq", 2, 4, false}, // 4 - {"sgt", 2, 4, false}, // 5 - {"sge", 2, 4, false}, // 6 - {"sne", 2, 4, false}, // 7 - {"frc", 1, 4, false}, // 8 - {"trunc", 1, 4, false}, // 9 - {"floor", 1, 4, false}, // 10 - {"mad", 3, 4, false}, // 11 - {"cndeq", 3, 4, false}, // 12 - {"cndge", 3, 4, false}, // 13 - {"cndgt", 3, 4, false}, // 14 - {"dp4", 2, 4, false}, // 15 - {"dp3", 2, 4, false}, // 16 - {"dp2add", 3, 4, false}, // 17 - {"cube", 2, 4, false}, // 18 - {"max4", 1, 4, false}, // 19 - {"setp_eq_push", 2, 4, false}, // 20 - {"setp_ne_push", 2, 4, false}, // 21 - {"setp_gt_push", 2, 4, false}, // 22 - {"setp_ge_push", 2, 4, false}, // 23 - {"kill_eq", 2, 4, true}, // 
24 - {"kill_gt", 2, 4, true}, // 25 - {"kill_ge", 2, 4, true}, // 26 - {"kill_ne", 2, 4, true}, // 27 - {"dst", 2, 4, false}, // 28 - {"maxa", 2, 4, false}, // 29 + {"add", 2, 4}, // 0 + {"mul", 2, 4}, // 1 + {"max", 2, 4}, // 2 + {"min", 2, 4}, // 3 + {"seq", 2, 4}, // 4 + {"sgt", 2, 4}, // 5 + {"sge", 2, 4}, // 6 + {"sne", 2, 4}, // 7 + {"frc", 1, 4}, // 8 + {"trunc", 1, 4}, // 9 + {"floor", 1, 4}, // 10 + {"mad", 3, 4}, // 11 + {"cndeq", 3, 4}, // 12 + {"cndge", 3, 4}, // 13 + {"cndgt", 3, 4}, // 14 + {"dp4", 2, 4}, // 15 + {"dp3", 2, 4}, // 16 + {"dp2add", 3, 4}, // 17 + {"cube", 2, 4}, // 18 + {"max4", 1, 4}, // 19 + {"setp_eq_push", 2, 4}, // 20 + {"setp_ne_push", 2, 4}, // 21 + {"setp_gt_push", 2, 4}, // 22 + {"setp_ge_push", 2, 4}, // 23 + {"kill_eq", 2, 4}, // 24 + {"kill_gt", 2, 4}, // 25 + {"kill_ge", 2, 4}, // 26 + {"kill_ne", 2, 4}, // 27 + {"dst", 2, 4}, // 28 + {"maxa", 2, 4}, // 29 }; const ShaderTranslator::AluOpcodeInfo ShaderTranslator::alu_scalar_opcode_infos_[0x40] = { - {"adds", 1, 2, false}, // 0 - {"adds_prev", 1, 1, false}, // 1 - {"muls", 1, 2, false}, // 2 - {"muls_prev", 1, 1, false}, // 3 - {"muls_prev2", 1, 2, false}, // 4 - {"maxs", 1, 2, false}, // 5 - {"mins", 1, 2, false}, // 6 - {"seqs", 1, 1, false}, // 7 - {"sgts", 1, 1, false}, // 8 - {"sges", 1, 1, false}, // 9 - {"snes", 1, 1, false}, // 10 - {"frcs", 1, 1, false}, // 11 - {"truncs", 1, 1, false}, // 12 - {"floors", 1, 1, false}, // 13 - {"exp", 1, 1, false}, // 14 - {"logc", 1, 1, false}, // 15 - {"log", 1, 1, false}, // 16 - {"rcpc", 1, 1, false}, // 17 - {"rcpf", 1, 1, false}, // 18 - {"rcp", 1, 1, false}, // 19 - {"rsqc", 1, 1, false}, // 20 - {"rsqf", 1, 1, false}, // 21 - {"rsq", 1, 1, false}, // 22 - {"maxas", 1, 2, false}, // 23 - {"maxasf", 1, 2, false}, // 24 - {"subs", 1, 2, false}, // 25 - {"subs_prev", 1, 1, false}, // 26 - {"setp_eq", 1, 1, false}, // 27 - {"setp_ne", 1, 1, false}, // 28 - {"setp_gt", 1, 1, false}, // 29 - {"setp_ge", 1, 1, false}, // 30 - 
{"setp_inv", 1, 1, false}, // 31 - {"setp_pop", 1, 1, false}, // 32 - {"setp_clr", 0, 0, false}, // 33 - {"setp_rstr", 1, 1, false}, // 34 - {"kills_eq", 1, 1, true}, // 35 - {"kills_gt", 1, 1, true}, // 36 - {"kills_ge", 1, 1, true}, // 37 - {"kills_ne", 1, 1, true}, // 38 - {"kills_one", 1, 1, true}, // 39 - {"sqrt", 1, 1, false}, // 40 - {"UNKNOWN", 0, 0, false}, // 41 - {"mulsc", 2, 1, false}, // 42 - {"mulsc", 2, 1, false}, // 43 - {"addsc", 2, 1, false}, // 44 - {"addsc", 2, 1, false}, // 45 - {"subsc", 2, 1, false}, // 46 - {"subsc", 2, 1, false}, // 47 - {"sin", 1, 1, false}, // 48 - {"cos", 1, 1, false}, // 49 - {"retain_prev", 0, 0, false}, // 50 + {"adds", 1, 2}, // 0 + {"adds_prev", 1, 1}, // 1 + {"muls", 1, 2}, // 2 + {"muls_prev", 1, 1}, // 3 + {"muls_prev2", 1, 2}, // 4 + {"maxs", 1, 2}, // 5 + {"mins", 1, 2}, // 6 + {"seqs", 1, 1}, // 7 + {"sgts", 1, 1}, // 8 + {"sges", 1, 1}, // 9 + {"snes", 1, 1}, // 10 + {"frcs", 1, 1}, // 11 + {"truncs", 1, 1}, // 12 + {"floors", 1, 1}, // 13 + {"exp", 1, 1}, // 14 + {"logc", 1, 1}, // 15 + {"log", 1, 1}, // 16 + {"rcpc", 1, 1}, // 17 + {"rcpf", 1, 1}, // 18 + {"rcp", 1, 1}, // 19 + {"rsqc", 1, 1}, // 20 + {"rsqf", 1, 1}, // 21 + {"rsq", 1, 1}, // 22 + {"maxas", 1, 2}, // 23 + {"maxasf", 1, 2}, // 24 + {"subs", 1, 2}, // 25 + {"subs_prev", 1, 1}, // 26 + {"setp_eq", 1, 1}, // 27 + {"setp_ne", 1, 1}, // 28 + {"setp_gt", 1, 1}, // 29 + {"setp_ge", 1, 1}, // 30 + {"setp_inv", 1, 1}, // 31 + {"setp_pop", 1, 1}, // 32 + {"setp_clr", 0, 0}, // 33 + {"setp_rstr", 1, 1}, // 34 + {"kills_eq", 1, 1}, // 35 + {"kills_gt", 1, 1}, // 36 + {"kills_ge", 1, 1}, // 37 + {"kills_ne", 1, 1}, // 38 + {"kills_one", 1, 1}, // 39 + {"sqrt", 1, 1}, // 40 + {"UNKNOWN", 0, 0}, // 41 + {"mulsc", 2, 1}, // 42 + {"mulsc", 2, 1}, // 43 + {"addsc", 2, 1}, // 44 + {"addsc", 2, 1}, // 45 + {"subsc", 2, 1}, // 46 + {"subsc", 2, 1}, // 47 + {"sin", 1, 1}, // 48 + {"cos", 1, 1}, // 49 + {"retain_prev", 0, 0}, // 50 }; void 
ShaderTranslator::TranslateAluInstruction(const AluInstruction& op) { diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h index 73ab4f6c0..c685be4d7 100644 --- a/src/xenia/gpu/shader_translator.h +++ b/src/xenia/gpu/shader_translator.h @@ -29,18 +29,27 @@ class ShaderTranslator { public: virtual ~ShaderTranslator(); - bool Translate(Shader* shader, reg::SQ_PROGRAM_CNTL cntl, - Shader::HostVertexShaderType host_vertex_shader_type = - Shader::HostVertexShaderType::kVertex); - bool Translate(Shader* shader, - Shader::HostVertexShaderType host_vertex_shader_type = - Shader::HostVertexShaderType::kVertex); + virtual uint32_t GetDefaultModification( + xenos::ShaderType shader_type, + Shader::HostVertexShaderType host_vertex_shader_type = + Shader::HostVertexShaderType::kVertex) const { + return 0; + } + + bool Translate(Shader::Translation& translation, reg::SQ_PROGRAM_CNTL cntl); + bool Translate(Shader::Translation& translation); protected: ShaderTranslator(); // Resets translator state before beginning translation. - virtual void Reset(); + // shader_type is passed here so translator implementations can generate + // special fixed shaders for internal use, and set up the type for this + // purpose. + virtual void Reset(xenos::ShaderType shader_type); + + // Current host-side modification being generated. + uint32_t modification() const { return modification_; } // Register count. uint32_t register_count() const { return register_count_; } @@ -48,11 +57,6 @@ class ShaderTranslator { bool is_vertex_shader() const { return shader_type_ == xenos::ShaderType::kVertex; } - // If translating a vertex shader, type of the shader in a D3D11-like - // rendering pipeline. - Shader::HostVertexShaderType host_vertex_shader_type() const { - return host_vertex_shader_type_; - } // True if the current shader is a pixel shader. 
bool is_pixel_shader() const { return shader_type_ == xenos::ShaderType::kPixel; @@ -85,10 +89,8 @@ class ShaderTranslator { // True if the current shader overrides the pixel depth, set before // translation. Doesn't include writes with an empty used write mask. bool writes_depth() const { return writes_depth_; } - // True if Xenia can automatically enable early depth/stencil for the pixel - // shader when RB_DEPTHCONTROL EARLY_Z_ENABLE is not set, provided alpha - // testing and alpha to coverage are disabled. - bool implicit_early_z_allowed() const { return implicit_early_z_allowed_; } + // True if the current shader has any `kill` instructions. + bool kills_pixels() const { return kills_pixels_; } // A list of all vertex bindings, populated before translation occurs. const std::vector& vertex_bindings() const { return vertex_bindings_; @@ -112,6 +114,17 @@ class ShaderTranslator { return memexport_stream_constants_; } + // Whether the shader can have early depth and stencil writing enabled, unless + // alpha test or alpha to coverage is enabled. Data gathered before + // translation. + bool CanWriteZEarly() const { + // TODO(Triang3l): Investigate what happens to memexport when the pixel + // fails the depth/stencil test, but in Direct3D 11 UAV writes disable early + // depth/stencil. + return !writes_depth_ && !kills_pixels_ && + memexport_stream_constants_.empty(); + } + // Current line number in the ucode disassembly. size_t ucode_disasm_line_number() const { return ucode_disasm_line_number_; } // Ucode disassembly buffer accumulated during translation. @@ -130,10 +143,14 @@ class ShaderTranslator { } // Handles post-translation tasks when the shader has been fully translated. - virtual void PostTranslation(Shader* shader) {} + // setup_shader_post_translation_info if non-modification-specific parameters + // of the Shader object behind the Translation can be set by this invocation. 
+ virtual void PostTranslation(Shader::Translation& translation, + bool setup_shader_post_translation_info) {} // Sets the host disassembly on a shader. - void set_host_disassembly(Shader* shader, std::string value) { - shader->host_disassembly_ = std::move(value); + void set_host_disassembly(Shader::Translation& translation, + std::string value) { + translation.host_disassembly_ = std::move(value); } // Handles translation for control flow label addresses. @@ -184,11 +201,9 @@ class ShaderTranslator { const char* name; uint32_t argument_count; uint32_t src_swizzle_component_count; - bool disable_implicit_early_z; }; - bool TranslateInternal(Shader* shader, - Shader::HostVertexShaderType host_vertex_shader_type); + bool TranslateInternal(Shader::Translation& translation); void MarkUcodeInstruction(uint32_t dword_offset); void AppendUcodeDisasm(char c); @@ -242,12 +257,13 @@ class ShaderTranslator { // Input shader metadata and microcode. xenos::ShaderType shader_type_; - Shader::HostVertexShaderType host_vertex_shader_type_; const uint32_t* ucode_dwords_; size_t ucode_dword_count_; - reg::SQ_PROGRAM_CNTL program_cntl_; uint32_t register_count_; + // Current host-side modification being generated. + uint32_t modification_ = 0; + // Accumulated translation errors. std::vector errors_; @@ -268,7 +284,8 @@ class ShaderTranslator { // translation. std::set label_addresses_; - // Detected binding information gathered before translation. + // Detected binding information gathered before translation. Must not be + // affected by the modification index. int total_attrib_count_ = 0; std::vector vertex_bindings_; std::vector texture_bindings_; @@ -278,13 +295,15 @@ class ShaderTranslator { // These all are gathered before translation. // uses_register_dynamic_addressing_ for writes, writes_color_targets_, // writes_depth_ don't include empty used write masks. + // Must not be affected by the modification index. 
Shader::ConstantRegisterMap constant_register_map_ = {0}; bool uses_register_dynamic_addressing_ = false; bool writes_color_targets_[4] = {false, false, false, false}; bool writes_depth_ = false; - bool implicit_early_z_allowed_ = true; + bool kills_pixels_ = false; // Memexport info is gathered before translation. + // Must not be affected by the modification index. uint32_t memexport_alloc_count_ = 0; // For register allocation in implementations - what was used after each // `alloc export`. diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float24and32_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float24and32_cs.cso new file mode 100644 index 0000000000000000000000000000000000000000..c389242a1ee9667b3dfdf6346a236e612ce4770b GIT binary patch literal 3500 zcmbtWO=w(I6h686vGbbRm{ychbhK4c5lNz;i0Ex68C!^L?Zgll@|e!NkdaAVocE+% z)j?4~#FZ;W@E;*A-HSV2*_F6(AtKV13q^1@e&3yU-rRYe7{LSQ-FMIV?sx7v_q?B_ z^)r<}Kl$`%aB=P23#Ing4}bT~dt)N<1qAB4`perajBdfkW+H-QVx z@&t5jvR2`uFW@_JWa&QI?u5=|dJ^L_l=2ny7U&d|JSzS@ZQnx=K=;GlAb5n3&*!`v zg;BFp-wNAty%{EH+=;>@e!0_#WR$BJC8?}O%UrU86*VrX#M?<)-%b$7;$4c8__+Ru=81YAIEwWBhQVMJ_VjxHxmhfAG7^lgKzqBW}oT~;&Y7w#z_~w zNK(k%GBJKE!(RaBy#>wyn2WH#I>HXl;N#4`c8EJ0#F%q4kss0XccD(o`*?=enDl8L zY6~*V?MU@Gk&>QeQ_L~cPKeuq9D?%V4D!9}o{3=(_<5;MdEOE^sQV?JXTuS%S1IPm7>d>Px@J^-#e9aE{DHj4 zt?{eZ%9!;9bq(m}UaPnEamMz0pZ%{oDi6jN`A{?lXQN^u&%=x{4!^OkbSz)4v}qh2 zbuYR?Ahg}Hjc)M{Dofh^-(-4#kJ&6Jo-<31@A%P z`8@)zdm5u+-5Fx8Be+W**1eO-t@)6H=eJjU@Qlo%*E@{w$Urp?^Y;05&ix^dXzG~eR$I)0^L>}{1`7{PcWBdOCyjNxKFh9j9FXz~|tG)WP4>>m$mx6%H^|Nx& zt!KbBct_>+GS+=-U%_$b%Gyv+SGOnLtBoV;w3>1FK4V4x?)9GX^=-7Fo*Vss4S7#n zQ8bpZ^mK<*dp+wb0>s#xc}Mjh*AV*>Mqf1-J9zH&`J+v-I_lmK zW6vHa^7^&n{lD>+3%uIXSjN6{wdB3-!`PemeWFmKLchD5@cfk)@m!IA3Rm(F$GHl4 z0>!LN8u{V%d3Cw5nyp7IaxS&K;eBTb{+!R=&-di(`K~YIHuN2IXJGLRVg;X-Ux23= ziwi{G^&E}G1;TrxSmQPHA32Q0r64>z=0*7}=#Shtg}&MBUcZC-U1AQiUJmp#2lYRR oqo}Q(UVRs~3N-sYa`na(|4AaZL0xV%>>>UcD1^QIe}tU>0517<`~Uy| literal 0 HcmV?d00001 diff --git 
a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float24and32_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float24and32_cs.h new file mode 100644 index 000000000..34f44b18c --- /dev/null +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float24and32_cs.h @@ -0,0 +1,296 @@ +// generated from `xb buildhlsl` +// source: edram_load_depth_float24and32.cs.hlsl +const uint8_t edram_load_depth_float24and32_cs[] = { + 0x44, 0x58, 0x42, 0x43, 0xF3, 0xA3, 0xA4, 0x14, 0x0A, 0x50, 0x56, 0x49, + 0x5D, 0x09, 0x6C, 0xBF, 0x33, 0xC9, 0xC1, 0x9A, 0x01, 0x00, 0x00, 0x00, + 0xAC, 0x0D, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x0C, 0x03, 0x00, 0x00, 0x1C, 0x03, 0x00, 0x00, 0x2C, 0x03, 0x00, 0x00, + 0x10, 0x0D, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD0, 0x02, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0x53, 0x43, 0x00, 0x05, 0x00, 0x00, + 0xA8, 0x02, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xB4, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xCF, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x6C, 
0x6F, 0x61, + 0x64, 0x5F, 0x73, 0x74, 0x6F, 0x72, 0x65, 0x5F, 0x73, 0x6F, 0x75, 0x72, + 0x63, 0x65, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, + 0x6C, 0x6F, 0x61, 0x64, 0x5F, 0x73, 0x74, 0x6F, 0x72, 0x65, 0x5F, 0x64, + 0x65, 0x73, 0x74, 0x00, 0x58, 0x65, 0x45, 0x64, 0x72, 0x61, 0x6D, 0x4C, + 0x6F, 0x61, 0x64, 0x53, 0x74, 0x6F, 0x72, 0x65, 0x43, 0x6F, 0x6E, 0x73, + 0x74, 0x61, 0x6E, 0x74, 0x73, 0x00, 0xAB, 0xAB, 0xE8, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x1C, 0x01, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE4, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, + 0x30, 0x02, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0x00, 0x00, 0x4E, 0x02, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x69, 0x02, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, + 0x83, 0x02, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, + 0x5F, 0x72, 0x74, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x72, 0x5F, 0x64, 0x65, + 0x70, 0x74, 0x68, 0x5F, 0x6F, 0x66, 0x66, 0x73, 0x65, 0x74, 
0x00, 0x64, + 0x77, 0x6F, 0x72, 0x64, 0x00, 0xAB, 0xAB, 0xAB, 0x00, 0x00, 0x13, 0x00, + 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x02, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x65, + 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, + 0x72, 0x5F, 0x64, 0x65, 0x70, 0x74, 0x68, 0x5F, 0x70, 0x69, 0x74, 0x63, + 0x68, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72, + 0x74, 0x5F, 0x73, 0x74, 0x65, 0x6E, 0x63, 0x69, 0x6C, 0x5F, 0x6F, 0x66, + 0x66, 0x73, 0x65, 0x74, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, + 0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x73, 0x74, 0x65, 0x6E, 0x63, 0x69, 0x6C, + 0x5F, 0x70, 0x69, 0x74, 0x63, 0x68, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, + 0x72, 0x61, 0x6D, 0x5F, 0x62, 0x61, 0x73, 0x65, 0x5F, 0x73, 0x61, 0x6D, + 0x70, 0x6C, 0x65, 0x73, 0x5F, 0x32, 0x78, 0x5F, 0x64, 0x65, 0x70, 0x74, + 0x68, 0x5F, 0x70, 0x69, 0x74, 0x63, 0x68, 0x00, 0x4D, 0x69, 0x63, 0x72, + 0x6F, 0x73, 0x6F, 0x66, 0x74, 0x20, 0x28, 0x52, 0x29, 0x20, 0x48, 0x4C, + 0x53, 0x4C, 0x20, 0x53, 0x68, 0x61, 0x64, 0x65, 0x72, 0x20, 0x43, 0x6F, + 0x6D, 0x70, 0x69, 0x6C, 0x65, 0x72, 0x20, 0x31, 0x30, 0x2E, 0x31, 0x00, + 0x49, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x4F, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x53, 0x48, 0x45, 0x58, + 0xDC, 0x09, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x77, 0x02, 0x00, 0x00, + 0x6A, 0x08, 0x00, 0x01, 0x59, 0x00, 0x00, 0x07, 0x46, 0x8E, 0x30, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x06, + 0x46, 0x7E, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9D, 0x00, 0x00, 0x06, + 0x46, 0xEE, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x02, + 0x32, 0x10, 0x02, 0x00, 0x5F, 0x00, 0x00, 0x02, 0x32, 0x20, 0x02, 0x00, + 0x5F, 0x00, 0x00, 0x02, 0x32, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x02, + 0x07, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x04, 0x14, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x06, + 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0C, + 0x62, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x80, 0x30, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, + 0xFF, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x04, 0x03, + 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x06, + 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x06, + 0x82, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A, + 0xA2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x56, 0x0D, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xD8, 0xFF, 0xFF, 0xFF, + 0x1E, 0x00, 0x00, 0x07, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x01, + 0x55, 0x00, 0x00, 0x09, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0A, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x10, 0x00, 
0x00, 0x00, + 0x23, 0x00, 0x00, 0x08, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1A, 0x10, 0x02, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x06, + 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0A, 0x10, 0x02, 0x00, 0x26, 0x00, 0x00, 0x07, + 0x00, 0xD0, 0x00, 0x00, 0x42, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x09, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, + 0x00, 0x14, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x29, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0B, + 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x0A, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07, 0x22, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07, + 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xA5, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x7E, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A, + 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 
0x10, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, + 0x00, 0x00, 0xA0, 0x00, 0xA5, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x46, 0x7E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x05, 0xF2, 0x00, 0x10, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0xF5, 0xFF, 0xFF, 0xFF, 0xF5, 0xFF, 0xFF, 0xFF, 0xF5, 0xFF, 0xFF, 0xFF, + 0xF5, 0xFF, 0xFF, 0xFF, 0x37, 0x00, 0x00, 0x0C, 0xF2, 0x00, 0x10, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0B, 0xF2, 0x00, 0x10, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80, 0x41, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 
0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x37, 0x00, 0x00, 0x09, 0xF2, 0x00, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x29, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xFF, 0x0F, 0x00, + 0xFF, 0xFF, 0x0F, 0x00, 0xFF, 0xFF, 0x0F, 0x00, 0x37, 0x00, 0x00, 0x09, + 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x0A, + 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x38, + 0x00, 0x00, 0x00, 0x38, 0x29, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, + 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x37, 0x00, 0x00, 0x0C, 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 
0x10, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80, + 0x41, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x0A, + 0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F, + 0xFF, 0xFF, 0xFF, 0x7F, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F, + 0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F, 0x8C, 0x00, 0x00, 0x14, + 0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, + 0x00, 0x00, 0x80, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0B, + 0xF2, 0x00, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80, + 0x41, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 
0x00, 0x00, + 0x71, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x07, + 0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x4F, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38, + 0x00, 0x00, 0x80, 0x38, 0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8, + 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8, 0x37, 0x00, 0x00, 0x09, + 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0A, + 0xF2, 0x00, 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 
0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A, + 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x09, 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x29, 0x00, 0x00, 0x09, 0x32, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x02, 0x00, 0x02, 0x40, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x0A, 0x32, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x56, 0x05, 0x02, 0x00, 0xD6, 0x85, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x09, 0x32, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x86, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x08, 0xF2, 0xE0, 0x21, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 
0x00, 0x00, + 0xFF, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x14, 0xE2, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x56, 0x0E, 0x10, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x07, 0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xE6, 0x0A, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x08, + 0x12, 0xE0, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54, + 0x94, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, +}; diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float24and32_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float24and32_cs.txt new file mode 100644 index 000000000..4ad3f4288 --- /dev/null +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float24and32_cs.txt @@ -0,0 +1,117 @@ +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer XeEdramLoadStoreConstants +// { +// +// uint xe_edram_rt_color_depth_offset;// Offset: 0 Size: 4 +// uint xe_edram_rt_color_depth_pitch;// Offset: 4 Size: 4 +// uint xe_edram_rt_stencil_offset; // Offset: 8 Size: 4 +// uint xe_edram_rt_stencil_pitch; // Offset: 12 Size: 4 +// uint xe_edram_base_samples_2x_depth_pitch;// Offset: 16 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim ID HLSL Bind Count +// ------------------------------ ---------- ------- ----------- ------- -------------- ------ +// xe_edram_load_store_source texture byte r/o T0 t0 1 +// xe_edram_load_store_dest UAV byte r/w U0 u0 1 +// XeEdramLoadStoreConstants cbuffer NA NA CB0 cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_1 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[0:0][2], immediateIndexed, space=0 +dcl_resource_raw T0[0:0], space=0 +dcl_uav_raw U0[0:0], space=0 +dcl_input vThreadGroupID.xy +dcl_input vThreadIDInGroup.xy +dcl_input vThreadID.xy +dcl_temps 7 +dcl_thread_group 20, 16, 1 +ishl r0.x, vThreadIDInGroup.x, l(2) +and r0.yz, CB0[0][1].xxxx, l(0, 0x00008000, 2047, 0) +if_nz r0.y + ult r0.y, vThreadIDInGroup.x, l(10) + uge r0.w, vThreadIDInGroup.x, 
l(10) + and r0.yw, r0.yyyw, l(0, 40, 0, -40) + iadd r0.y, r0.w, r0.y + iadd r0.x, r0.y, r0.x +endif +ushr r0.y, CB0[0][1].x, l(16) +imad r0.y, vThreadGroupID.y, r0.y, r0.z +iadd r0.y, r0.y, vThreadGroupID.x +imul null, r0.z, vThreadIDInGroup.y, l(320) +imad r0.y, r0.y, l(5120), r0.z +ishl r0.x, r0.x, l(2) +iadd r0.x, r0.x, r0.y +ubfe r0.y, l(1), l(13), CB0[0][1].x +ishl r0.y, r0.y, l(1) +ishl r0.x, r0.x, r0.y +ld_raw r1.xyzw, r0.x, T0[0].xyzw +ushr r2.xyzw, r1.xyzw, l(8, 8, 8, 8) +iadd r0.x, r0.x, l(0x00a00000) +ld_raw r0.xyzw, r0.x, T0[0].xyzw +ubfe r3.xyzw, l(20, 20, 20, 20), l(8, 8, 8, 8), r1.xyzw +ushr r4.xyzw, r2.xyzw, l(20, 20, 20, 20) +firstbit_hi r5.xyzw, r3.xyzw +iadd r5.xyzw, r5.xyzw, l(-11, -11, -11, -11) +movc r5.xyzw, r3.xyzw, r5.xyzw, l(21,21,21,21) +iadd r6.xyzw, -r5.xyzw, l(1, 1, 1, 1) +movc r6.xyzw, r4.xyzw, r4.xyzw, r6.xyzw +ishl r5.xyzw, r3.xyzw, r5.xyzw +and r5.xyzw, r5.xyzw, l(0x000fffff, 0x000fffff, 0x000fffff, 0x000fffff) +movc r3.xyzw, r4.xyzw, r3.xyzw, r5.xyzw +ishl r4.xyzw, r6.xyzw, l(23, 23, 23, 23) +iadd r4.xyzw, r4.xyzw, l(0x38000000, 0x38000000, 0x38000000, 0x38000000) +ishl r3.xyzw, r3.xyzw, l(3, 3, 3, 3) +iadd r3.xyzw, r4.xyzw, r3.xyzw +movc r3.xyzw, r2.xyzw, r3.xyzw, l(0,0,0,0) +iadd r4.xyzw, r0.xyzw, -r3.xyzw +uge r5.xyzw, l(0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff), r0.xyzw +and r0.xyzw, r0.xyzw, r5.xyzw +umin r0.xyzw, r0.xyzw, l(0x3ffffff8, 0x3ffffff8, 0x3ffffff8, 0x3ffffff8) +bfi r5.xyzw, l(23, 23, 23, 23), l(0, 0, 0, 0), r0.xyzw, l(0x00800000, 0x00800000, 0x00800000, 0x00800000) +ushr r6.xyzw, r0.xyzw, l(23, 23, 23, 23) +iadd r6.xyzw, -r6.xyzw, l(113, 113, 113, 113) +umin r6.xyzw, r6.xyzw, l(24, 24, 24, 24) +ushr r5.xyzw, r5.xyzw, r6.xyzw +ult r6.xyzw, r0.xyzw, l(0x38800000, 0x38800000, 0x38800000, 0x38800000) +iadd r0.xyzw, r0.xyzw, l(0xc8000000, 0xc8000000, 0xc8000000, 0xc8000000) +movc r0.xyzw, r6.xyzw, r5.xyzw, r0.xyzw +iadd r5.xyzw, r0.xyzw, l(3, 3, 3, 3) +ubfe r0.xyzw, l(1, 1, 1, 1), l(3, 3, 3, 3), r0.xyzw +iadd 
r0.xyzw, r0.xyzw, r5.xyzw +ubfe r0.xyzw, l(24, 24, 24, 24), l(3, 3, 3, 3), r0.xyzw +ieq r0.xyzw, r2.xyzw, r0.xyzw +and r0.xyzw, r0.xyzw, l(1, 1, 1, 1) +imad r0.xyzw, r4.xyzw, r0.xyzw, r3.xyzw +ishl r2.xy, vThreadID.xxxx, l(4, 2, 0, 0) +imad r2.xy, vThreadID.yyyy, CB0[0][0].ywyy, r2.xyxx +iadd r2.xy, r2.xyxx, CB0[0][0].xzxx +store_raw U0[0].xyzw, r2.x, r0.xyzw +and r0.x, r1.x, l(255) +bfi r0.yzw, l(0, 8, 8, 8), l(0, 8, 16, 24), r1.yyzw, l(0, 0, 0, 0) +iadd r0.xy, r0.zwzz, r0.xyxx +iadd r0.x, r0.y, r0.x +store_raw U0[0].x, r2.y, r0.x +ret +// Approximately 67 instruction slots used diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_load_depth_float_cs.cso index c389242a1ee9667b3dfdf6346a236e612ce4770b..01be358b0614568897a08046e96147be2d234110 100644 GIT binary patch delta 343 zcmZ1@{Y1pbCBn&B{2epf<-CZ}H0$Vg{hu!L7#SECQn(lxSb?+&5c4oIFvtM04iKN< z+-UTViE9cQ14AHC!BWP}R?MrICVQ|+aB4Cza2hcPFfdL&$gLa-G9-+ZiQ!tS0g#0N zZXgvvt{hA?kPQ=SYd}>S%D}+&38Ep&Mccn2flg3oA-V1z4`fGKWO$Itlsbce7jc< zm!CddGn?;ke*ExS0XUcdJhrzO=S#p<#sZ@{aoGEH_dT*uyWd*G~=2QEAfwPb54U@ z&=F4$GB)+6C ze?R~3xdB7jtA7T7o6zG4ASWZY|EAn zZK7@^xYzJT$SyGHC3qe<31%E6`<=Qs;Q8RWa9;^-L5yd|yc+mHzMP%(i(xkJM{!sV z{3yIs&IQuSlnw^5*JeGi%HrJ1;!y=e1K*^ zw(NtRn7MGrfa4tI!WDqyjOW7L1g`rZaP^^-V?P%b!?K?bO7Vmrj*mw{Ecq*8Igc?+ z0CC#L1rgQ#USzru9_-qpreIc zIgG;b*zG;F#XUTd8F4cc*?drT2gAuyp@^6Mz8(E?By;E(XH23z-t;?jczEnS7V#ub z{25xkFkcd9vBv3peN`WQpw%ip!P=&;HQH!+ME$q^8pwM3Se1cpN2<~_65BHYJ$LLO zj`#PMd&9p)-+5J@TKOot=yZ#CAFHvVg@SgN~SX~aImSbhM{ z+$rOizUI~v;dugT+S$M3Rd@Y5*1gW@MKxde&r_W0DCsw9#ZVn%{hFECZ}VM|9>(Ce zhxQeaHdEJ3c07>^%(HyjL?xW&sf~YrDhLT8JR4(qQp3#Rd1x@)vUbVyhl6-?9vkU zL2|stv!R_j;%(MGC`q`BN!G$zQ?4Tk4}N&ZJi5*qebTy5oRj0>L8{=c@XrvmR@&Rx z3w?3+$@NV!@wd!-mb}qIkC>^aMt^4E>^)@MUOqq{a3MK+jl0L-H79M_hw6COSp&u_RMK)z^D|n3g0u0v(>~wj(T5eO98hTx#g|ML9NNm(L-a1S_jc7}jq0r8bR&Jzk*o63?@tTnO3RBXcNL{ne*ce#Jb;On!V?hln-O8gzfO zK3dhrxFV0Gl`k#yWv%;AZf5kr=zY9z2lKyz)K(MtXBJrmuJ&bgtpw-PO!WC5V$9#t CxiDz} 
literal 0 HcmV?d00001 diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float24and32_cs.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float24and32_cs.h new file mode 100644 index 000000000..c5a2d2118 --- /dev/null +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float24and32_cs.h @@ -0,0 +1,226 @@ +// generated from `xb buildhlsl` +// source: edram_store_depth_float24and32.cs.hlsl +const uint8_t edram_store_depth_float24and32_cs[] = { + 0x44, 0x58, 0x42, 0x43, 0xC6, 0x10, 0x80, 0x14, 0x97, 0x01, 0xE4, 0x46, + 0x76, 0xF1, 0x67, 0xD3, 0xDF, 0x50, 0x25, 0xF7, 0x01, 0x00, 0x00, 0x00, + 0x64, 0x0A, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x0C, 0x03, 0x00, 0x00, 0x1C, 0x03, 0x00, 0x00, 0x2C, 0x03, 0x00, 0x00, + 0xC8, 0x09, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD0, 0x02, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0x53, 0x43, 0x00, 0x05, 0x00, 0x00, + 0xA8, 0x02, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xB4, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xCF, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x78, 0x65, 0x5F, 0x65, 
0x64, 0x72, 0x61, 0x6D, 0x5F, 0x6C, 0x6F, 0x61, + 0x64, 0x5F, 0x73, 0x74, 0x6F, 0x72, 0x65, 0x5F, 0x73, 0x6F, 0x75, 0x72, + 0x63, 0x65, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, + 0x6C, 0x6F, 0x61, 0x64, 0x5F, 0x73, 0x74, 0x6F, 0x72, 0x65, 0x5F, 0x64, + 0x65, 0x73, 0x74, 0x00, 0x58, 0x65, 0x45, 0x64, 0x72, 0x61, 0x6D, 0x4C, + 0x6F, 0x61, 0x64, 0x53, 0x74, 0x6F, 0x72, 0x65, 0x43, 0x6F, 0x6E, 0x73, + 0x74, 0x61, 0x6E, 0x74, 0x73, 0x00, 0xAB, 0xAB, 0xE8, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x1C, 0x01, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE4, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, + 0x30, 0x02, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0x00, 0x00, 0x4E, 0x02, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x69, 0x02, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, + 0x83, 0x02, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0C, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, + 0x5F, 0x72, 0x74, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x72, 0x5F, 0x64, 0x65, + 0x70, 0x74, 0x68, 0x5F, 
0x6F, 0x66, 0x66, 0x73, 0x65, 0x74, 0x00, 0x64, + 0x77, 0x6F, 0x72, 0x64, 0x00, 0xAB, 0xAB, 0xAB, 0x00, 0x00, 0x13, 0x00, + 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x02, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x65, + 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, + 0x72, 0x5F, 0x64, 0x65, 0x70, 0x74, 0x68, 0x5F, 0x70, 0x69, 0x74, 0x63, + 0x68, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, 0x6D, 0x5F, 0x72, + 0x74, 0x5F, 0x73, 0x74, 0x65, 0x6E, 0x63, 0x69, 0x6C, 0x5F, 0x6F, 0x66, + 0x66, 0x73, 0x65, 0x74, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, 0x72, 0x61, + 0x6D, 0x5F, 0x72, 0x74, 0x5F, 0x73, 0x74, 0x65, 0x6E, 0x63, 0x69, 0x6C, + 0x5F, 0x70, 0x69, 0x74, 0x63, 0x68, 0x00, 0x78, 0x65, 0x5F, 0x65, 0x64, + 0x72, 0x61, 0x6D, 0x5F, 0x62, 0x61, 0x73, 0x65, 0x5F, 0x73, 0x61, 0x6D, + 0x70, 0x6C, 0x65, 0x73, 0x5F, 0x32, 0x78, 0x5F, 0x64, 0x65, 0x70, 0x74, + 0x68, 0x5F, 0x70, 0x69, 0x74, 0x63, 0x68, 0x00, 0x4D, 0x69, 0x63, 0x72, + 0x6F, 0x73, 0x6F, 0x66, 0x74, 0x20, 0x28, 0x52, 0x29, 0x20, 0x48, 0x4C, + 0x53, 0x4C, 0x20, 0x53, 0x68, 0x61, 0x64, 0x65, 0x72, 0x20, 0x43, 0x6F, + 0x6D, 0x70, 0x69, 0x6C, 0x65, 0x72, 0x20, 0x31, 0x30, 0x2E, 0x31, 0x00, + 0x49, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x4F, 0x53, 0x47, 0x4E, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x53, 0x48, 0x45, 0x58, + 0x94, 0x06, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0xA5, 0x01, 0x00, 0x00, + 0x6A, 0x08, 0x00, 0x01, 0x59, 0x00, 0x00, 0x07, 0x46, 0x8E, 0x30, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x06, + 0x46, 0x7E, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9D, 0x00, 0x00, 0x06, + 0x46, 0xEE, 0x31, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x02, + 0x32, 0x10, 0x02, 0x00, 0x5F, 0x00, 0x00, 0x02, 0x32, 0x20, 0x02, 0x00, + 0x5F, 0x00, 0x00, 0x02, 0x32, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x02, + 0x05, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x04, 0x14, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x09, + 0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x02, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x0A, + 0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x56, 0x05, 0x02, 0x00, + 0xD6, 0x85, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x09, 0x32, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x46, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x80, 0x30, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xA5, 0x00, 0x00, 0x08, 0xF2, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x7E, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x0A, + 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xFF, 0x7F, + 0xFF, 0xFF, 0xFF, 0x7F, 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F, + 0xF8, 0xFF, 0xFF, 0x3F, 0xF8, 0xFF, 0xFF, 0x3F, 0x8C, 0x00, 0x00, 0x14, + 0xF2, 0x00, 0x10, 0x00, 
0x03, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, + 0x00, 0x00, 0x80, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0B, + 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x80, + 0x41, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, + 0x71, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x07, + 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x4F, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38, 0x00, 0x00, 0x80, 0x38, + 0x00, 0x00, 0x80, 0x38, 0x1E, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8, + 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC8, 0x37, 0x00, 0x00, 0x09, + 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x04, 0x00, 0x00, 0x00, 
0x46, 0x0E, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x0A, + 0xF2, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x8A, 0x00, 0x00, 0x0F, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x07, 0xF2, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xF2, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xA5, 0x00, 0x00, 0x08, + 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x06, 0x70, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x0A, 0xE2, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x11, + 0xF2, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x46, 0x0E, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 
0x29, 0x00, 0x00, 0x06, 0x12, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0C, 0x62, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x06, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0xFF, 0x07, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x04, 0x03, 0x1A, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x06, 0x22, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x06, 0x82, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0A, 0x20, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x0A, 0xA2, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x56, 0x0D, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xD8, 0xFF, 0xFF, 0xFF, 0x1E, 0x00, 0x00, 0x07, + 0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x01, 0x55, 0x00, 0x00, 0x09, + 0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x08, + 0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x10, 0x02, 0x00, + 0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x06, 0x22, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x0A, 0x10, 0x02, 0x00, 
0x26, 0x00, 0x00, 0x07, 0x00, 0xD0, 0x00, 0x00, + 0x42, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x20, 0x02, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x23, 0x00, 0x00, 0x09, + 0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, + 0x2A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07, + 0x12, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x0B, 0x22, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x29, 0x00, 0x00, 0x07, 0x22, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x08, + 0xF2, 0xE0, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x07, 0x12, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0xA0, 0x00, 0xA6, 0x00, 0x00, 0x08, + 0xF2, 0xE0, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x0E, 0x10, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54, + 0x94, 0x00, 0x00, 0x00, 
0x2D, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, +}; diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float24and32_cs.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float24and32_cs.txt new file mode 100644 index 000000000..1a0cc82cc --- /dev/null +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float24and32_cs.txt @@ -0,0 +1,95 @@ +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// Buffer Definitions: +// +// cbuffer XeEdramLoadStoreConstants +// { +// +// uint xe_edram_rt_color_depth_offset;// Offset: 0 Size: 4 +// uint xe_edram_rt_color_depth_pitch;// Offset: 4 Size: 4 +// uint xe_edram_rt_stencil_offset; // Offset: 8 Size: 4 +// uint xe_edram_rt_stencil_pitch; // Offset: 12 Size: 4 +// uint xe_edram_base_samples_2x_depth_pitch;// Offset: 16 Size: 4 +// +// } +// +// +// Resource Bindings: +// +// Name Type Format Dim ID HLSL Bind Count +// ------------------------------ ---------- ------- ----------- ------- -------------- ------ +// xe_edram_load_store_source texture byte r/o T0 t0 1 +// xe_edram_load_store_dest UAV byte r/w U0 u0 1 +// 
XeEdramLoadStoreConstants cbuffer NA NA CB0 cb0 1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Input +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// no Output +cs_5_1 +dcl_globalFlags refactoringAllowed +dcl_constantbuffer CB0[0:0][2], immediateIndexed, space=0 +dcl_resource_raw T0[0:0], space=0 +dcl_uav_raw U0[0:0], space=0 +dcl_input vThreadGroupID.xy +dcl_input vThreadIDInGroup.xy +dcl_input vThreadID.xy +dcl_temps 5 +dcl_thread_group 20, 16, 1 +ishl r0.xy, vThreadID.xxxx, l(4, 2, 0, 0) +imad r0.xy, vThreadID.yyyy, CB0[0][0].ywyy, r0.xyxx +iadd r0.xy, r0.xyxx, CB0[0][0].xzxx +ld_raw r1.xyzw, r0.x, T0[0].xyzw +uge r2.xyzw, l(0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff), r1.xyzw +and r2.xyzw, r1.xyzw, r2.xyzw +umin r2.xyzw, r2.xyzw, l(0x3ffffff8, 0x3ffffff8, 0x3ffffff8, 0x3ffffff8) +bfi r3.xyzw, l(23, 23, 23, 23), l(0, 0, 0, 0), r2.xyzw, l(0x00800000, 0x00800000, 0x00800000, 0x00800000) +ushr r4.xyzw, r2.xyzw, l(23, 23, 23, 23) +iadd r4.xyzw, -r4.xyzw, l(113, 113, 113, 113) +umin r4.xyzw, r4.xyzw, l(24, 24, 24, 24) +ushr r3.xyzw, r3.xyzw, r4.xyzw +ult r4.xyzw, r2.xyzw, l(0x38800000, 0x38800000, 0x38800000, 0x38800000) +iadd r2.xyzw, r2.xyzw, l(0xc8000000, 0xc8000000, 0xc8000000, 0xc8000000) +movc r2.xyzw, r4.xyzw, r3.xyzw, r2.xyzw +iadd r3.xyzw, r2.xyzw, l(3, 3, 3, 3) +ubfe r2.xyzw, l(1, 1, 1, 1), l(3, 3, 3, 3), r2.xyzw +iadd r2.xyzw, r2.xyzw, r3.xyzw +ushr r2.xyzw, r2.xyzw, l(3, 3, 3, 3) +ld_raw r0.x, r0.y, T0[0].xxxx +ushr r0.yzw, r0.xxxx, l(0, 8, 16, 24) +bfi r0.xyzw, l(24, 24, 24, 24), l(8, 8, 8, 8), r2.xyzw, r0.xyzw +ishl r2.x, vThreadIDInGroup.x, l(2) +and r2.yz, CB0[0][1].xxxx, l(0, 0x00008000, 2047, 0) +if_nz r2.y + ult r2.y, vThreadIDInGroup.x, l(10) + uge r2.w, vThreadIDInGroup.x, l(10) + and r2.yw, r2.yyyw, l(0, 40, 0, -40) + 
iadd r2.y, r2.w, r2.y + iadd r2.x, r2.y, r2.x +endif +ushr r2.y, CB0[0][1].x, l(16) +imad r2.y, vThreadGroupID.y, r2.y, r2.z +iadd r2.y, r2.y, vThreadGroupID.x +imul null, r2.z, vThreadIDInGroup.y, l(320) +imad r2.y, r2.y, l(5120), r2.z +ishl r2.x, r2.x, l(2) +iadd r2.x, r2.x, r2.y +ubfe r2.y, l(1), l(13), CB0[0][1].x +ishl r2.y, r2.y, l(1) +ishl r2.x, r2.x, r2.y +store_raw U0[0].xyzw, r2.x, r0.xyzw +iadd r0.x, r2.x, l(0x00a00000) +store_raw U0[0].xyzw, r0.x, r1.xyzw +ret +// Approximately 45 instruction slots used diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float_cs.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/edram_store_depth_float_cs.cso index 1dd12cb19ed6d6ff47944307e6569ccf24516077..b636e8d752c13074a4df604f5c44f04b355f32b7 100644 GIT binary patch delta 740 zcmZ8eKTASU6hHU<^}RAopGYRXN{q_Dj244JEu*a@728`HB4~?;crRiN$^bE}Ijnjk{VN$&4WNklHP6TgOySS+J7fGr88t||J`@1P?-iR#ys4xR zX)CsYVu3W32%lGde$lDr5RrL7?QvwjuiCt(S;LzEW(NY$Q#X*;oAROB=+Cs!h%*(P z#)Lxh<|Q=Z6tq;DbcpNZ=y?bp@ykN%Vf;b63A?|L05bqukkq`WN6>XoPbCP)oZ6&t zHOD1jS+v6b7WU5^{)--Qa##GM{?zSd(k%&1RNR!#@kh+ delta 833 zcmZuwOG{fp6#nizH*Mr%5&}WIZ9{}ULQLt#LM74awn|m}0fM+F-F4GNZI@jYPQYzl zC}x=-pwLa}GLTgl(k}7`%*LgjGnsQ0abPlc&i6Rq43m@6sMLD9y|ny0*)JV9qt#Cr zyTgBL^-Bl9P96w&eZ}*k2h8w%!SheQb>+t05LjpYh!(t$PCz|x=(zMXEYc4@7rU%k zol4-+p`SV){qmcPUifL~8-&jMD$NJE!epE5cew1+N|463!n+SaTIYVoNLNAM0r@c! 
zDAT1r>uy^94bq10l%sm7&su#}qk5Qfyp^!ovUnu^>Y=OfH?3A{E**sjW!*6WPl0e+ z)U+N|*mW;z)&=L(;S=kcf8@exvlZ9Zxc<%XPos)C#a2Po=TZ7bbg!+ZYFADl8GpvK zmbX|La5!OX`p`nI_C*}3iJLK>-MD0FG8g8}u{#F9l6JL8+x#MG>U^uSoB}04efjiR zI!^V{T%wR9$nEGFS(|?c9!z2v;^*cX3__i_Xby3ge+=Zt{~xpKWGF7LPKNf9Z^cYl n{KS1);fYpW{vu!F`;E>!q*5mlQ)xs^e+>?T!9^$z z4uY$D7XJi62S-s=UYwlzy!yTG%BQ#O z{FgIFZe$+5OqtnG#w-ng3ib;2yvytz>`|{-wLE>X0nNubJfzZb%p~%`!SXTsA=rqs zqwsrR#MS2Qwbpv;&PFs+EkrZ3wb`h)e5crSAnNsFO?IM-iY-E}fpBPA)y92v8^I**O3G z@C0>o`j75X==eMU%VDVX>BXr^rCPSy^_z38_2x#i^}srvP6hXU9Ba6qvx#5=^J5@| zO(Lzlw6d|xoZ8Iv;%fxw5&s6e8(6AQw4TW@O!~3b60Sb=#%vj$@4)FONDm~tHyd^9 z<}Az%J1td8dOhybvjrKgmv-32>N~`k|1*v+JhQOhuXzZ)86#9@>$zA9DF5*d)b@}}*c<0idwWi|b`900r#r!VkLdo~z7GyOK)lzNM z$$NdJaZlYSu<7Mf;~gocwLA4!k4-D)*4#O}kk4}-fa%+%#y&L8kLY#lpIRyX#BtTj zTPY*$tT(?Oc;CjYlvPaY{WS+s8+#2n51s?DKkkQ`=0&b~$@9F_3*H|7>f^=y-f^5m z?Jzs+_NMo4#c?L6Ao f>J#-Bh~;7WN6Yhn#_yl|{2y?={?ohV{Y%YXB~X9q literal 0 HcmV?d00001 diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_round_ps.h b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_round_ps.h new file mode 100644 index 000000000..b155ee5d0 --- /dev/null +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_round_ps.h @@ -0,0 +1,156 @@ +// generated from `xb buildhlsl` +// source: float24_round.ps.hlsl +const uint8_t float24_round_ps[] = { + 0x44, 0x58, 0x42, 0x43, 0xDF, 0x71, 0xF3, 0x0A, 0x4A, 0xDB, 0xC3, 0x80, + 0x1E, 0xE4, 0x39, 0x21, 0x59, 0x07, 0x78, 0x97, 0x01, 0x00, 0x00, 0x00, + 0x18, 0x07, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0xA0, 0x00, 0x00, 0x00, 0x90, 0x02, 0x00, 0x00, 0xC4, 0x02, 0x00, 0x00, + 0x7C, 0x06, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x64, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0xFF, 0xFF, 0x00, 0x05, 0x00, 0x00, + 0x3C, 0x00, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x24, 0x00, 
0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x4D, 0x69, 0x63, 0x72, 0x6F, 0x73, 0x6F, 0x66, 0x74, 0x20, 0x28, 0x52, + 0x29, 0x20, 0x48, 0x4C, 0x53, 0x4C, 0x20, 0x53, 0x68, 0x61, 0x64, 0x65, + 0x72, 0x20, 0x43, 0x6F, 0x6D, 0x70, 0x69, 0x6C, 0x65, 0x72, 0x20, 0x31, + 0x30, 0x2E, 0x31, 0x00, 0x49, 0x53, 0x47, 0x4E, 0xE8, 0x01, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x0A, 0x00, 
0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xD9, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x0F, 0x04, 0x00, 0x00, 0x54, 0x45, 0x58, 0x43, + 0x4F, 0x4F, 0x52, 0x44, 0x00, 0x53, 0x56, 0x5F, 0x50, 0x6F, 0x73, 0x69, + 0x74, 0x69, 0x6F, 0x6E, 0x00, 0xAB, 0xAB, 0xAB, 0x4F, 0x53, 0x47, 0x4E, + 0x2C, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x0E, 0x00, 0x00, + 0x53, 0x56, 0x5F, 0x44, 0x65, 0x70, 0x74, 0x68, 0x00, 0xAB, 0xAB, 0xAB, + 0x53, 0x48, 0x45, 0x58, 0xB0, 0x03, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, + 0xEC, 0x00, 0x00, 0x00, 0x6A, 0x08, 0x00, 0x01, 0x64, 0x38, 0x00, 0x04, + 0x42, 0x10, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x65, 0x00, 0x00, 0x02, 0x01, 0xC0, 0x00, 0x00, 0x68, 0x00, 0x00, 0x02, + 0x02, 0x00, 
0x00, 0x00, 0x36, 0x20, 0x08, 0x05, 0x12, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x2A, 0x10, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x10, 0x07, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x40, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x7F, 0x0A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x08, 0x07, 0x12, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x00, 0x08, 0x07, + 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0xF8, 0xFF, 0xFF, 0x3F, + 0x8C, 0x00, 0x10, 0x0B, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x55, 0x00, 0x20, 0x07, + 0x42, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x20, 0x08, 0x42, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2A, 0x00, 0x10, 0x80, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x54, 0x00, 0x20, 0x07, + 0x42, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x55, 0x00, 0x10, 0x07, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x20, 0x07, 0x42, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0x38, 0x1E, 0x00, 0x08, 0x07, + 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC8, + 0x37, 0x00, 
0x08, 0x09, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x10, 0x07, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x08, 0x09, 0x12, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x08, 0x07, 0x12, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x38, 0x0F, + 0x72, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x87, 0x00, 0x40, 0x05, + 0x82, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x40, 0x07, 0x82, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x40, 0x00, 0x00, 0xF5, 0xFF, 0xFF, 0xFF, 0x37, 0x00, 0x40, 0x09, + 0x82, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x08, 0x08, + 0x12, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x80, + 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x37, 0x00, 0x08, 0x09, 0x12, 0x00, 0x10, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2A, 0x00, 
0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x29, 0x00, 0x40, 0x07, 0x82, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x40, 0x07, + 0x82, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0xFF, 0xFF, 0x0F, 0x00, + 0x37, 0x00, 0x10, 0x09, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x29, 0x00, 0x20, 0x07, 0x42, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x20, 0x07, 0x42, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x29, 0x00, 0x10, 0x07, + 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x10, 0x07, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x37, 0x00, 0x08, 0x08, 0x01, 0xC0, 0x00, 0x00, + 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54, 0x94, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, +}; diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_round_ps.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_round_ps.txt new file mode 100644 index 000000000..c9661e6ac --- /dev/null +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_round_ps.txt @@ -0,0 +1,74 @@ +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// TEXCOORD 0 xyzw 0 NONE float +// TEXCOORD 1 xyzw 1 NONE float +// TEXCOORD 2 xyzw 2 NONE float +// TEXCOORD 3 xyzw 3 NONE float +// TEXCOORD 4 xyzw 4 NONE float +// TEXCOORD 5 xyzw 5 NONE float +// TEXCOORD 6 xyzw 6 NONE float +// TEXCOORD 7 xyzw 7 NONE float +// TEXCOORD 8 xyzw 8 NONE float +// TEXCOORD 9 xyzw 9 NONE float +// TEXCOORD 10 xyzw 10 NONE float +// TEXCOORD 11 xyzw 11 NONE float +// TEXCOORD 12 xyzw 12 NONE float +// TEXCOORD 13 xyzw 13 NONE float +// TEXCOORD 14 xyzw 14 NONE float +// TEXCOORD 15 xyzw 15 NONE float +// TEXCOORD 16 xyz 16 NONE float +// TEXCOORD 17 xy 17 NONE float +// SV_Position 0 xyzw 18 POS float z +// +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// SV_Depth 0 N/A oDepth DEPTH float YES +// +// Pixel Shader runs at sample frequency +// +ps_5_1 +dcl_globalFlags refactoringAllowed +dcl_input_ps_siv linear 
noperspective sample v18.z, position +dcl_output oDepth +dcl_temps 2 +mov_sat [precise(x)] r0.x, v18.z +uge [precise(y)] r0.y, l(0x7fffffff), r0.x +and [precise(x)] r0.x, r0.x, r0.y +umin [precise(x)] r0.x, r0.x, l(0x3ffffff8) +bfi [precise(y)] r0.y, l(23), l(0), r0.x, l(0x00800000) +ushr [precise(z)] r0.z, r0.x, l(23) +iadd [precise(z)] r0.z, -r0.z, l(113) +umin [precise(z)] r0.z, r0.z, l(24) +ushr [precise(y)] r0.y, r0.y, r0.z +ult [precise(z)] r0.z, r0.x, l(0x38800000) +iadd [precise(x)] r0.x, r0.x, l(0xc8000000) +movc [precise(x)] r0.x, r0.z, r0.y, r0.x +iadd [precise(y)] r0.y, r0.x, l(3) +ubfe [precise(x)] r0.x, l(1), l(3), r0.x +iadd [precise(x)] r0.x, r0.x, r0.y +ubfe [precise(xyz)] r0.xyz, l(24, 20, 4, 0), l(3, 3, 23, 0), r0.xxxx +firstbit_hi [precise(w)] r0.w, r0.y +iadd [precise(w)] r0.w, r0.w, l(-11) +movc [precise(w)] r0.w, r0.y, r0.w, l(21) +iadd [precise(x)] r1.x, -r0.w, l(1) +movc [precise(x)] r1.x, r0.z, r0.z, r1.x +ishl [precise(w)] r0.w, r0.y, r0.w +and [precise(w)] r0.w, r0.w, l(0x000fffff) +movc [precise(y)] r0.y, r0.z, r0.y, r0.w +ishl [precise(z)] r0.z, r1.x, l(23) +iadd [precise(z)] r0.z, r0.z, l(0x38000000) +ishl [precise(y)] r0.y, r0.y, l(3) +iadd [precise(y)] r0.y, r0.z, r0.y +movc [precise(x)] oDepth, r0.x, r0.y, l(0) +ret +// Approximately 30 instruction slots used diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_truncate_ps.cso b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_truncate_ps.cso new file mode 100644 index 0000000000000000000000000000000000000000..a22366f58ae8811c10d9b0abd7460227ca0c1f55 GIT binary patch literal 1148 zcma)*ziU%r6o$`j5_=n?y*7v7iA~%b3U1=y z&{c5HKj7#pI0#NoI=Tw(@qI4$K&RphC+Gd1@0|PnxHr}IM&;A?ql9QW$o(Wv(F)$X2} zlg(PYq{eO4RTR%c3pzI~zY#_0x;(S1Paqi1SdXmnS{xP@(o>jS%j&i)nqACTG0VXn z72CneWy|L-WbCa)b6&?h`z4=K_P$FNgc+}TN7nM^GMCfbI}3~K^(Yrk vd&$0r?lf4lFi$PL=&aanMbcVt9cs+?!FiGYAILDz%>9qN|F~LT>$sYK-_TnG literal 0 HcmV?d00001 diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_truncate_ps.h 
b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_truncate_ps.h new file mode 100644 index 000000000..b8d1d7bb7 --- /dev/null +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_truncate_ps.h @@ -0,0 +1,100 @@ +// generated from `xb buildhlsl` +// source: float24_truncate.ps.hlsl +const uint8_t float24_truncate_ps[] = { + 0x44, 0x58, 0x42, 0x43, 0xB8, 0x51, 0x55, 0x1D, 0xF4, 0xF1, 0xC9, 0xC0, + 0x0C, 0x22, 0xD3, 0x43, 0x94, 0xDF, 0x83, 0x9D, 0x01, 0x00, 0x00, 0x00, + 0x7C, 0x04, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0xA0, 0x00, 0x00, 0x00, 0x90, 0x02, 0x00, 0x00, 0xCC, 0x02, 0x00, 0x00, + 0xE0, 0x03, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x64, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0xFF, 0xFF, 0x00, 0x05, 0x00, 0x00, + 0x3C, 0x00, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x4D, 0x69, 0x63, 0x72, 0x6F, 0x73, 0x6F, 0x66, 0x74, 0x20, 0x28, 0x52, + 0x29, 0x20, 0x48, 0x4C, 0x53, 0x4C, 0x20, 0x53, 0x68, 0x61, 0x64, 0x65, + 0x72, 0x20, 0x43, 0x6F, 0x6D, 0x70, 0x69, 0x6C, 0x65, 0x72, 0x20, 0x31, + 0x30, 0x2E, 0x31, 0x00, 0x49, 0x53, 0x47, 0x4E, 0xE8, 0x01, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 
0x00, 0xD0, 0x01, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x0E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x03, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xD9, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x0F, 0x04, 0x00, 0x00, 0x54, 0x45, 0x58, 0x43, + 0x4F, 0x4F, 0x52, 0x44, 0x00, 0x53, 0x56, 0x5F, 0x50, 0x6F, 0x73, 0x69, + 0x74, 0x69, 0x6F, 0x6E, 0x00, 0xAB, 0xAB, 0xAB, 0x4F, 0x53, 0x47, 0x4E, + 0x34, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x0E, 0x00, 0x00, + 0x53, 0x56, 0x5F, 0x44, 0x65, 0x70, 0x74, 0x68, 0x4C, 0x65, 0x73, 0x73, + 0x45, 0x71, 0x75, 0x61, 0x6C, 0x00, 0xAB, 0xAB, 0x53, 0x48, 0x45, 0x58, + 0x0C, 0x01, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, + 0x6A, 0x08, 0x00, 0x01, 0x64, 0x38, 0x00, 0x04, 0x42, 0x10, 0x10, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x02, + 0x01, 0x70, 0x02, 0x00, 0x68, 0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00, + 0x36, 0x20, 0x08, 0x05, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2A, 0x10, 0x10, 0x00, 0x12, 0x00, 0x00, 0x00, 0x50, 0x00, 0x10, 0x07, + 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0x2E, + 0x1F, 0x00, 0x04, 0x03, 0x1A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x8A, 0x00, 0x10, 0x09, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, + 0x17, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x10, 0x08, 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x10, 0x80, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x24, 0x00, 0x10, 0x07, + 0x22, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 
0x00, 0x03, 0x00, 0x00, 0x00, + 0x8C, 0x00, 0x08, 0x0A, 0x01, 0x70, 0x02, 0x00, 0x1A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x01, 0x36, 0x00, 0x08, 0x04, + 0x01, 0x70, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x01, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54, + 0x94, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; diff --git a/src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_truncate_ps.txt b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_truncate_ps.txt new file mode 100644 index 000000000..dd969f04d --- /dev/null +++ b/src/xenia/gpu/shaders/bytecode/d3d12_5_1/float24_truncate_ps.txt @@ -0,0 +1,55 @@ +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// +// +// Input signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// TEXCOORD 0 xyzw 0 NONE float +// TEXCOORD 1 xyzw 1 NONE float +// 
TEXCOORD 2 xyzw 2 NONE float +// TEXCOORD 3 xyzw 3 NONE float +// TEXCOORD 4 xyzw 4 NONE float +// TEXCOORD 5 xyzw 5 NONE float +// TEXCOORD 6 xyzw 6 NONE float +// TEXCOORD 7 xyzw 7 NONE float +// TEXCOORD 8 xyzw 8 NONE float +// TEXCOORD 9 xyzw 9 NONE float +// TEXCOORD 10 xyzw 10 NONE float +// TEXCOORD 11 xyzw 11 NONE float +// TEXCOORD 12 xyzw 12 NONE float +// TEXCOORD 13 xyzw 13 NONE float +// TEXCOORD 14 xyzw 14 NONE float +// TEXCOORD 15 xyzw 15 NONE float +// TEXCOORD 16 xyz 16 NONE float +// TEXCOORD 17 xy 17 NONE float +// SV_Position 0 xyzw 18 POS float z +// +// +// Output signature: +// +// Name Index Mask Register SysValue Format Used +// -------------------- ----- ------ -------- -------- ------- ------ +// SV_DepthLessEqual 0 N/A oDepthLE DEPTHLE float YES +// +// Pixel Shader runs at sample frequency +// +ps_5_1 +dcl_globalFlags refactoringAllowed +dcl_input_ps_siv linear noperspective sample v18.z, position +dcl_output oDepthLE +dcl_temps 1 +mov_sat [precise(x)] r0.x, v18.z +uge [precise(y)] r0.y, r0.x, l(0x2e800000) +if_nz r0.y + ubfe [precise(y)] r0.y, l(8), l(23), r0.x + iadd [precise(y)] r0.y, -r0.y, l(116) + imax [precise(y)] r0.y, r0.y, l(3) + bfi [precise(x)] oDepthLE, r0.y, l(0), l(0), r0.x +else + mov [precise(x)] oDepthLE, l(0) +endif +ret +// Approximately 11 instruction slots used diff --git a/src/xenia/gpu/shaders/edram_load_depth_float.cs.hlsl b/src/xenia/gpu/shaders/edram_load_depth_float.cs.hlsl index bc02b4623..ef72713a3 100644 --- a/src/xenia/gpu/shaders/edram_load_depth_float.cs.hlsl +++ b/src/xenia/gpu/shaders/edram_load_depth_float.cs.hlsl @@ -7,22 +7,14 @@ void main(uint3 xe_group_id : SV_GroupID, uint3 xe_thread_id : SV_DispatchThreadID) { uint2 tile_sample_index = xe_group_thread_id.xy; tile_sample_index.x *= 4u; - uint edram_offset = XeEdramOffset32bpp(xe_group_id.xy, tile_sample_index); - uint4 depth24_stencil = xe_edram_load_store_source.Load4(edram_offset); - uint4 depth24 = depth24_stencil >> 8u; - uint4 depth32 = 
xe_edram_load_store_source.Load4(10485760u + edram_offset); - // Depth. If the stored 32-bit depth converted to 24-bit is the same as the - // stored 24-bit depth, load the 32-bit value because it has more precision - // (and multipass rendering is possible), if it's not, convert the 24-bit - // depth because it was overwritten by aliasing. - uint4 depth24to32 = XeFloat20e4To32(depth24); - uint4 depth = depth24to32 + (depth32 - depth24to32) * - uint4(XeFloat32To20e4(depth32) == depth24); + uint4 samples = xe_edram_load_store_source.Load4( + XeEdramOffset32bpp(xe_group_id.xy, tile_sample_index)); + // Depth (exact conversion ensured during drawing). uint rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch + xe_thread_id.x * 16u + xe_edram_rt_color_depth_offset; - xe_edram_load_store_dest.Store4(rt_offset, depth); + xe_edram_load_store_dest.Store4(rt_offset, XeFloat20e4To32(samples >> 8u)); // Stencil. - uint4 stencil = (depth24_stencil & 0xFFu) << uint4(0u, 8u, 16u, 24u); + uint4 stencil = (samples & 0xFFu) << uint4(0u, 8u, 16u, 24u); stencil.xy |= stencil.zw; stencil.x |= stencil.y; rt_offset = xe_thread_id.y * xe_edram_rt_stencil_pitch + xe_thread_id.x * 4u + diff --git a/src/xenia/gpu/shaders/edram_load_depth_float24and32.cs.hlsl b/src/xenia/gpu/shaders/edram_load_depth_float24and32.cs.hlsl new file mode 100644 index 000000000..bc02b4623 --- /dev/null +++ b/src/xenia/gpu/shaders/edram_load_depth_float24and32.cs.hlsl @@ -0,0 +1,31 @@ +#include "edram_load_store.hlsli" +#include "pixel_formats.hlsli" + +[numthreads(20, 16, 1)] +void main(uint3 xe_group_id : SV_GroupID, + uint3 xe_group_thread_id : SV_GroupThreadID, + uint3 xe_thread_id : SV_DispatchThreadID) { + uint2 tile_sample_index = xe_group_thread_id.xy; + tile_sample_index.x *= 4u; + uint edram_offset = XeEdramOffset32bpp(xe_group_id.xy, tile_sample_index); + uint4 depth24_stencil = xe_edram_load_store_source.Load4(edram_offset); + uint4 depth24 = depth24_stencil >> 8u; + uint4 depth32 = 
xe_edram_load_store_source.Load4(10485760u + edram_offset); + // Depth. If the stored 32-bit depth converted to 24-bit is the same as the + // stored 24-bit depth, load the 32-bit value because it has more precision + // (and multipass rendering is possible), if it's not, convert the 24-bit + // depth because it was overwritten by aliasing. + uint4 depth24to32 = XeFloat20e4To32(depth24); + uint4 depth = depth24to32 + (depth32 - depth24to32) * + uint4(XeFloat32To20e4(depth32) == depth24); + uint rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch + + xe_thread_id.x * 16u + xe_edram_rt_color_depth_offset; + xe_edram_load_store_dest.Store4(rt_offset, depth); + // Stencil. + uint4 stencil = (depth24_stencil & 0xFFu) << uint4(0u, 8u, 16u, 24u); + stencil.xy |= stencil.zw; + stencil.x |= stencil.y; + rt_offset = xe_thread_id.y * xe_edram_rt_stencil_pitch + xe_thread_id.x * 4u + + xe_edram_rt_stencil_offset; + xe_edram_load_store_dest.Store(rt_offset, stencil.x); +} diff --git a/src/xenia/gpu/shaders/edram_store_depth_float.cs.hlsl b/src/xenia/gpu/shaders/edram_store_depth_float.cs.hlsl index ac7626721..d0123c69f 100644 --- a/src/xenia/gpu/shaders/edram_store_depth_float.cs.hlsl +++ b/src/xenia/gpu/shaders/edram_store_depth_float.cs.hlsl @@ -5,21 +5,18 @@ void main(uint3 xe_group_id : SV_GroupID, uint3 xe_group_thread_id : SV_GroupThreadID, uint3 xe_thread_id : SV_DispatchThreadID) { - // Depth. + // Depth (exact conversion ensured during drawing). uint rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch + xe_thread_id.x * 16u + xe_edram_rt_color_depth_offset; - uint4 depth32 = xe_edram_load_store_source.Load4(rt_offset); - uint4 depth24_stencil = XeFloat32To20e4(depth32) << 8u; + uint4 samples = + XeFloat32To20e4(xe_edram_load_store_source.Load4(rt_offset)) << 8u; // Stencil. 
rt_offset = xe_thread_id.y * xe_edram_rt_stencil_pitch + xe_thread_id.x * 4u + xe_edram_rt_stencil_offset; - depth24_stencil |= (xe_edram_load_store_source.Load(rt_offset).xxxx >> - uint4(0u, 8u, 16u, 24u)) & 0xFFu; + samples |= (xe_edram_load_store_source.Load(rt_offset).xxxx >> + uint4(0u, 8u, 16u, 24u)) & 0xFFu; uint2 tile_sample_index = xe_group_thread_id.xy; tile_sample_index.x *= 4u; - uint edram_offset = XeEdramOffset32bpp(xe_group_id.xy, tile_sample_index); - // Store 24-bit depth for aliasing and checking if 32-bit depth is up to date. - xe_edram_load_store_dest.Store4(edram_offset, depth24_stencil); - // Store 32-bit depth so precision isn't lost when doing multipass rendering. - xe_edram_load_store_dest.Store4(10485760u + edram_offset, depth32); + xe_edram_load_store_dest.Store4( + XeEdramOffset32bpp(xe_group_id.xy, tile_sample_index), samples); } diff --git a/src/xenia/gpu/shaders/edram_store_depth_float24and32.cs.hlsl b/src/xenia/gpu/shaders/edram_store_depth_float24and32.cs.hlsl new file mode 100644 index 000000000..ac7626721 --- /dev/null +++ b/src/xenia/gpu/shaders/edram_store_depth_float24and32.cs.hlsl @@ -0,0 +1,25 @@ +#include "edram_load_store.hlsli" +#include "pixel_formats.hlsli" + +[numthreads(20, 16, 1)] +void main(uint3 xe_group_id : SV_GroupID, + uint3 xe_group_thread_id : SV_GroupThreadID, + uint3 xe_thread_id : SV_DispatchThreadID) { + // Depth. + uint rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch + + xe_thread_id.x * 16u + xe_edram_rt_color_depth_offset; + uint4 depth32 = xe_edram_load_store_source.Load4(rt_offset); + uint4 depth24_stencil = XeFloat32To20e4(depth32) << 8u; + // Stencil. 
+ rt_offset = xe_thread_id.y * xe_edram_rt_stencil_pitch + xe_thread_id.x * 4u + + xe_edram_rt_stencil_offset; + depth24_stencil |= (xe_edram_load_store_source.Load(rt_offset).xxxx >> + uint4(0u, 8u, 16u, 24u)) & 0xFFu; + uint2 tile_sample_index = xe_group_thread_id.xy; + tile_sample_index.x *= 4u; + uint edram_offset = XeEdramOffset32bpp(xe_group_id.xy, tile_sample_index); + // Store 24-bit depth for aliasing and checking if 32-bit depth is up to date. + xe_edram_load_store_dest.Store4(edram_offset, depth24_stencil); + // Store 32-bit depth so precision isn't lost when doing multipass rendering. + xe_edram_load_store_dest.Store4(10485760u + edram_offset, depth32); +} diff --git a/src/xenia/gpu/shaders/edram_store_depth_unorm.cs.hlsl b/src/xenia/gpu/shaders/edram_store_depth_unorm.cs.hlsl index d5e782bbb..093f533af 100644 --- a/src/xenia/gpu/shaders/edram_store_depth_unorm.cs.hlsl +++ b/src/xenia/gpu/shaders/edram_store_depth_unorm.cs.hlsl @@ -7,8 +7,7 @@ void main(uint3 xe_group_id : SV_GroupID, // Depth. uint rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch + xe_thread_id.x * 16u + xe_edram_rt_color_depth_offset; - uint4 samples = - (xe_edram_load_store_source.Load4(rt_offset) & 0xFFFFFFu) << 8u; + uint4 samples = xe_edram_load_store_source.Load4(rt_offset) << 8u; // Stencil. rt_offset = xe_thread_id.y * xe_edram_rt_stencil_pitch + xe_thread_id.x * 4u + xe_edram_rt_stencil_offset; diff --git a/src/xenia/gpu/shaders/float24_round.ps.hlsl b/src/xenia/gpu/shaders/float24_round.ps.hlsl new file mode 100644 index 000000000..346b21b4f --- /dev/null +++ b/src/xenia/gpu/shaders/float24_round.ps.hlsl @@ -0,0 +1,13 @@ +#include "pixel_formats.hlsli" +#include "xenos_draw.hlsli" + +struct XePSInput { + XeVertexPrePS pre_ps; + sample float4 position : SV_Position; +}; + +precise float main(XePSInput xe_input) : SV_Depth { + // Input Z may be outside the viewport range (it's clamped after the shader). 
+ return asfloat( + XeFloat20e4To32(XeFloat32To20e4(asuint(saturate(xe_input.position.z))))); +} diff --git a/src/xenia/gpu/shaders/float24_truncate.ps.hlsl b/src/xenia/gpu/shaders/float24_truncate.ps.hlsl new file mode 100644 index 000000000..83a5d08d9 --- /dev/null +++ b/src/xenia/gpu/shaders/float24_truncate.ps.hlsl @@ -0,0 +1,38 @@ +#include "pixel_formats.hlsli" +#include "xenos_draw.hlsli" + +struct XePSInput { + XeVertexPrePS pre_ps; + sample float4 position : SV_Position; +}; + +precise float main(XePSInput xe_input) : SV_DepthLessEqual { + // Simplified conversion, always less than or equal to the original value - + // just drop the lower bits. + // The float32 exponent bias is 127. + // After saturating, the exponent range is -127...0. + // The smallest normalized 20e4 exponent is -14 - should drop 3 mantissa bits + // at -14 or above. + // The smallest denormalized 20e4 number is -34 - should drop 23 mantissa bits + // at -34. + // Anything smaller than 2^-34 becomes 0. + // Input Z may be outside the viewport range (it's clamped after the shader). + precise uint depth = asuint(saturate(xe_input.position.z)); + // Check if the number is representable as a float24 after truncation - the + // exponent is at least -34. + if (depth >= 0x2E800000u) { + // Extract the biased float32 exponent: + // 113+ at exponent -14+. + // 93 at exponent -34. + uint exponent = (depth >> 23u) & 0xFFu; + // Convert exponent to the shift amount. + // 116 - 113 = 3. + // 116 - 93 = 23. + uint shift = asuint(max(116 - asint(exponent), 3)); + depth = depth >> shift << shift; + } else { + // The number is not representable as float24 after truncation - zero. 
+ depth = 0u; + } + return asfloat(depth); +} diff --git a/src/xenia/gpu/shaders/pixel_formats.hlsli b/src/xenia/gpu/shaders/pixel_formats.hlsli index 1e7f5e319..e3654211d 100644 --- a/src/xenia/gpu/shaders/pixel_formats.hlsli +++ b/src/xenia/gpu/shaders/pixel_formats.hlsli @@ -495,6 +495,16 @@ void XeR11G11B10SNormToRGBA16(uint4 packed_texels, out uint4 out_01, // 6e4 has a different exponent bias allowing [0,512) values, 20e4 allows [0,2). // We also can't clamp the stored value to 1 as load->store->load must be exact. +uint XeFloat32To20e4(uint f32u32) { + // Keep only positive (high bit set means negative for both float and int) and + // saturate to the maximum representable value near 2 (also dropping NaNs). + f32u32 = min((f32u32 <= 0x7FFFFFFFu) ? f32u32 : 0u, 0x3FFFFFF8u); + uint denormalized = + ((f32u32 & 0x7FFFFFu) | 0x800000u) >> min(113u - (f32u32 >> 23u), 24u); + uint f24u32 = (f32u32 < 0x38800000u) ? denormalized : (f32u32 + 0xC8000000u); + return ((f24u32 + 3u + ((f24u32 >> 3u) & 1u)) >> 3u) & 0xFFFFFFu; +} + uint4 XeFloat32To20e4(uint4 f32u32) { // Keep only positive (high bit set means negative for both float and int) and // saturate to the maximum representable value near 2 (also dropping NaNs). @@ -505,6 +515,21 @@ uint4 XeFloat32To20e4(uint4 f32u32) { return ((f24u32 + 3u + ((f24u32 >> 3u) & 1u)) >> 3u) & 0xFFFFFFu; } +uint XeFloat20e4To32(uint f24u32) { + uint mantissa = f24u32 & 0xFFFFFu; + uint exponent = f24u32 >> 20u; + // Normalize the values for the denormalized components. + // Exponent = 1; + // do { Exponent--; Mantissa <<= 1; } while ((Mantissa & 0x100000) == 0); + bool is_denormalized = exponent == 0u; + uint mantissa_lzcnt = 20u - firstbithigh(mantissa); + exponent = is_denormalized ? (1u - mantissa_lzcnt) : exponent; + mantissa = + is_denormalized ? ((mantissa << mantissa_lzcnt) & 0xFFFFFu) : mantissa; + // Combine into 32-bit float bits and clear zeros. + return (f24u32 != 0u) ? 
(((exponent + 112u) << 23u) | (mantissa << 3u)) : 0u; +} + uint4 XeFloat20e4To32(uint4 f24u32) { uint4 mantissa = f24u32 & 0xFFFFFu; uint4 exponent = f24u32 >> 20u; diff --git a/src/xenia/gpu/shaders/primitive_point_list.gs.hlsl b/src/xenia/gpu/shaders/primitive_point_list.gs.hlsl index 33d5a5c48..ab165504a 100644 --- a/src/xenia/gpu/shaders/primitive_point_list.gs.hlsl +++ b/src/xenia/gpu/shaders/primitive_point_list.gs.hlsl @@ -10,9 +10,9 @@ void main(point XeVertexPreGS xe_in[1], } XeVertexPostGS xe_out; - xe_out.interpolators = xe_in[0].post_gs.interpolators; - xe_out.point_params.z = xe_in[0].post_gs.point_params.z; - xe_out.clip_space_zw = xe_in[0].post_gs.clip_space_zw; + xe_out.pre_ps.interpolators = xe_in[0].post_gs.pre_ps.interpolators; + xe_out.pre_ps.point_params.z = xe_in[0].post_gs.pre_ps.point_params.z; + xe_out.pre_ps.clip_space_zw = xe_in[0].post_gs.pre_ps.clip_space_zw; xe_out.position.zw = xe_in[0].post_gs.position.zw; xe_out.clip_distance_0123 = xe_in[0].post_gs.clip_distance_0123; xe_out.clip_distance_45 = xe_in[0].post_gs.clip_distance_45; @@ -20,26 +20,27 @@ void main(point XeVertexPreGS xe_in[1], // Shader header writes -1.0f to point_size by default, so any positive value // means that it was overwritten by the translated vertex shader. float2 point_size = - (xe_in[0].post_gs.point_params.z > 0.0f ? xe_in[0].post_gs.point_params.zz - : xe_point_size); + xe_in[0].post_gs.pre_ps.point_params.z > 0.0f + ? xe_in[0].post_gs.pre_ps.point_params.zz + : xe_point_size; point_size = clamp(point_size, xe_point_size_min_max.xx, xe_point_size_min_max.yy) * xe_point_screen_to_ndc * xe_in[0].post_gs.position.w; - xe_out.point_params.xy = float2(0.0, 0.0); + xe_out.pre_ps.point_params.xy = float2(0.0, 0.0); // TODO(Triang3l): On Vulkan, sign of Y needs to inverted because of // upper-left origin. // TODO(Triang3l): Investigate the true signs of point sprites. 
xe_out.position.xy = xe_in[0].post_gs.position.xy + float2(-point_size.x, point_size.y); xe_stream.Append(xe_out); - xe_out.point_params.xy = float2(0.0, 1.0); + xe_out.pre_ps.point_params.xy = float2(0.0, 1.0); xe_out.position.xy = xe_in[0].post_gs.position.xy - point_size; xe_stream.Append(xe_out); - xe_out.point_params.xy = float2(1.0, 0.0); + xe_out.pre_ps.point_params.xy = float2(1.0, 0.0); xe_out.position.xy = xe_in[0].post_gs.position.xy + point_size; xe_stream.Append(xe_out); - xe_out.point_params.xy = float2(1.0, 1.0); + xe_out.pre_ps.point_params.xy = float2(1.0, 1.0); xe_out.position.xy = xe_in[0].post_gs.position.xy + float2(point_size.x, -point_size.y); xe_stream.Append(xe_out); diff --git a/src/xenia/gpu/shaders/primitive_rectangle_list.gs.hlsl b/src/xenia/gpu/shaders/primitive_rectangle_list.gs.hlsl index 8411e54c2..45b7b05e5 100644 --- a/src/xenia/gpu/shaders/primitive_rectangle_list.gs.hlsl +++ b/src/xenia/gpu/shaders/primitive_rectangle_list.gs.hlsl @@ -80,16 +80,19 @@ void main(triangle XeVertexPreGS xe_in[3], v3_signs = float3(1.0f, 1.0f, -1.0f); } [unroll] for (int i = 0; i < 16; ++i) { - xe_out.interpolators[i] = v3_signs.x * xe_in[0].post_gs.interpolators[i] + - v3_signs.y * xe_in[1].post_gs.interpolators[i] + - v3_signs.z * xe_in[2].post_gs.interpolators[i]; + xe_out.pre_ps.interpolators[i] = + v3_signs.x * xe_in[0].post_gs.pre_ps.interpolators[i] + + v3_signs.y * xe_in[1].post_gs.pre_ps.interpolators[i] + + v3_signs.z * xe_in[2].post_gs.pre_ps.interpolators[i]; } - xe_out.point_params = v3_signs.x * xe_in[0].post_gs.point_params + - v3_signs.y * xe_in[1].post_gs.point_params + - v3_signs.z * xe_in[2].post_gs.point_params; - xe_out.clip_space_zw = v3_signs.x * xe_in[0].post_gs.clip_space_zw + - v3_signs.y * xe_in[1].post_gs.clip_space_zw + - v3_signs.z * xe_in[2].post_gs.clip_space_zw; + xe_out.pre_ps.point_params = + v3_signs.x * xe_in[0].post_gs.pre_ps.point_params + + v3_signs.y * xe_in[1].post_gs.pre_ps.point_params + + v3_signs.z * 
xe_in[2].post_gs.pre_ps.point_params; + xe_out.pre_ps.clip_space_zw = + v3_signs.x * xe_in[0].post_gs.pre_ps.clip_space_zw + + v3_signs.y * xe_in[1].post_gs.pre_ps.clip_space_zw + + v3_signs.z * xe_in[2].post_gs.pre_ps.clip_space_zw; xe_out.position = v3_signs.x * xe_in[0].post_gs.position + v3_signs.y * xe_in[1].post_gs.position + v3_signs.z * xe_in[2].post_gs.position; diff --git a/src/xenia/gpu/shaders/xenos_draw.hlsli b/src/xenia/gpu/shaders/xenos_draw.hlsli index a7e841eeb..98c5f26ed 100644 --- a/src/xenia/gpu/shaders/xenos_draw.hlsli +++ b/src/xenia/gpu/shaders/xenos_draw.hlsli @@ -63,10 +63,14 @@ struct XeHSControlPointOutput { float index : XEVERTEXID; }; -struct XeVertexPostGS { +struct XeVertexPrePS { float4 interpolators[16] : TEXCOORD0; float3 point_params : TEXCOORD16; float2 clip_space_zw : TEXCOORD17; +}; + +struct XeVertexPostGS { + XeVertexPrePS pre_ps; // Precise needed to preserve NaN - guest primitives may be converted to more // than 1 triangle, so need to kill them entirely manually in GS if any vertex // is NaN. 
diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index de9c6c969..e87a51c8c 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -66,8 +66,22 @@ SpirvShaderTranslator::Features::Features( SpirvShaderTranslator::SpirvShaderTranslator(const Features& features) : features_(features) {} -void SpirvShaderTranslator::Reset() { - ShaderTranslator::Reset(); +uint32_t SpirvShaderTranslator::GetDefaultModification( + xenos::ShaderType shader_type, + Shader::HostVertexShaderType host_vertex_shader_type) const { + Modification shader_modification; + switch (shader_type) { + case xenos::ShaderType::kVertex: + shader_modification.host_vertex_shader_type = host_vertex_shader_type; + break; + case xenos::ShaderType::kPixel: + break; + } + return shader_modification.value; +} + +void SpirvShaderTranslator::Reset(xenos::ShaderType shader_type) { + ShaderTranslator::Reset(shader_type); builder_.reset(); @@ -226,8 +240,8 @@ void SpirvShaderTranslator::StartTranslation() { "xe_uniform_float_constants"); builder_->addDecoration( uniform_float_constants_, spv::DecorationDescriptorSet, - int(IsSpirvFragmentShader() ? kDescriptorSetFloatConstantsPixel - : kDescriptorSetFloatConstantsVertex)); + int(is_pixel_shader() ? kDescriptorSetFloatConstantsPixel + : kDescriptorSetFloatConstantsVertex)); builder_->addDecoration(uniform_float_constants_, spv::DecorationBinding, 0); if (features_.spirv_version >= spv::Spv_1_4) { @@ -335,7 +349,7 @@ void SpirvShaderTranslator::StartTranslation() { main_interface_.push_back(buffers_shared_memory_); } - if (IsSpirvVertexOrTessEvalShader()) { + if (is_vertex_shader()) { StartVertexOrTessEvalShaderBeforeMain(); } @@ -383,7 +397,7 @@ void SpirvShaderTranslator::StartTranslation() { // Write the execution model-specific prologue with access to variables in the // main function. 
- if (IsSpirvVertexOrTessEvalShader()) { + if (is_vertex_shader()) { StartVertexOrTessEvalShaderInMain(); } @@ -507,7 +521,7 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { function_main_->addBlock(main_loop_merge_); builder_->setBuildPoint(main_loop_merge_); - if (IsSpirvVertexOrTessEvalShader()) { + if (is_vertex_shader()) { CompleteVertexOrTessEvalShaderInMain(); } @@ -516,12 +530,12 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { // Make the main function the entry point. spv::ExecutionModel execution_model; - if (IsSpirvFragmentShader()) { + if (is_pixel_shader()) { execution_model = spv::ExecutionModelFragment; builder_->addExecutionMode(function_main_, spv::ExecutionModeOriginUpperLeft); } else { - assert_true(IsSpirvVertexOrTessEvalShader()); + assert_true(is_vertex_shader()); execution_model = IsSpirvTessEvalShader() ? spv::ExecutionModelTessellationEvaluation : spv::ExecutionModelVertex; @@ -1479,7 +1493,7 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, spv::StorageClassFunction, var_main_registers_, id_vector_temp_util_); } break; case InstructionStorageTarget::kPosition: - assert_true(IsSpirvVertexOrTessEvalShader()); + assert_true(is_vertex_shader()); id_vector_temp_util_.clear(); id_vector_temp_util_.push_back( builder_->makeIntConstant(kOutputPerVertexMemberPosition)); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index fadcf2a6b..6882ccbde 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -25,6 +25,25 @@ namespace gpu { class SpirvShaderTranslator : public ShaderTranslator { public: + union Modification { + // If anything in this is structure is changed in a way not compatible with + // the previous layout, invalidate the pipeline storages by increasing this + // version number (0xYYYYMMDD)! 
+ // TODO(Triang3l): Change to 0xYYYYMMDD once it's out of the rapid + // prototyping stage (easier to do small granular updates with an + // incremental counter). + static constexpr uint32_t kVersion = 1; + + struct { + // VS - pipeline stage and input configuration. + Shader::HostVertexShaderType host_vertex_shader_type + : Shader::kHostVertexShaderTypeBitCount; + }; + uint32_t value = 0; + + Modification(uint32_t modification_value = 0) : value(modification_value) {} + }; + enum : uint32_t { kSysFlag_XYDividedByW_Shift, kSysFlag_ZDividedByW_Shift, @@ -118,6 +137,11 @@ class SpirvShaderTranslator : public ShaderTranslator { }; SpirvShaderTranslator(const Features& features); + uint32_t GetDefaultModification( + xenos::ShaderType shader_type, + Shader::HostVertexShaderType host_vertex_shader_type = + Shader::HostVertexShaderType::kVertex) const override; + static constexpr uint32_t GetSharedMemoryStorageBufferCountLog2( uint32_t max_storage_buffer_range) { if (max_storage_buffer_range >= 512 * 1024 * 1024) { @@ -134,7 +158,7 @@ class SpirvShaderTranslator : public ShaderTranslator { } protected: - void Reset() override; + void Reset(xenos::ShaderType shader_type) override; void StartTranslation() override; @@ -166,17 +190,21 @@ class SpirvShaderTranslator : public ShaderTranslator { builder_->getBuildPoint()->addInstruction(std::move(selection_merge_op)); } + Modification GetSpirvShaderModification() const { + return Modification(modification()); + } + // TODO(Triang3l): Depth-only pixel shader. 
- bool IsSpirvVertexOrTessEvalShader() const { return is_vertex_shader(); } bool IsSpirvVertexShader() const { - return IsSpirvVertexOrTessEvalShader() && - host_vertex_shader_type() == Shader::HostVertexShaderType::kVertex; + return is_vertex_shader() && + GetSpirvShaderModification().host_vertex_shader_type == + Shader::HostVertexShaderType::kVertex; } bool IsSpirvTessEvalShader() const { - return IsSpirvVertexOrTessEvalShader() && - host_vertex_shader_type() != Shader::HostVertexShaderType::kVertex; + return is_vertex_shader() && + GetSpirvShaderModification().host_vertex_shader_type != + Shader::HostVertexShaderType::kVertex; } - bool IsSpirvFragmentShader() const { return is_pixel_shader(); } // Must be called before emitting any SPIR-V operations that must be in a // block in translator callbacks to ensure that if the last instruction added diff --git a/src/xenia/gpu/texture_conversion.cc b/src/xenia/gpu/texture_conversion.cc index 27c228780..bd028f47e 100644 --- a/src/xenia/gpu/texture_conversion.cc +++ b/src/xenia/gpu/texture_conversion.cc @@ -18,8 +18,7 @@ #include "xenia/base/math.h" #include "xenia/base/memory.h" #include "xenia/base/profiling.h" - -#include "third_party/xxhash/xxhash.h" +#include "xenia/base/xxhash.h" namespace xe { namespace gpu { diff --git a/src/xenia/gpu/texture_info.cc b/src/xenia/gpu/texture_info.cc index b20096d19..d190fb31e 100644 --- a/src/xenia/gpu/texture_info.cc +++ b/src/xenia/gpu/texture_info.cc @@ -16,8 +16,7 @@ #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/base/memory.h" - -#include "third_party/xxhash/xxhash.h" +#include "xenia/base/xxhash.h" namespace xe { namespace gpu { @@ -319,7 +318,7 @@ bool TextureInfo::GetPackedTileOffset(int packed_tile, uint32_t* offset_x, } uint64_t TextureInfo::hash() const { - return XXH64(this, sizeof(TextureInfo), 0); + return XXH3_64bits(this, sizeof(TextureInfo)); } void TextureInfo::SetupMemoryInfo(uint32_t base_address, uint32_t mip_address) { diff --git 
a/src/xenia/gpu/trace_dump.cc b/src/xenia/gpu/trace_dump.cc index 984984c4a..fdebcfba4 100644 --- a/src/xenia/gpu/trace_dump.cc +++ b/src/xenia/gpu/trace_dump.cc @@ -92,7 +92,7 @@ int TraceDump::Main(const std::vector& args) { bool TraceDump::Setup() { // Create the emulator but don't initialize so we can setup the window. - emulator_ = std::make_unique("", "", ""); + emulator_ = std::make_unique("", "", "", ""); X_STATUS result = emulator_->Setup( nullptr, nullptr, [this]() { return CreateGraphicsSystem(); }, nullptr); if (XFAILED(result)) { diff --git a/src/xenia/gpu/trace_viewer.cc b/src/xenia/gpu/trace_viewer.cc index 5305c50ae..5297d6856 100644 --- a/src/xenia/gpu/trace_viewer.cc +++ b/src/xenia/gpu/trace_viewer.cc @@ -121,7 +121,7 @@ bool TraceViewer::Setup() { window_->Resize(1920, 1200); // Create the emulator but don't initialize so we can setup the window. - emulator_ = std::make_unique("", "", ""); + emulator_ = std::make_unique("", "", "", ""); X_STATUS result = emulator_->Setup( window_.get(), nullptr, [this]() { return CreateGraphicsSystem(); }, nullptr); @@ -566,8 +566,21 @@ TraceViewer::ShaderDisplayType TraceViewer::DrawShaderTypeUI() { void TraceViewer::DrawShaderUI(Shader* shader, ShaderDisplayType display_type) { // Must be prepared for advanced display modes. + // FIXME(Triang3l): This should display the actual translation used in the + // draw, but it may depend on multiple backend-related factors, including + // drawing multiple times with multiple modifications, even depending on + // values obtained during translation of other modifications (for instance, + // a memexporting shader can be executed both as a vertex shader (to draw the + // points) and as a compute shader (to actually export) if the host doesn't + // support writes from vertex shaders).
+ const Shader::Translation* translation = nullptr; if (display_type != ShaderDisplayType::kUcode) { - if (!shader->is_valid()) { + for (const auto& translation_pair : shader->translations()) { + if (translation_pair.second->is_valid()) { + translation = translation_pair.second; + } + } + if (!translation) { ImGui::TextColored(kColorError, "ERROR: shader error during parsing/translation"); return; @@ -580,7 +593,7 @@ void TraceViewer::DrawShaderUI(Shader* shader, ShaderDisplayType display_type) { break; } case ShaderDisplayType::kTranslated: { - const auto& str = shader->GetTranslatedBinaryString(); + const auto& str = translation->GetTranslatedBinaryString(); size_t i = 0; bool done = false; while (!done && i < str.size()) { @@ -600,7 +613,7 @@ void TraceViewer::DrawShaderUI(Shader* shader, ShaderDisplayType display_type) { break; } case ShaderDisplayType::kHostDisasm: { - DrawMultilineString(shader->host_disassembly()); + DrawMultilineString(translation->host_disassembly()); break; } } diff --git a/src/xenia/gpu/ucode.h b/src/xenia/gpu/ucode.h index 21ccbaff9..ea11f10cd 100644 --- a/src/xenia/gpu/ucode.h +++ b/src/xenia/gpu/ucode.h @@ -816,10 +816,11 @@ static_assert_size(TextureFetchInstruction, 12); // move of the third operand in case of zero multiplicands, because the term // may be -0, while the result should be +0 in this case. // http://developer.amd.com/wordpress/media/2013/10/R5xx_Acceleration_v1.5.pdf -// Multiply-add also appears to be not fused (the SM3 behavior instruction on -// GCN is called v_mad_legacy_f32, not v_fma_legacy_f32) - shader translators -// should not use instructions that may be interpreted by the host GPU as -// fused multiply-add. 
+// Multiply-add also appears to be not fused; the SM3 behavior instruction on +// GCN is called v_mad_legacy_f32, not v_fma_legacy_f32 (in 2012-2020, before +// RDNA 2, which removed v_mad_f32 as well) - shader translators should not +// use instructions that may be interpreted by the host GPU as fused +// multiply-add. enum class AluScalarOpcode : uint32_t { // Floating-Point Add @@ -1147,6 +1148,19 @@ enum class AluScalarOpcode : uint32_t { kRetainPrev = 50, }; +constexpr bool AluScalarOpcodeIsKill(AluScalarOpcode scalar_opcode) { + switch (scalar_opcode) { + case AluScalarOpcode::kKillsEq: + case AluScalarOpcode::kKillsGt: + case AluScalarOpcode::kKillsGe: + case AluScalarOpcode::kKillsNe: + case AluScalarOpcode::kKillsOne: + return true; + default: + return false; + } +} + enum class AluVectorOpcode : uint32_t { // Per-Component Floating-Point Add // add/ADDv dest, src0, src1 @@ -1471,27 +1485,37 @@ enum class AluVectorOpcode : uint32_t { kMaxA = 29, }; +constexpr bool AluVectorOpcodeIsKill(AluVectorOpcode vector_opcode) { + switch (vector_opcode) { + case AluVectorOpcode::kKillEq: + case AluVectorOpcode::kKillGt: + case AluVectorOpcode::kKillGe: + case AluVectorOpcode::kKillNe: + return true; + default: + return false; + } +} + // Whether the vector instruction has side effects such as discarding a pixel or // setting the predicate and can't be ignored even if it doesn't write to // anywhere. Note that all scalar operations except for retain_prev have a side // effect of modifying the previous scalar result register, so they must always // be executed even if not writing. 
constexpr bool AluVectorOpHasSideEffects(AluVectorOpcode vector_opcode) { + if (AluVectorOpcodeIsKill(vector_opcode)) { + return true; + } switch (vector_opcode) { case AluVectorOpcode::kSetpEqPush: case AluVectorOpcode::kSetpNePush: case AluVectorOpcode::kSetpGtPush: case AluVectorOpcode::kSetpGePush: - case AluVectorOpcode::kKillEq: - case AluVectorOpcode::kKillGt: - case AluVectorOpcode::kKillGe: - case AluVectorOpcode::kKillNe: case AluVectorOpcode::kMaxA: return true; default: - break; + return false; } - return false; } // Whether each component of a source operand is used at all in the instruction diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 5ac052812..e15b2434f 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -627,6 +627,17 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, } // TODO(Triang3l): Get a pixel shader. VulkanShader* pixel_shader = nullptr; + SpirvShaderTranslator::Modification vertex_shader_modification; + SpirvShaderTranslator::Modification pixel_shader_modification; + if (!pipeline_cache_->GetCurrentShaderModifications( + vertex_shader_modification, pixel_shader_modification)) { + return false; + } + VulkanShader::VulkanTranslation* vertex_shader_translation = + static_cast( + vertex_shader->GetOrCreateTranslation( + vertex_shader_modification.value)); + VulkanShader::VulkanTranslation* pixel_shader_translation = nullptr; VulkanRenderTargetCache::FramebufferKey framebuffer_key; if (!render_target_cache_->UpdateRenderTargets(framebuffer_key)) { @@ -648,7 +659,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // current_graphics_pipeline_layout_. 
VkPipeline pipeline; const VulkanPipelineCache::PipelineLayoutProvider* pipeline_layout_provider; - if (!pipeline_cache_->ConfigurePipeline(vertex_shader, pixel_shader, + if (!pipeline_cache_->ConfigurePipeline(vertex_shader_translation, + pixel_shader_translation, framebuffer_key.render_pass_key, pipeline, pipeline_layout_provider)) { return false; @@ -713,7 +725,7 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, draw_util::GetHostViewportInfo( regs, 1.0f, 1.0f, false, float(device_properties.limits.maxViewportDimensions[0]), - float(device_properties.limits.maxViewportDimensions[1]), true, + float(device_properties.limits.maxViewportDimensions[1]), true, false, viewport_info); // Update fixed-function dynamic state. diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 1700584b2..98d6592dd 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -17,6 +17,8 @@ #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/base/profiling.h" +#include "xenia/base/xxhash.h" +#include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/register_file.h" #include "xenia/gpu/registers.h" #include "xenia/gpu/spirv_shader_translator.h" @@ -84,7 +86,8 @@ VulkanShader* VulkanPipelineCache::LoadShader(xenos::ShaderType shader_type, const uint32_t* host_address, uint32_t dword_count) { // Hash the input memory and lookup the shader. - uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0); + uint64_t data_hash = + XXH3_64bits(host_address, dword_count * sizeof(uint32_t)); auto it = shaders_.find(data_hash); if (it != shaders_.end()) { // Shader has been previously loaded. @@ -94,16 +97,31 @@ VulkanShader* VulkanPipelineCache::LoadShader(xenos::ShaderType shader_type, // Always create the shader and stash it away. // We need to track it even if it fails translation so we know not to try // again. 
- VulkanShader* shader = - new VulkanShader(shader_type, data_hash, host_address, dword_count); + VulkanShader* shader = new VulkanShader( + shader_type, data_hash, host_address, dword_count, + command_processor_.GetVulkanContext().GetVulkanProvider()); shaders_.emplace(data_hash, shader); + if (!cvars::dump_shaders.empty()) { + shader->DumpUcodeBinary(cvars::dump_shaders); + } return shader; } +bool VulkanPipelineCache::GetCurrentShaderModifications( + SpirvShaderTranslator::Modification& vertex_shader_modification_out, + SpirvShaderTranslator::Modification& pixel_shader_modification_out) const { + // TODO(Triang3l): Tessellation, depth output. + vertex_shader_modification_out = SpirvShaderTranslator::Modification( + shader_translator_->GetDefaultModification(xenos::ShaderType::kVertex)); + pixel_shader_modification_out = SpirvShaderTranslator::Modification( + shader_translator_->GetDefaultModification(xenos::ShaderType::kPixel)); + return true; +} + bool VulkanPipelineCache::EnsureShadersTranslated( - VulkanShader* vertex_shader, VulkanShader* pixel_shader, - Shader::HostVertexShaderType host_vertex_shader_type) { + VulkanShader::VulkanTranslation* vertex_shader, + VulkanShader::VulkanTranslation* pixel_shader) { const RegisterFile& regs = register_file_; auto sq_program_cntl = regs.Get(); @@ -133,7 +151,8 @@ bool VulkanPipelineCache::EnsureShadersTranslated( } bool VulkanPipelineCache::ConfigurePipeline( - VulkanShader* vertex_shader, VulkanShader* pixel_shader, + VulkanShader::VulkanTranslation* vertex_shader, + VulkanShader::VulkanTranslation* pixel_shader, VulkanRenderTargetCache::RenderPassKey render_pass_key, VkPipeline& pipeline_out, const PipelineLayoutProvider*& pipeline_layout_out) { @@ -160,8 +179,7 @@ bool VulkanPipelineCache::ConfigurePipeline( } // Create the pipeline if not the latest and not already existing. 
- if (!EnsureShadersTranslated(vertex_shader, pixel_shader, - Shader::HostVertexShaderType::kVertex)) { + if (!EnsureShadersTranslated(vertex_shader, pixel_shader)) { return false; } const PipelineLayoutProvider* pipeline_layout = @@ -189,24 +207,22 @@ bool VulkanPipelineCache::ConfigurePipeline( return true; } -bool VulkanPipelineCache::TranslateShader(SpirvShaderTranslator& translator, - VulkanShader& shader, - reg::SQ_PROGRAM_CNTL cntl) { +bool VulkanPipelineCache::TranslateShader( + SpirvShaderTranslator& translator, + VulkanShader::VulkanTranslation& translation, reg::SQ_PROGRAM_CNTL cntl) { // Perform translation. // If this fails the shader will be marked as invalid and ignored later. - // TODO(Triang3l): Host vertex shader type. - if (!translator.Translate(&shader, cntl, - Shader::HostVertexShaderType::kVertex)) { + if (!translator.Translate(translation, cntl)) { XELOGE("Shader {:016X} translation failed; marking as ignored", - shader.ucode_data_hash()); + translation.shader().ucode_data_hash()); return false; } - return shader.InitializeShaderModule( - command_processor_.GetVulkanContext().GetVulkanProvider()); + return translation.GetOrCreateShaderModule() != VK_NULL_HANDLE; } bool VulkanPipelineCache::GetCurrentStateDescription( - const VulkanShader* vertex_shader, const VulkanShader* pixel_shader, + const VulkanShader::VulkanTranslation* vertex_shader, + const VulkanShader::VulkanTranslation* pixel_shader, VulkanRenderTargetCache::RenderPassKey render_pass_key, PipelineDescription& description_out) const { description_out.Reset(); @@ -215,9 +231,14 @@ bool VulkanPipelineCache::GetCurrentStateDescription( auto pa_su_sc_mode_cntl = regs.Get(); auto vgt_draw_initiator = regs.Get(); - description_out.vertex_shader_hash = vertex_shader->ucode_data_hash(); - description_out.pixel_shader_hash = - pixel_shader ? 
pixel_shader->ucode_data_hash() : 0; + description_out.vertex_shader_hash = + vertex_shader->shader().ucode_data_hash(); + description_out.vertex_shader_modification = vertex_shader->modification(); + if (pixel_shader) { + description_out.pixel_shader_hash = + pixel_shader->shader().ucode_data_hash(); + description_out.pixel_shader_modification = pixel_shader->modification(); + } description_out.render_pass_key = render_pass_key; xenos::PrimitiveType primitive_type = vgt_draw_initiator.prim_type; @@ -321,11 +342,11 @@ bool VulkanPipelineCache::EnsurePipelineCreated( if (creation_arguments.pixel_shader) { XELOGGPU("Creating graphics pipeline state with VS {:016X}, PS {:016X}", - creation_arguments.vertex_shader->ucode_data_hash(), - creation_arguments.pixel_shader->ucode_data_hash()); + creation_arguments.vertex_shader->shader().ucode_data_hash(), + creation_arguments.pixel_shader->shader().ucode_data_hash()); } else { XELOGGPU("Creating graphics pipeline state with VS {:016X}", - creation_arguments.vertex_shader->ucode_data_hash()); + creation_arguments.vertex_shader->shader().ucode_data_hash()); } const PipelineDescription& description = creation_arguments.pipeline->first; @@ -514,11 +535,11 @@ bool VulkanPipelineCache::EnsurePipelineCreated( /* if (creation_arguments.pixel_shader) { XELOGE( "Failed to create graphics pipeline with VS {:016X}, PS {:016X}", - creation_arguments.vertex_shader->ucode_data_hash(), - creation_arguments.pixel_shader->ucode_data_hash()); + creation_arguments.vertex_shader->shader().ucode_data_hash(), + creation_arguments.pixel_shader->shader().ucode_data_hash()); } else { XELOGE("Failed to create graphics pipeline with VS {:016X}", - creation_arguments.vertex_shader->ucode_data_hash()); + creation_arguments.vertex_shader->shader().ucode_data_hash()); } */ return false; } diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h index b22212552..92ccf5325 100644 --- 
a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h @@ -16,9 +16,9 @@ #include #include -#include "third_party/xxhash/xxhash.h" #include "xenia/base/hash.h" #include "xenia/base/platform.h" +#include "xenia/base/xxhash.h" #include "xenia/gpu/register_file.h" #include "xenia/gpu/spirv_shader_translator.h" #include "xenia/gpu/vulkan/vulkan_render_target_cache.h" @@ -55,14 +55,19 @@ class VulkanPipelineCache { uint32_t guest_address, const uint32_t* host_address, uint32_t dword_count); + // Retrieves the shader modifications for the current state, and returns + // whether they are valid. + bool GetCurrentShaderModifications( + SpirvShaderTranslator::Modification& vertex_shader_modification_out, + SpirvShaderTranslator::Modification& pixel_shader_modification_out) const; + // Translates shaders if needed, also making shader info up to date. - bool EnsureShadersTranslated( - VulkanShader* vertex_shader, VulkanShader* pixel_shader, - Shader::HostVertexShaderType host_vertex_shader_type); + bool EnsureShadersTranslated(VulkanShader::VulkanTranslation* vertex_shader, + VulkanShader::VulkanTranslation* pixel_shader); // TODO(Triang3l): Return a deferred creation handle. - bool ConfigurePipeline(VulkanShader* vertex_shader, - VulkanShader* pixel_shader, + bool ConfigurePipeline(VulkanShader::VulkanTranslation* vertex_shader, + VulkanShader::VulkanTranslation* pixel_shader, VulkanRenderTargetCache::RenderPassKey render_pass_key, VkPipeline& pipeline_out, const PipelineLayoutProvider*& pipeline_layout_out); @@ -102,6 +107,8 @@ class VulkanPipelineCache { uint64_t vertex_shader_hash; // 0 if no pixel shader. uint64_t pixel_shader_hash; + uint32_t vertex_shader_modification; + uint32_t pixel_shader_modification; VulkanRenderTargetCache::RenderPassKey render_pass_key; // Input assembly. 
@@ -126,7 +133,7 @@ class VulkanPipelineCache { return std::memcmp(this, &description, sizeof(*this)) == 0; } void Reset() { std::memset(this, 0, sizeof(*this)); } - uint64_t GetHash() const { return XXH64(this, sizeof(*this), 0); } + uint64_t GetHash() const { return XXH3_64bits(this, sizeof(*this)); } struct Hasher { size_t operator()(const PipelineDescription& description) const { return size_t(description.GetHash()); @@ -146,17 +153,19 @@ class VulkanPipelineCache { // creation threads, with everything needed from caches pre-looked-up. struct PipelineCreationArguments { std::pair* pipeline; - const VulkanShader* vertex_shader; - const VulkanShader* pixel_shader; + const VulkanShader::VulkanTranslation* vertex_shader; + const VulkanShader::VulkanTranslation* pixel_shader; VkRenderPass render_pass; }; // Can be called from multiple threads. - bool TranslateShader(SpirvShaderTranslator& translator, VulkanShader& shader, + bool TranslateShader(SpirvShaderTranslator& translator, + VulkanShader::VulkanTranslation& translation, reg::SQ_PROGRAM_CNTL cntl); bool GetCurrentStateDescription( - const VulkanShader* vertex_shader, const VulkanShader* pixel_shader, + const VulkanShader::VulkanTranslation* vertex_shader, + const VulkanShader::VulkanTranslation* pixel_shader, VulkanRenderTargetCache::RenderPassKey render_pass_key, PipelineDescription& description_out) const; diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h index f5c183f70..11be41612 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h @@ -14,7 +14,7 @@ #include #include -#include "third_party/xxhash/xxhash.h" +#include "xenia/base/xxhash.h" #include "xenia/gpu/register_file.h" #include "xenia/ui/vulkan/vulkan_provider.h" @@ -49,7 +49,7 @@ class VulkanRenderTargetCache { return std::memcmp(this, &key, sizeof(*this)) == 0; } void Reset() { std::memset(this, 0, sizeof(*this)); } - 
uint64_t GetHash() const { return XXH64(this, sizeof(*this), 0); } + uint64_t GetHash() const { return XXH3_64bits(this, sizeof(*this)); } struct Hasher { size_t operator()(const FramebufferKey& description) const { return size_t(description.GetHash()); diff --git a/src/xenia/gpu/vulkan/vulkan_shader.cc b/src/xenia/gpu/vulkan/vulkan_shader.cc index e4fafff96..9a4f4f10b 100644 --- a/src/xenia/gpu/vulkan/vulkan_shader.cc +++ b/src/xenia/gpu/vulkan/vulkan_shader.cc @@ -11,22 +11,30 @@ #include +#include "xenia/ui/vulkan/vulkan_provider.h" + namespace xe { namespace gpu { namespace vulkan { -VulkanShader::VulkanShader(xenos::ShaderType shader_type, uint64_t data_hash, - const uint32_t* dword_ptr, uint32_t dword_count) - : Shader(shader_type, data_hash, dword_ptr, dword_count) {} +VulkanShader::VulkanTranslation::~VulkanTranslation() { + if (shader_module_) { + const ui::vulkan::VulkanProvider& provider = + static_cast(shader()).provider_; + provider.dfn().vkDestroyShaderModule(provider.device(), shader_module_, + nullptr); + } +} -bool VulkanShader::InitializeShaderModule( - const ui::vulkan::VulkanProvider& provider) { +VkShaderModule VulkanShader::VulkanTranslation::GetOrCreateShaderModule() { if (!is_valid()) { - return false; + return VK_NULL_HANDLE; } if (shader_module_ != VK_NULL_HANDLE) { - return true; + return shader_module_; } + const ui::vulkan::VulkanProvider& provider = + static_cast(shader()).provider_; VkShaderModuleCreateInfo shader_module_create_info; shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; shader_module_create_info.pNext = nullptr; @@ -37,10 +45,21 @@ bool VulkanShader::InitializeShaderModule( if (provider.dfn().vkCreateShaderModule(provider.device(), &shader_module_create_info, nullptr, &shader_module_) != VK_SUCCESS) { - is_valid_ = false; - return false; + MakeInvalid(); + return VK_NULL_HANDLE; } - return true; + return shader_module_; +} + +VulkanShader::VulkanShader(xenos::ShaderType shader_type, uint64_t 
data_hash, + const uint32_t* dword_ptr, uint32_t dword_count, + const ui::vulkan::VulkanProvider& provider) + : Shader(shader_type, data_hash, dword_ptr, dword_count), + provider_(provider) {} + +Shader::Translation* VulkanShader::CreateTranslationInstance( + uint32_t modification) { + return new VulkanTranslation(*this, modification); } } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_shader.h b/src/xenia/gpu/vulkan/vulkan_shader.h index 23ff5fd90..973bbe80c 100644 --- a/src/xenia/gpu/vulkan/vulkan_shader.h +++ b/src/xenia/gpu/vulkan/vulkan_shader.h @@ -22,14 +22,28 @@ namespace vulkan { class VulkanShader : public Shader { public: - VulkanShader(xenos::ShaderType shader_type, uint64_t data_hash, - const uint32_t* dword_ptr, uint32_t dword_count); + class VulkanTranslation : public Translation { + public: + VulkanTranslation(VulkanShader& shader, uint32_t modification) + : Translation(shader, modification) {} + ~VulkanTranslation() override; - bool InitializeShaderModule(const ui::vulkan::VulkanProvider& provider); - VkShaderModule shader_module() const { return shader_module_; } + VkShaderModule GetOrCreateShaderModule(); + VkShaderModule shader_module() const { return shader_module_; } + + private: + VkShaderModule shader_module_ = VK_NULL_HANDLE; + }; + + VulkanShader(xenos::ShaderType shader_type, uint64_t data_hash, + const uint32_t* dword_ptr, uint32_t dword_count, + const ui::vulkan::VulkanProvider& provider); + + protected: + Translation* CreateTranslationInstance(uint32_t modification) override; private: - VkShaderModule shader_module_ = VK_NULL_HANDLE; + const ui::vulkan::VulkanProvider& provider_; }; } // namespace vulkan diff --git a/src/xenia/gpu/xenos.cc b/src/xenia/gpu/xenos.cc index 4f9e2875f..faaf4818d 100644 --- a/src/xenia/gpu/xenos.cc +++ b/src/xenia/gpu/xenos.cc @@ -9,17 +9,41 @@ #include "xenia/gpu/xenos.h" +#include + #include "xenia/base/math.h" namespace xe { namespace gpu { namespace xenos { +// Based on CFloat24 from 
d3dref9.dll and the 6e4 code from: +// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp +// 6e4 has a different exponent bias allowing [0,512) values, 20e4 allows [0,2). + +uint32_t Float32To20e4(float f32) { + if (!(f32 > 0.0f)) { + // Positive only, and not -0 or NaN. + return 0; + } + uint32_t f32u32 = *reinterpret_cast(&f32); + if (f32u32 >= 0x3FFFFFF8) { + // Saturate. + return 0xFFFFFF; + } + if (f32u32 < 0x38800000) { + // The number is too small to be represented as a normalized 20e4. + // Convert it to a denormalized value. + uint32_t shift = std::min(uint32_t(113 - (f32u32 >> 23)), uint32_t(24)); + f32u32 = (0x800000 | (f32u32 & 0x7FFFFF)) >> shift; + } else { + // Rebias the exponent to represent the value as a normalized 20e4. + f32u32 += 0xC8000000u; + } + return ((f32u32 + 3 + ((f32u32 >> 3) & 1)) >> 3) & 0xFFFFFF; +} + float Float20e4To32(uint32_t f24) { - // Based on CFloat24 from d3dref9.dll and the 6e4 code from: - // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp - // 6e4 has a different exponent bias allowing [0,512) values, 20e4 allows - // [0,2). f24 &= 0xFFFFFF; if (!f24) { return 0.0f; diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 4117a8293..1c21ed8ff 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -305,6 +305,9 @@ enum class DepthRenderTargetFormat : uint32_t { const char* GetDepthRenderTargetFormatName(DepthRenderTargetFormat format); +// Converts an IEEE-754 32-bit floating-point number to Xenos floating-point +// depth, rounding to the nearest even. +uint32_t Float32To20e4(float f32); // Converts Xenos floating-point depth in bits 0:23 (not clamping) to an // IEEE-754 32-bit floating-point number. 
float Float20e4To32(uint32_t f24); @@ -1036,10 +1039,9 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, { ClampMode clamp_y : 3; // +13 ClampMode clamp_z : 3; // +16 SignedRepeatingFractionMode signed_rf_mode_all : 1; // +19 - // TODO(Triang3l): 1 or 2 dim_tbd bits? - uint32_t unk_0 : 2; // +20 - uint32_t pitch : 9; // +22 byte_pitch >> 5 - uint32_t tiled : 1; // +31 + uint32_t dim_tbd : 2; // +20 + uint32_t pitch : 9; // +22 byte_pitch >> 5 + uint32_t tiled : 1; // +31 TextureFormat format : 6; // +0 dword_1 Endian endianness : 2; // +6 diff --git a/src/xenia/hid/hid_demo.cc b/src/xenia/hid/hid_demo.cc index f7bea3a7f..78ce8cbe8 100644 --- a/src/xenia/hid/hid_demo.cc +++ b/src/xenia/hid/hid_demo.cc @@ -38,6 +38,7 @@ DEFINE_string(hid, "any", "Input system. Use: [any, nop, sdl, winkey, xinput]", "General"); #define MAX_USERS 4 +#define ROW_HEIGHT_GENERAL 60 #define COL_WIDTH_STATE 320 #define COL_WIDTH_STROKE 416 @@ -45,6 +46,7 @@ namespace xe { namespace hid { std::unique_ptr input_system_; +bool is_active = true; std::vector> CreateInputDrivers( ui::Window* window) { @@ -118,7 +120,7 @@ int hid_demo_main(const std::vector& args) { loop->on_quit.AddListener([&window](xe::ui::UIEvent* e) { window.reset(); }); // Initial size setting, done here so that it knows the menu exists. - window->Resize(COL_WIDTH_STATE + COL_WIDTH_STROKE, 500); + window->Resize(COL_WIDTH_STATE + COL_WIDTH_STROKE, ROW_HEIGHT_GENERAL + 500); // Create the graphics context used for drawing and setup the window. 
std::unique_ptr graphics_provider; @@ -133,7 +135,9 @@ int hid_demo_main(const std::vector& args) { input_system_ = std::make_unique(window.get()); auto drivers = CreateInputDrivers(window.get()); for (size_t i = 0; i < drivers.size(); ++i) { - input_system_->AddDriver(std::move(drivers[i])); + auto& driver = drivers[i]; + driver->set_is_active_callback([]() -> bool { return is_active; }); + input_system_->AddDriver(std::move(driver)); } window->Invalidate(); @@ -149,10 +153,22 @@ int hid_demo_main(const std::vector& args) { ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoScrollbar; - ImGui::Begin("GetState()", nullptr, wflags); + ImGui::Begin("General", nullptr, wflags); { ImGui::SetWindowPos(ImVec2(0, 0)); - ImGui::SetWindowSize(ImVec2(COL_WIDTH_STATE, io.DisplaySize.y)); + ImGui::SetWindowSize( + ImVec2(COL_WIDTH_STATE + COL_WIDTH_STROKE, ROW_HEIGHT_GENERAL)); + + ImGui::Text("Input System (hid) = \"%s\"", cvars::hid.c_str()); + ImGui::Checkbox("is_active", &is_active); + } + ImGui::End(); + + ImGui::Begin("GetState()", nullptr, wflags); + { + ImGui::SetWindowPos(ImVec2(0, ROW_HEIGHT_GENERAL)); + ImGui::SetWindowSize( + ImVec2(COL_WIDTH_STATE, io.DisplaySize.y - ROW_HEIGHT_GENERAL)); static bool enable_GetState = false; ImGui::Checkbox("Active", &enable_GetState); @@ -167,8 +183,9 @@ int hid_demo_main(const std::vector& args) { ImGui::Begin("GetKeystroke()", nullptr, wflags); { - ImGui::SetWindowPos(ImVec2(COL_WIDTH_STATE, 0)); - ImGui::SetWindowSize(ImVec2(COL_WIDTH_STROKE, io.DisplaySize.y)); + ImGui::SetWindowPos(ImVec2(COL_WIDTH_STATE, ROW_HEIGHT_GENERAL)); + ImGui::SetWindowSize( + ImVec2(COL_WIDTH_STROKE, io.DisplaySize.y - ROW_HEIGHT_GENERAL)); static bool enable_GetKeystroke = false; static bool hide_repeats = false; diff --git a/src/xenia/hid/sdl/sdl_input_driver.cc b/src/xenia/hid/sdl/sdl_input_driver.cc index fb77397da..97a21cc8d 100644 --- a/src/xenia/hid/sdl/sdl_input_driver.cc +++ 
b/src/xenia/hid/sdl/sdl_input_driver.cc @@ -77,7 +77,7 @@ X_STATUS SDLInputDriver::Setup() { sdl_events_initialized_ = true; SDL_EventFilter event_filter{[](void* userdata, SDL_Event* event) -> int { - if (!userdata) { + if (!userdata || !event) { assert_always(); return 0; } @@ -102,17 +102,17 @@ X_STATUS SDLInputDriver::Setup() { } switch (type) { case SDL_CONTROLLERDEVICEADDED: - driver->OnControllerDeviceAdded(event); + driver->OnControllerDeviceAdded(*event); break; case SDL_CONTROLLERDEVICEREMOVED: - driver->OnControllerDeviceRemoved(event); + driver->OnControllerDeviceRemoved(*event); break; case SDL_CONTROLLERAXISMOTION: - driver->OnControllerDeviceAxisMotion(event); + driver->OnControllerDeviceAxisMotion(*event); break; case SDL_CONTROLLERBUTTONDOWN: case SDL_CONTROLLERBUTTONUP: - driver->OnControllerDeviceButtonChanged(event); + driver->OnControllerDeviceButtonChanged(*event); break; default: break; @@ -193,7 +193,11 @@ X_RESULT SDLInputDriver::GetState(uint32_t user_index, return X_ERROR_BAD_ARGUMENTS; } - QueueControllerUpdate(); + auto is_active = this->is_active(); + + if (is_active) { + QueueControllerUpdate(); + } std::unique_lock guard(controllers_mutex_); @@ -203,12 +207,20 @@ X_RESULT SDLInputDriver::GetState(uint32_t user_index, } // Make sure packet_number is only incremented by 1, even if there have been - // multiple updates between GetState calls. - if (controller->state_changed) { + // multiple updates between GetState calls. Also track `is_active` to + // increment the packet number if it changed. + if ((is_active != controller->is_active) || + (is_active && controller->state_changed)) { controller->state.packet_number++; + controller->is_active = is_active; controller->state_changed = false; } - *out_state = controller->state; + std::memcpy(out_state, &controller->state, sizeof(*out_state)); + if (!is_active) { + // Simulate an "untouched" controller. 
When we become active again the + // pressed buttons aren't lost and will be visible again. + std::memset(&out_state->gamepad, 0, sizeof(out_state->gamepad)); + } return X_ERROR_SUCCESS; } @@ -242,6 +254,8 @@ X_RESULT SDLInputDriver::SetState(uint32_t user_index, X_RESULT SDLInputDriver::GetKeystroke(uint32_t users, uint32_t flags, X_INPUT_KEYSTROKE* out_keystroke) { + // TODO(JoelLinn): Figure out the flags + // https://github.com/evilC/UCR/blob/0489929e2a8e39caa3484c67f3993d3fba39e46f/Libraries/XInput.ahk#L85-L98 assert(sdl_events_initialized_ && sdl_gamecontroller_initialized_); bool user_any = users == 0xFF; if (users >= HID_SDL_USER_COUNT && !user_any) { @@ -296,7 +310,11 @@ X_RESULT SDLInputDriver::GetKeystroke(uint32_t users, uint32_t flags, X_INPUT_GAMEPAD_VK_RTHUMB_DOWNLEFT, }; - QueueControllerUpdate(); + auto is_active = this->is_active(); + + if (is_active) { + QueueControllerUpdate(); + } std::unique_lock guard(controllers_mutex_); @@ -311,8 +329,13 @@ X_RESULT SDLInputDriver::GetKeystroke(uint32_t users, uint32_t flags, } } - const uint64_t curr_butts = controller->state.gamepad.buttons | - AnalogToKeyfield(controller->state.gamepad); + // If input is not active (e.g. due to a dialog overlay), force buttons to + // "unpressed". The algorithm will automatically send UP events when + // `is_active()` goes low and DOWN events when it goes high again. + const uint64_t curr_butts = + is_active ? (controller->state.gamepad.buttons | + AnalogToKeyfield(controller->state.gamepad)) + : uint64_t(0); KeystrokeState& last = keystroke_states_.at(user_index); // Handle repeating @@ -384,12 +407,12 @@ X_RESULT SDLInputDriver::GetKeystroke(uint32_t users, uint32_t flags, return X_ERROR_EMPTY; } -void SDLInputDriver::OnControllerDeviceAdded(SDL_Event* event) { +void SDLInputDriver::OnControllerDeviceAdded(const SDL_Event& event) { assert(window()->loop()->is_on_loop_thread()); std::unique_lock guard(controllers_mutex_); // Open the controller. 
- const auto controller = SDL_GameControllerOpen(event->cdevice.which); + const auto controller = SDL_GameControllerOpen(event.cdevice.which); if (!controller) { assert_always(); return; @@ -423,52 +446,52 @@ void SDLInputDriver::OnControllerDeviceAdded(SDL_Event* event) { } } -void SDLInputDriver::OnControllerDeviceRemoved(SDL_Event* event) { +void SDLInputDriver::OnControllerDeviceRemoved(const SDL_Event& event) { assert(window()->loop()->is_on_loop_thread()); std::unique_lock guard(controllers_mutex_); // Find the disconnected gamecontroller and close it. - auto [found, i] = GetControllerIndexFromInstanceID(event->cdevice.which); - assert(found); - SDL_GameControllerClose(controllers_.at(i).sdl); - controllers_.at(i) = {}; - keystroke_states_.at(i) = {}; + auto idx = GetControllerIndexFromInstanceID(event.cdevice.which); + assert(idx); + SDL_GameControllerClose(controllers_.at(*idx).sdl); + controllers_.at(*idx) = {}; + keystroke_states_.at(*idx) = {}; } -void SDLInputDriver::OnControllerDeviceAxisMotion(SDL_Event* event) { +void SDLInputDriver::OnControllerDeviceAxisMotion(const SDL_Event& event) { assert(window()->loop()->is_on_loop_thread()); std::unique_lock guard(controllers_mutex_); - auto [found, i] = GetControllerIndexFromInstanceID(event->caxis.which); - assert(found); - auto& pad = controllers_.at(i).state.gamepad; - switch (event->caxis.axis) { + auto idx = GetControllerIndexFromInstanceID(event.caxis.which); + assert(idx); + auto& pad = controllers_.at(*idx).state.gamepad; + switch (event.caxis.axis) { case SDL_CONTROLLER_AXIS_LEFTX: - pad.thumb_lx = event->caxis.value; + pad.thumb_lx = event.caxis.value; break; case SDL_CONTROLLER_AXIS_LEFTY: - pad.thumb_ly = ~event->caxis.value; + pad.thumb_ly = ~event.caxis.value; break; case SDL_CONTROLLER_AXIS_RIGHTX: - pad.thumb_rx = event->caxis.value; + pad.thumb_rx = event.caxis.value; break; case SDL_CONTROLLER_AXIS_RIGHTY: - pad.thumb_ry = ~event->caxis.value; + pad.thumb_ry = ~event.caxis.value; break; 
case SDL_CONTROLLER_AXIS_TRIGGERLEFT: - pad.left_trigger = static_cast(event->caxis.value >> 7); + pad.left_trigger = static_cast(event.caxis.value >> 7); break; case SDL_CONTROLLER_AXIS_TRIGGERRIGHT: - pad.right_trigger = static_cast(event->caxis.value >> 7); + pad.right_trigger = static_cast(event.caxis.value >> 7); break; default: assert_always(); break; } - controllers_.at(i).state_changed = true; + controllers_.at(*idx).state_changed = true; } -void SDLInputDriver::OnControllerDeviceButtonChanged(SDL_Event* event) { +void SDLInputDriver::OnControllerDeviceButtonChanged(const SDL_Event& event) { assert(window()->loop()->is_on_loop_thread()); std::unique_lock guard(controllers_mutex_); @@ -492,15 +515,15 @@ void SDLInputDriver::OnControllerDeviceButtonChanged(SDL_Event* event) { X_INPUT_GAMEPAD_DPAD_LEFT, X_INPUT_GAMEPAD_DPAD_RIGHT}; - auto [found, i] = GetControllerIndexFromInstanceID(event->cbutton.which); - assert(found); - auto& controller = controllers_.at(i); + auto idx = GetControllerIndexFromInstanceID(event.cbutton.which); + assert(idx); + auto& controller = controllers_.at(*idx); uint16_t xbuttons = controller.state.gamepad.buttons; // Lookup the XInput button code. - auto xbutton = xbutton_lookup.at(event->cbutton.button); + auto xbutton = xbutton_lookup.at(event.cbutton.button); // Pressed or released? - if (event->cbutton.state == SDL_PRESSED) { + if (event.cbutton.state == SDL_PRESSED) { if (xbutton == X_INPUT_GAMEPAD_GUIDE && !cvars::guide_button) { return; } @@ -512,7 +535,7 @@ void SDLInputDriver::OnControllerDeviceButtonChanged(SDL_Event* event) { controller.state_changed = true; } -std::pair SDLInputDriver::GetControllerIndexFromInstanceID( +std::optional SDLInputDriver::GetControllerIndexFromInstanceID( SDL_JoystickID instance_id) { // Loop through our controllers and try to match the given ID. 
for (size_t i = 0; i < controllers_.size(); i++) { @@ -525,10 +548,10 @@ std::pair SDLInputDriver::GetControllerIndexFromInstanceID( auto joy_instance_id = SDL_JoystickInstanceID(joystick); assert(joy_instance_id >= 0); if (joy_instance_id == instance_id) { - return {true, i}; + return i; } } - return {false, 0}; + return std::nullopt; } SDLInputDriver::ControllerState* SDLInputDriver::GetControllerState( diff --git a/src/xenia/hid/sdl/sdl_input_driver.h b/src/xenia/hid/sdl/sdl_input_driver.h index 84555f70d..f98619a2f 100644 --- a/src/xenia/hid/sdl/sdl_input_driver.h +++ b/src/xenia/hid/sdl/sdl_input_driver.h @@ -13,6 +13,7 @@ #include #include #include +#include #include "SDL.h" #include "xenia/hid/input_driver.h" @@ -44,8 +45,9 @@ class SDLInputDriver : public InputDriver { protected: struct ControllerState { SDL_GameController* sdl; - bool state_changed; X_INPUT_STATE state; + bool state_changed; + bool is_active; }; enum class RepeatState { @@ -63,11 +65,11 @@ class SDLInputDriver : public InputDriver { }; protected: - void OnControllerDeviceAdded(SDL_Event* event); - void OnControllerDeviceRemoved(SDL_Event* event); - void OnControllerDeviceAxisMotion(SDL_Event* event); - void OnControllerDeviceButtonChanged(SDL_Event* event); - std::pair GetControllerIndexFromInstanceID( + void OnControllerDeviceAdded(const SDL_Event& event); + void OnControllerDeviceRemoved(const SDL_Event& event); + void OnControllerDeviceAxisMotion(const SDL_Event& event); + void OnControllerDeviceButtonChanged(const SDL_Event& event); + std::optional GetControllerIndexFromInstanceID( SDL_JoystickID instance_id); ControllerState* GetControllerState(uint32_t user_index); bool TestSDLVersion() const; diff --git a/src/xenia/kernel/xam/user_profile.h b/src/xenia/kernel/xam/user_profile.h index 309d0e0f8..92bf80bec 100644 --- a/src/xenia/kernel/xam/user_profile.h +++ b/src/xenia/kernel/xam/user_profile.h @@ -202,7 +202,7 @@ class UserProfile { uint64_t xuid() const { return xuid_; } 
std::string name() const { return name_; } uint32_t signin_state() const { return 1; } - uint32_t type() const { return 2; /* online profile? */ } + uint32_t type() const { return 1 | 2; /* local | online profile? */ } void AddSetting(std::unique_ptr setting); Setting* GetSetting(uint32_t setting_id); diff --git a/src/xenia/kernel/xam/xam_enum.cc b/src/xenia/kernel/xam/xam_enum.cc index 2cab56ab7..9aec9b056 100644 --- a/src/xenia/kernel/xam/xam_enum.cc +++ b/src/xenia/kernel/xam/xam_enum.cc @@ -32,50 +32,44 @@ uint32_t xeXamEnumerate(uint32_t handle, uint32_t flags, void* buffer, uint32_t overlapped_ptr) { assert_true(flags == 0); - auto e = kernel_state()->object_table()->LookupObject(handle); - if (!e) { - if (overlapped_ptr) { - kernel_state()->CompleteOverlappedImmediateEx( - overlapped_ptr, X_ERROR_INVALID_HANDLE, X_ERROR_INVALID_HANDLE, 0); - return X_ERROR_IO_PENDING; - } else { - return X_ERROR_INVALID_HANDLE; - } - } - - size_t actual_buffer_length = buffer_length; - if (buffer_length == e->items_per_enumerate()) { - actual_buffer_length = e->item_size() * e->items_per_enumerate(); - // Known culprits: - // Final Fight: Double Impact (saves) - XELOGW( - "Broken usage of XamEnumerate! 
buffer length={:X} vs actual " - "length={:X} " - "(item size={:X}, items per enumerate={})", - (uint32_t)buffer_length, actual_buffer_length, e->item_size(), - e->items_per_enumerate()); - } - - std::memset(buffer, 0, actual_buffer_length); - X_RESULT result; uint32_t item_count = 0; - if (actual_buffer_length < e->item_size()) { - result = X_ERROR_INSUFFICIENT_BUFFER; - } else if (e->current_item() >= e->item_count()) { - result = X_ERROR_NO_MORE_FILES; + auto e = kernel_state()->object_table()->LookupObject(handle); + if (!e) { + result = X_ERROR_INVALID_HANDLE; } else { - auto item_buffer = static_cast(buffer); - auto max_items = actual_buffer_length / e->item_size(); - while (max_items--) { - if (!e->WriteItem(item_buffer)) { - break; - } - item_buffer += e->item_size(); - item_count++; + size_t actual_buffer_length = buffer_length; + if (buffer_length == e->items_per_enumerate()) { + actual_buffer_length = e->item_size() * e->items_per_enumerate(); + // Known culprits: + // Final Fight: Double Impact (saves) + XELOGW( + "Broken usage of XamEnumerate! 
buffer length={:X} vs actual " + "length={:X} " + "(item size={:X}, items per enumerate={})", + (uint32_t)buffer_length, actual_buffer_length, e->item_size(), + e->items_per_enumerate()); + } + + std::memset(buffer, 0, actual_buffer_length); + + if (actual_buffer_length < e->item_size()) { + result = X_ERROR_INSUFFICIENT_BUFFER; + } else if (e->current_item() >= e->item_count()) { + result = X_ERROR_NO_MORE_FILES; + } else { + auto item_buffer = static_cast(buffer); + auto max_items = actual_buffer_length / e->item_size(); + while (max_items--) { + if (!e->WriteItem(item_buffer)) { + break; + } + item_buffer += e->item_size(); + item_count++; + } + result = X_ERROR_SUCCESS; } - result = X_ERROR_SUCCESS; } if (items_returned) { diff --git a/src/xenia/kernel/xam/xam_net.cc b/src/xenia/kernel/xam/xam_net.cc index a28b788e3..7b37e2b94 100644 --- a/src/xenia/kernel/xam/xam_net.cc +++ b/src/xenia/kernel/xam/xam_net.cc @@ -958,6 +958,11 @@ dword_result_t NetDll___WSAFDIsSet(dword_t socket_handle, } DECLARE_XAM_EXPORT1(NetDll___WSAFDIsSet, kNetworking, kImplemented); +void NetDll_WSASetLastError(dword_t error_code) { + XThread::SetLastError(error_code); +} +DECLARE_XAM_EXPORT1(NetDll_WSASetLastError, kNetworking, kImplemented); + void RegisterNetExports(xe::cpu::ExportResolver* export_resolver, KernelState* kernel_state) {} diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc index 6b0b6783b..5f19d7ca2 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc @@ -142,7 +142,8 @@ dword_result_t NtCreateFile(lpdword_t handle_out, dword_t desired_access, X_STATUS result = kernel_state()->file_system()->OpenFile( root_entry, target_path, vfs::FileDisposition((uint32_t)creation_disposition), desired_access, - (create_options & CreateOptions::FILE_DIRECTORY_FILE) != 0, &vfs_file, + (create_options & CreateOptions::FILE_DIRECTORY_FILE) != 0, + (create_options & 
CreateOptions::FILE_NON_DIRECTORY_FILE) != 0, &vfs_file, &file_action); object_ref file = nullptr; diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc index 8ecdabd5d..de672b227 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc @@ -135,8 +135,10 @@ dword_result_t NtAllocateVirtualMemory(lpdword_t base_addr_ptr, } uint32_t protect = FromXdkProtectFlags(protect_bits); uint32_t address = 0; + BaseHeap* heap; + if (adjusted_base != 0) { - auto heap = kernel_memory()->LookupHeap(adjusted_base); + heap = kernel_memory()->LookupHeap(adjusted_base); if (heap->page_size() != page_size) { // Specified the wrong page size for the wrong heap. return X_STATUS_ACCESS_DENIED; @@ -148,7 +150,7 @@ dword_result_t NtAllocateVirtualMemory(lpdword_t base_addr_ptr, } } else { bool top_down = !!(alloc_type & X_MEM_TOP_DOWN); - auto heap = kernel_memory()->LookupHeapByType(false, page_size); + heap = kernel_memory()->LookupHeapByType(false, page_size); heap->Alloc(adjusted_size, page_size, allocation_type, protect, top_down, &address); } @@ -160,7 +162,14 @@ dword_result_t NtAllocateVirtualMemory(lpdword_t base_addr_ptr, // Zero memory, if needed. if (address && !(alloc_type & X_MEM_NOZERO)) { if (alloc_type & X_MEM_COMMIT) { + if (!(protect & kMemoryProtectWrite)) { + heap->Protect(address, adjusted_size, + kMemoryProtectRead | kMemoryProtectWrite); + } kernel_memory()->Zero(address, adjusted_size); + if (!(protect & kMemoryProtectWrite)) { + heap->Protect(address, adjusted_size, protect); + } } } @@ -400,7 +409,7 @@ dword_result_t MmQueryAddressProtect(dword_t base_address) { if (!heap->QueryProtect(base_address, &access)) { access = 0; } - access = ToXdkProtectFlags(access); + access = !access ? 
0 : ToXdkProtectFlags(access); return access; } diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc index bf399937e..bbe78ec87 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc @@ -205,22 +205,30 @@ dword_result_t NtSuspendThread(dword_t handle, lpdword_t suspend_count_ptr) { } DECLARE_XBOXKRNL_EXPORT1(NtSuspendThread, kThreading, kImplemented); -void KeSetCurrentStackPointers(lpvoid_t stack_ptr, - pointer_t cur_thread, +void KeSetCurrentStackPointers(lpvoid_t stack_ptr, pointer_t thread, lpvoid_t stack_alloc_base, lpvoid_t stack_base, lpvoid_t stack_limit) { - auto thread = XThread::GetCurrentThread(); - auto context = thread->thread_state()->context(); - context->r[1] = stack_ptr.guest_address(); + auto current_thread = XThread::GetCurrentThread(); + auto context = current_thread->thread_state()->context(); + auto pcr = kernel_memory()->TranslateVirtual( + static_cast(context->r[13])); - auto pcr = - kernel_memory()->TranslateVirtual((uint32_t)context->r[13]); + thread->stack_alloc_base = stack_alloc_base.value(); + thread->stack_base = stack_base.value(); + thread->stack_limit = stack_limit.value(); pcr->stack_base_ptr = stack_base.guest_address(); pcr->stack_end_ptr = stack_limit.guest_address(); + context->r[1] = stack_ptr.guest_address(); - // TODO: Do we need to set the stack info on cur_thread? + // If a fiber is set, and the thread matches, reenter to avoid issues with + // host stack overflowing. 
+ if (thread->fiber_ptr && + current_thread->guest_object() == thread.guest_address()) { + current_thread->Reenter(static_cast(context->lr)); + } } -DECLARE_XBOXKRNL_EXPORT1(KeSetCurrentStackPointers, kThreading, kImplemented); +DECLARE_XBOXKRNL_EXPORT2(KeSetCurrentStackPointers, kThreading, kImplemented, + kHighFrequency); dword_result_t KeSetAffinityThread(lpvoid_t thread_ptr, dword_t affinity, lpdword_t previous_affinity_ptr) { diff --git a/src/xenia/kernel/xfile.cc b/src/xenia/kernel/xfile.cc index ee749b9e7..dc4553505 100644 --- a/src/xenia/kernel/xfile.cc +++ b/src/xenia/kernel/xfile.cc @@ -266,7 +266,7 @@ object_ref XFile::Restore(KernelState* kernel_state, vfs::FileAction action; auto res = kernel_state->file_system()->OpenFile( nullptr, abs_path, vfs::FileDisposition::kOpen, access, is_directory, - &vfs_file, &action); + false, &vfs_file, &action); if (XFAILED(res)) { XELOGE("Failed to open XFile: error {:08X}", res); return object_ref(file); diff --git a/src/xenia/kernel/xthread.cc b/src/xenia/kernel/xthread.cc index baa014b01..46f1ef961 100644 --- a/src/xenia/kernel/xthread.cc +++ b/src/xenia/kernel/xthread.cc @@ -498,6 +498,16 @@ X_STATUS XThread::Terminate(int exit_code) { return X_STATUS_SUCCESS; } +class reenter_exception { + public: + reenter_exception(uint32_t address) : address_(address){}; + virtual ~reenter_exception(){}; + uint32_t address() const { return address_; } + + private: + uint32_t address_; +}; + void XThread::Execute() { XELOGKERNEL("XThread::Execute thid {} (handle={:08X}, '{}', native={:08X})", thread_id_, handle(), thread_name_, thread_->system_id()); @@ -510,31 +520,61 @@ void XThread::Execute() { // have time to initialize shared structures AFTER CreateThread (RR). xe::threading::Sleep(std::chrono::milliseconds(10)); - int exit_code = 0; - // Dispatch any APCs that were queued before the thread was created first. 
DeliverAPCs(); + uint32_t address; + std::vector args; + bool want_exit_code; + int exit_code = 0; + // If a XapiThreadStartup value is present, we use that as a trampoline. // Otherwise, we are a raw thread. if (creation_params_.xapi_thread_startup) { - uint64_t args[] = {creation_params_.start_address, - creation_params_.start_context}; - kernel_state()->processor()->Execute(thread_state_, - creation_params_.xapi_thread_startup, - args, xe::countof(args)); + address = creation_params_.xapi_thread_startup; + args.push_back(creation_params_.start_address); + args.push_back(creation_params_.start_context); + want_exit_code = false; } else { // Run user code. - uint64_t args[] = {creation_params_.start_context}; - exit_code = static_cast(kernel_state()->processor()->Execute( - thread_state_, creation_params_.start_address, args, - xe::countof(args))); - // If we got here it means the execute completed without an exit being - // called. - // Treat the return code as an implicit exit code. + address = creation_params_.start_address; + args.push_back(creation_params_.start_context); + want_exit_code = true; } - Exit(exit_code); + uint32_t next_address; + try { + exit_code = static_cast(kernel_state()->processor()->Execute( + thread_state_, address, args.data(), args.size())); + next_address = 0; + } catch (const reenter_exception& ree) { + next_address = ree.address(); + } + + // See XThread::Reenter comments. + while (next_address != 0) { + try { + kernel_state()->processor()->ExecuteRaw(thread_state_, next_address); + next_address = 0; + if (want_exit_code) { + exit_code = static_cast(thread_state_->context()->r[3]); + } + } catch (const reenter_exception& ree) { + next_address = ree.address(); + } + } + + // If we got here it means the execute completed without an exit being called. + // Treat the return code as an implicit exit code (if desired). + Exit(!want_exit_code ? 
0 : exit_code); +} + +void XThread::Reenter(uint32_t address) { + // TODO(gibbed): Maybe use setjmp/longjmp on Windows? + // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/longjmp#remarks + // On Windows with /EH, setjmp/longjmp do stack unwinding. + // Is there a better solution than exceptions for stack unwinding? + throw reenter_exception(address); } void XThread::EnterCriticalRegion() { diff --git a/src/xenia/kernel/xthread.h b/src/xenia/kernel/xthread.h index ec0fe41ef..78a6591a1 100644 --- a/src/xenia/kernel/xthread.h +++ b/src/xenia/kernel/xthread.h @@ -70,35 +70,72 @@ struct XAPC { // Processor Control Region struct X_KPCR { xe::be tls_ptr; // 0x0 - char unk_04[0x2C]; // 0x4 + uint8_t unk_04[0x2C]; // 0x4 xe::be pcr_ptr; // 0x30 - char unk_34[0x3C]; // 0x34 + uint8_t unk_34[0x3C]; // 0x34 xe::be stack_base_ptr; // 0x70 Stack base address (high addr) xe::be stack_end_ptr; // 0x74 Stack end (low addr) - char unk_78[0x88]; // 0x78 + uint8_t unk_78[0x88]; // 0x78 xe::be current_thread; // 0x100 - char unk_104[0x8]; // 0x104 - xe::be current_cpu; // 0x10C - char unk_10D[0x43]; // 0x10D + uint8_t unk_104[0x8]; // 0x104 + uint8_t current_cpu; // 0x10C + uint8_t unk_10D[0x43]; // 0x10D xe::be dpc_active; // 0x150 }; struct X_KTHREAD { - X_DISPATCH_HEADER header; // 0x0 - char unk_10[0xAC]; // 0x10 - uint8_t suspend_count; // 0xBC - uint8_t unk_BD; // 0xBD - uint8_t unk_BE; // 0xBE - uint8_t current_cpu; // 0xBF - char unk_C0[0x70]; // 0xC0 - xe::be create_time; // 0x130 - xe::be exit_time; // 0x138 - xe::be exit_status; // 0x140 - char unk_144[0x8]; // 0x144 - xe::be thread_id; // 0x14C - char unk_150[0x10]; // 0x150 - xe::be last_error; // 0x160 - char unk_164[0x94C]; // 0x164 + X_DISPATCH_HEADER header; // 0x0 + xe::be unk_10; // 0x10 + xe::be unk_14; // 0x14 + uint8_t unk_18[0x28]; // 0x10 + xe::be unk_40; // 0x40 + xe::be unk_44; // 0x44 + xe::be unk_48; // 0x48 + xe::be unk_4C; // 0x4C + uint8_t unk_50[0x4]; // 0x50 + xe::be unk_54; // 0x54 + 
xe::be unk_56; // 0x56 + uint8_t unk_58[0x4]; // 0x58 + xe::be stack_base; // 0x5C + xe::be stack_limit; // 0x60 + uint8_t unk_64[0x4]; // 0x64 + xe::be tls_address; // 0x68 + uint8_t unk_6C; // 0x6C + uint8_t unk_6D[0x7]; // 0x6D + xe::be unk_74; // 0x74 + xe::be unk_78; // 0x78 + xe::be unk_7C; // 0x7C + xe::be unk_80; // 0x80 + xe::be unk_84; // 0x84 + uint8_t unk_88[0x3]; // 0x88 + uint8_t unk_8B; // 0x8B + uint8_t unk_8C[0x10]; // 0x8C + xe::be unk_9C; // 0x9C + uint8_t unk_A0[0x1C]; // 0xA0 + uint8_t suspend_count; // 0xBC + uint8_t unk_BD; // 0xBD + uint8_t unk_BE; // 0xBE + uint8_t current_cpu; // 0xBF + uint8_t unk_C0[0x10]; // 0xC0 + xe::be stack_alloc_base; // 0xD0 + uint8_t unk_D4[0x5C]; // 0xD4 + xe::be create_time; // 0x130 + xe::be exit_time; // 0x138 + xe::be exit_status; // 0x140 + xe::be unk_144; // 0x144 + xe::be unk_148; // 0x148 + xe::be thread_id; // 0x14C + xe::be start_address; // 0x150 + xe::be unk_154; // 0x154 + xe::be unk_158; // 0x158 + uint8_t unk_15C[0x4]; // 0x15C + xe::be last_error; // 0x160 + xe::be fiber_ptr; // 0x164 + uint8_t unk_168[0x4]; // 0x168 + xe::be creation_flags; // 0x16C + uint8_t unk_170[0xC]; // 0x170 + xe::be unk_17C; // 0x17C + uint8_t unk_180[0x930]; // 0x180 // This struct is actually quite long... so uh, not filling this out! 
}; @@ -151,6 +188,8 @@ class XThread : public XObject, public cpu::Thread { virtual void Execute(); + virtual void Reenter(uint32_t address); + static void EnterCriticalRegion(); static void LeaveCriticalRegion(); uint32_t RaiseIrql(uint32_t new_irql); diff --git a/src/xenia/ui/window_gtk.cc b/src/xenia/ui/window_gtk.cc index a6ca2087b..ed8fad055 100644 --- a/src/xenia/ui/window_gtk.cc +++ b/src/xenia/ui/window_gtk.cc @@ -415,14 +415,20 @@ GTKMenuItem::~GTKMenuItem() { void GTKMenuItem::OnChildAdded(MenuItem* generic_child_item) { auto child_item = static_cast(generic_child_item); + GtkWidget* submenu = nullptr; switch (child_item->type()) { case MenuItem::Type::kNormal: // Nothing special. break; case MenuItem::Type::kPopup: if (GTK_IS_MENU_ITEM(menu_)) { - assert(gtk_menu_item_get_submenu(GTK_MENU_ITEM(menu_)) == nullptr); - gtk_menu_item_set_submenu(GTK_MENU_ITEM(menu_), child_item->handle()); + submenu = gtk_menu_item_get_submenu(GTK_MENU_ITEM(menu_)); + // Get sub menu and if it doesn't exist create it + if (submenu == nullptr) { + submenu = gtk_menu_new(); + gtk_menu_item_set_submenu(GTK_MENU_ITEM(menu_), submenu); + } + gtk_menu_shell_append(GTK_MENU_SHELL(submenu), child_item->handle()); } else { gtk_menu_shell_append(GTK_MENU_SHELL(menu_), child_item->handle()); } @@ -431,7 +437,7 @@ void GTKMenuItem::OnChildAdded(MenuItem* generic_child_item) { case MenuItem::Type::kString: assert(GTK_IS_MENU_ITEM(menu_)); // Get sub menu and if it doesn't exist create it - GtkWidget* submenu = gtk_menu_item_get_submenu(GTK_MENU_ITEM(menu_)); + submenu = gtk_menu_item_get_submenu(GTK_MENU_ITEM(menu_)); if (submenu == nullptr) { submenu = gtk_menu_new(); gtk_menu_item_set_submenu(GTK_MENU_ITEM(menu_), submenu); diff --git a/src/xenia/vfs/virtual_file_system.cc b/src/xenia/vfs/virtual_file_system.cc index 31cb82f66..c0f888318 100644 --- a/src/xenia/vfs/virtual_file_system.cc +++ b/src/xenia/vfs/virtual_file_system.cc @@ -172,7 +172,8 @@ X_STATUS 
VirtualFileSystem::OpenFile(Entry* root_entry, const std::string_view path, FileDisposition creation_disposition, uint32_t desired_access, bool is_directory, - File** out_file, FileAction* out_action) { + bool is_non_directory, File** out_file, + FileAction* out_action) { // TODO(gibbed): should 'is_directory' remain as a bool or should it be // flipped to a generic FileAttributeFlags? @@ -207,6 +208,12 @@ X_STATUS VirtualFileSystem::OpenFile(Entry* root_entry, entry = !root_entry ? ResolvePath(path) : root_entry->GetChild(path); } + if (entry) { + if (entry->attributes() & kFileAttributeDirectory && is_non_directory) { + return X_STATUS_FILE_IS_A_DIRECTORY; + } + } + // Check if exists (if we need it to), or that it doesn't (if it shouldn't). switch (creation_disposition) { case FileDisposition::kOpen: diff --git a/src/xenia/vfs/virtual_file_system.h b/src/xenia/vfs/virtual_file_system.h index 8d5b84697..49e9083dc 100644 --- a/src/xenia/vfs/virtual_file_system.h +++ b/src/xenia/vfs/virtual_file_system.h @@ -43,7 +43,8 @@ class VirtualFileSystem { X_STATUS OpenFile(Entry* root_entry, const std::string_view path, FileDisposition creation_disposition, - uint32_t desired_access, bool is_directory, File** out_file, + uint32_t desired_access, bool is_directory, + bool is_non_directory, File** out_file, FileAction* out_action); private: diff --git a/src/xenia/xbox.h b/src/xenia/xbox.h index 62f4a1f65..2080b236c 100644 --- a/src/xenia/xbox.h +++ b/src/xenia/xbox.h @@ -64,6 +64,7 @@ typedef uint32_t X_STATUS; #define X_STATUS_PROCEDURE_NOT_FOUND ((X_STATUS)0xC000007AL) #define X_STATUS_INSUFFICIENT_RESOURCES ((X_STATUS)0xC000009AL) #define X_STATUS_MEMORY_NOT_ALLOCATED ((X_STATUS)0xC00000A0L) +#define X_STATUS_FILE_IS_A_DIRECTORY ((X_STATUS)0xC00000BAL) #define X_STATUS_NOT_SUPPORTED ((X_STATUS)0xC00000BBL) #define X_STATUS_INVALID_PARAMETER_1 ((X_STATUS)0xC00000EFL) #define X_STATUS_INVALID_PARAMETER_2 ((X_STATUS)0xC00000F0L) diff --git a/third_party/xxhash 
b/third_party/xxhash new file mode 160000 index 000000000..4c881f796 --- /dev/null +++ b/third_party/xxhash @@ -0,0 +1 @@ +Subproject commit 4c881f796d6af27ef7d9c48f87817da0d3d75dc1 diff --git a/third_party/xxhash/LICENSE b/third_party/xxhash/LICENSE deleted file mode 100644 index 7de801ed1..000000000 --- a/third_party/xxhash/LICENSE +++ /dev/null @@ -1,24 +0,0 @@ -xxHash Library -Copyright (c) 2012-2014, Yann Collet -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/third_party/xxhash/Makefile b/third_party/xxhash/Makefile deleted file mode 100644 index 94cf4a939..000000000 --- a/third_party/xxhash/Makefile +++ /dev/null @@ -1,67 +0,0 @@ -# ################################################################ -# xxHash Makefile -# Copyright (C) Yann Collet 2012-2014 -# GPL v2 License -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# -# You can contact the author at : -# - xxHash source repository : http://code.google.com/p/xxhash/ -# ################################################################ -# xxHash.exe : benchmark program, to demonstrate xxHash speed -# ################################################################ - -CC := $(CC) -CFLAGS ?= -O3 -CFLAGS += -I. 
-std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes - - -# Define *.exe as extension for Windows systems -ifneq (,$(filter Windows%,$(OS))) -EXT =.exe -else -EXT = -endif - - -default: xxhsum - -all: xxhsum xxhsum32 - -xxhsum: xxhash.c xxhsum.c - $(CC) $(CFLAGS) $^ -o $@$(EXT) - ln -sf $@ xxh32sum - ln -sf $@ xxh64sum - -xxhsum32: xxhash.c xxhsum.c - $(CC) -m32 $(CFLAGS) $^ -o $@$(EXT) - -test: $(TEST_TARGETS) - -test: xxhsum - ./xxhsum < xxhash.c - ./xxhsum -b xxhash.c - valgrind --leak-check=yes ./xxhsum -bi1 xxhash.c - valgrind --leak-check=yes ./xxhsum -H0 xxhash.c - valgrind --leak-check=yes ./xxhsum -H1 xxhash.c - -test-all: test xxhsum32 - ./xxhsum32 -b xxhash.c - -clean: - @rm -f core *.o xxhsum$(EXT) xxhsum32$(EXT) xxh32sum xxh64sum - @echo cleaning completed - - diff --git a/third_party/xxhash/README.md b/third_party/xxhash/README.md deleted file mode 100644 index 06f63764c..000000000 --- a/third_party/xxhash/README.md +++ /dev/null @@ -1,74 +0,0 @@ -xxHash - Extremely fast hash algorithm -====================================== - -xxHash is an Extremely fast Hash algorithm, running at RAM speed limits. -It successfully passes the [SMHasher](http://code.google.com/p/smhasher/wiki/SMHasher) Test suite evaluating Hash quality. - -|Branch |Status | -|------------|---------| -|master | [![Build Status](https://travis-ci.org/Cyan4973/xxHash.svg?branch=master)](https://travis-ci.org/Cyan4973/xxHash?branch=master) | -|dev | [![Build Status](https://travis-ci.org/Cyan4973/xxHash.svg?branch=dev)](https://travis-ci.org/Cyan4973/xxHash?branch=dev) | - - -Benchmarks -------------------------- - -The benchmark uses SMHasher speed test, compiled with Visual on a Windows Seven 32 bits system. -The reference system uses a Core 2 Duo @3GHz - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameSpeedQ.ScoreAuthor
xxHash5.4 GB/s10Y.C.
MumurHash 3a2.7 GB/s10Austin Appleby
SBox1.4 GB/s9Bret Mulvey
Lookup31.2 GB/s9Bob Jenkins
CityHash641.05 GB/s10Pike & Alakuijala
FNV0.55 GB/s5Fowler, Noll, Vo
CRC320.43 GB/s9
SipHash0.34 GB/s10Jean-Philippe Aumasson
MD5-320.33 GB/s10Ronald L. Rivest
SHA1-320.28 GB/s10
- - -Q.Score is a measure of quality of the hash function. -It depends on successfully passing SMHasher test set. -10 is a perfect score. - -A new version, XXH64, has been created thanks to Mathias Westerdahl contribution, which offers superior speed and dispersion for 64-bits systems. Note however that 32-bits applications will still run faster using the 32-bits version. - -SMHasher speed test, compiled using GCC 4.8.2, a Linux Mint 64-bits. -The reference system uses a Core i5-3340M @2.7GHz - -| Version | Speed on 64-bits | Speed on 32-bits | -|------------|------------------|------------------| -| XXH64 | 13.8 GB/s | 1.9 GB/s | -| XXH32 | 6.8 GB/s | 6.0 GB/s | - - -This is an official mirror of xxHash project, [hosted on Google Code](http://code.google.com/p/xxhash/). -The intention is to offer github's capabilities to xxhash users, such as cloning, branch, pull requests or source download. - -The "master" branch will reflect, the status of xxhash at its official homepage. The "dev" branch is the one where all contributions will be merged. If you plan to propose a patch, please commit into the "dev" branch. Direct commit to "master" are not permitted. Feature branches will also exist, typically to introduce new requirements, and be temporarily available for testing before merge into "dev" branch. diff --git a/third_party/xxhash/README.xenia b/third_party/xxhash/README.xenia deleted file mode 100644 index b4b90c1f8..000000000 --- a/third_party/xxhash/README.xenia +++ /dev/null @@ -1,2 +0,0 @@ -https://code.google.com/p/xxhash/ -r39 on 12/23/2014 diff --git a/third_party/xxhash/xxhash.c b/third_party/xxhash/xxhash.c deleted file mode 100644 index 24a64b5f8..000000000 --- a/third_party/xxhash/xxhash.c +++ /dev/null @@ -1,928 +0,0 @@ -/* -xxHash - Fast Hash algorithm -Copyright (C) 2012-2014, Yann Collet. 
-BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -You can contact the author at : -- xxHash source repository : http://code.google.com/p/xxhash/ -- public discussion board : https://groups.google.com/forum/#!forum/lz4c -*/ - - -//************************************** -// Tuning parameters -//************************************** -// Unaligned memory access is automatically enabled for "common" CPU, such as x86. -// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected. -// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance. 
-// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32). -#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) -# define XXH_USE_UNALIGNED_ACCESS 1 -#endif - -// XXH_ACCEPT_NULL_INPUT_POINTER : -// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. -// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. -// This option has a very small performance cost (only measurable on small inputs). -// By default, this option is disabled. To enable it, uncomment below define : -// #define XXH_ACCEPT_NULL_INPUT_POINTER 1 - -// XXH_FORCE_NATIVE_FORMAT : -// By default, xxHash library provides endian-independant Hash values, based on little-endian convention. -// Results are therefore identical for little-endian and big-endian CPU. -// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. -// Should endian-independance be of no importance for your application, you may set the #define below to 1. -// It will improve speed for Big-endian CPU. -// This option has no impact on Little_Endian CPU. 
-#define XXH_FORCE_NATIVE_FORMAT 0 - -//************************************** -// Compiler Specific Options -//************************************** -// Disable some Visual warning messages -#ifdef _MSC_VER // Visual Studio -# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant -#endif - -#ifdef _MSC_VER // Visual Studio -# define FORCE_INLINE static __forceinline -#else -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -#endif - -//************************************** -// Includes & Memory related functions -//************************************** -#include "xxhash.h" -// Modify the local functions below should you wish to use some other memory routines -// for malloc(), free() -#include -static void* XXH_malloc(size_t s) { return malloc(s); } -static void XXH_free (void* p) { free(p); } -// for memcpy() -#include -static void* XXH_memcpy(void* dest, const void* src, size_t size) -{ - return memcpy(dest,src,size); -} - - -//************************************** -// Basic Types -//************************************** -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 -# include -typedef uint8_t BYTE; -typedef uint16_t U16; -typedef uint32_t U32; -typedef int32_t S32; -typedef uint64_t U64; -#else -typedef unsigned char BYTE; -typedef unsigned short U16; -typedef unsigned int U32; -typedef signed int S32; -typedef unsigned long long U64; -#endif - -#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS) -# define _PACKED __attribute__ ((packed)) -#else -# define _PACKED -#endif - -#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# ifdef __IBMC__ -# pragma pack(1) -# else -# pragma pack(push, 1) -# endif -#endif - -typedef struct _U32_S -{ - U32 v; -} _PACKED U32_S; -typedef struct _U64_S -{ - U64 v; -} _PACKED U64_S; - -#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# pragma pack(pop) 
-#endif - -#define A32(x) (((U32_S *)(x))->v) -#define A64(x) (((U64_S *)(x))->v) - - -//*************************************** -// Compiler-specific Functions and Macros -//*************************************** -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) - -// Note : although _rotl exists for minGW (GCC under windows), performance seems poor -#if defined(_MSC_VER) -# define XXH_rotl32(x,r) _rotl(x,r) -# define XXH_rotl64(x,r) _rotl64(x,r) -#else -# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) -# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) -#endif - -#if defined(_MSC_VER) // Visual Studio -# define XXH_swap32 _byteswap_ulong -# define XXH_swap64 _byteswap_uint64 -#elif GCC_VERSION >= 403 -# define XXH_swap32 __builtin_bswap32 -# define XXH_swap64 __builtin_bswap64 -#else -static inline U32 XXH_swap32 (U32 x) -{ - return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff ); -} -static inline U64 XXH_swap64 (U64 x) -{ - return ((x << 56) & 0xff00000000000000ULL) | - ((x << 40) & 0x00ff000000000000ULL) | - ((x << 24) & 0x0000ff0000000000ULL) | - ((x << 8) & 0x000000ff00000000ULL) | - ((x >> 8) & 0x00000000ff000000ULL) | - ((x >> 24) & 0x0000000000ff0000ULL) | - ((x >> 40) & 0x000000000000ff00ULL) | - ((x >> 56) & 0x00000000000000ffULL); -} -#endif - - -//************************************** -// Constants -//************************************** -#define PRIME32_1 2654435761U -#define PRIME32_2 2246822519U -#define PRIME32_3 3266489917U -#define PRIME32_4 668265263U -#define PRIME32_5 374761393U - -#define PRIME64_1 11400714785074694791ULL -#define PRIME64_2 14029467366897019727ULL -#define PRIME64_3 1609587929392839161ULL -#define PRIME64_4 9650029242287828579ULL -#define PRIME64_5 2870177450012600261ULL - -//************************************** -// Architecture Macros -//************************************** -typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } 
XXH_endianess; -#ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch -static const int one = 1; -# define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one)) -#endif - - -//************************************** -// Macros -//************************************** -#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations - - -//**************************** -// Memory reads -//**************************** -typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; - -FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr)); - else - return endian==XXH_littleEndian ? *(U32*)ptr : XXH_swap32(*(U32*)ptr); -} - -FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) -{ - return XXH_readLE32_align(ptr, endian, XXH_unaligned); -} - -FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? A64(ptr) : XXH_swap64(A64(ptr)); - else - return endian==XXH_littleEndian ? 
*(U64*)ptr : XXH_swap64(*(U64*)ptr); -} - -FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) -{ - return XXH_readLE64_align(ptr, endian, XXH_unaligned); -} - - -//**************************** -// Simple Hash Functions -//**************************** -FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* bEnd = p + len; - U32 h32; -#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) - { - len=0; - bEnd=p=(const BYTE*)(size_t)16; - } -#endif - - if (len>=16) - { - const BYTE* const limit = bEnd - 16; - U32 v1 = seed + PRIME32_1 + PRIME32_2; - U32 v2 = seed + PRIME32_2; - U32 v3 = seed + 0; - U32 v4 = seed - PRIME32_1; - - do - { - v1 += XXH_get32bits(p) * PRIME32_2; - v1 = XXH_rotl32(v1, 13); - v1 *= PRIME32_1; - p+=4; - v2 += XXH_get32bits(p) * PRIME32_2; - v2 = XXH_rotl32(v2, 13); - v2 *= PRIME32_1; - p+=4; - v3 += XXH_get32bits(p) * PRIME32_2; - v3 = XXH_rotl32(v3, 13); - v3 *= PRIME32_1; - p+=4; - v4 += XXH_get32bits(p) * PRIME32_2; - v4 = XXH_rotl32(v4, 13); - v4 *= PRIME32_1; - p+=4; - } - while (p<=limit); - - h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); - } - else - { - h32 = seed + PRIME32_5; - } - - h32 += (U32) len; - - while (p+4<=bEnd) - { - h32 += XXH_get32bits(p) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; - p+=4; - } - - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} - - -unsigned int XXH32 (const void* input, size_t len, unsigned seed) -{ -#if 0 - // Simple version, good for code maintenance, but unfortunately slow for small inputs - XXH32_state_t state; - XXH32_reset(&state, seed); - XXH32_update(&state, input, len); - return XXH32_digest(&state); -#else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - -# 
if !defined(XXH_USE_UNALIGNED_ACCESS) - if ((((size_t)input) & 3) == 0) // Input is aligned, let's leverage the speed advantage - { - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - } -# endif - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif -} - -FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* bEnd = p + len; - U64 h64; -#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) - { - len=0; - bEnd=p=(const BYTE*)(size_t)32; - } -#endif - - if (len>=32) - { - const BYTE* const limit = bEnd - 32; - U64 v1 = seed + PRIME64_1 + PRIME64_2; - U64 v2 = seed + PRIME64_2; - U64 v3 = seed + 0; - U64 v4 = seed - PRIME64_1; - - do - { - v1 += XXH_get64bits(p) * PRIME64_2; - p+=8; - v1 = XXH_rotl64(v1, 31); - v1 *= PRIME64_1; - v2 += XXH_get64bits(p) * PRIME64_2; - p+=8; - v2 = XXH_rotl64(v2, 31); - v2 *= PRIME64_1; - v3 += XXH_get64bits(p) * PRIME64_2; - p+=8; - v3 = XXH_rotl64(v3, 31); - v3 *= PRIME64_1; - v4 += XXH_get64bits(p) * PRIME64_2; - p+=8; - v4 = XXH_rotl64(v4, 31); - v4 *= PRIME64_1; - } - while (p<=limit); - - h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - - v1 *= PRIME64_2; - v1 = XXH_rotl64(v1, 31); - v1 *= PRIME64_1; - h64 ^= v1; - h64 = h64 * PRIME64_1 + PRIME64_4; - - v2 *= PRIME64_2; - v2 = XXH_rotl64(v2, 31); - v2 *= PRIME64_1; - h64 ^= v2; - h64 = h64 * PRIME64_1 + PRIME64_4; - - v3 *= PRIME64_2; - v3 = XXH_rotl64(v3, 31); - v3 *= PRIME64_1; - h64 ^= v3; - h64 = 
h64 * PRIME64_1 + PRIME64_4; - - v4 *= PRIME64_2; - v4 = XXH_rotl64(v4, 31); - v4 *= PRIME64_1; - h64 ^= v4; - h64 = h64 * PRIME64_1 + PRIME64_4; - } - else - { - h64 = seed + PRIME64_5; - } - - h64 += (U64) len; - - while (p+8<=bEnd) - { - U64 k1 = XXH_get64bits(p); - k1 *= PRIME64_2; - k1 = XXH_rotl64(k1,31); - k1 *= PRIME64_1; - h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; - } - - if (p+4<=bEnd) - { - h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - p+=4; - } - - while (p> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; - - return h64; -} - - -unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) -{ -#if 0 - // Simple version, good for code maintenance, but unfortunately slow for small inputs - XXH64_state_t state; - XXH64_reset(&state, seed); - XXH64_update(&state, input, len); - return XXH64_digest(&state); -#else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - -# if !defined(XXH_USE_UNALIGNED_ACCESS) - if ((((size_t)input) & 7)==0) // Input is aligned, let's leverage the speed advantage - { - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - } -# endif - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif -} - -/**************************************************** - * Advanced Hash Functions -****************************************************/ - -/*** Allocation ***/ -typedef struct -{ - U64 total_len; - U32 seed; - U32 v1; - U32 v2; - U32 v3; - U32 v4; - U32 mem32[4]; /* defined as U32 for alignment */ - U32 memsize; -} 
XXH_istate32_t; - -typedef struct -{ - U64 total_len; - U64 seed; - U64 v1; - U64 v2; - U64 v3; - U64 v4; - U64 mem64[4]; /* defined as U64 for alignment */ - U32 memsize; -} XXH_istate64_t; - - -XXH32_state_t* XXH32_createState(void) -{ - XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t)); // A compilation error here means XXH32_state_t is not large enough - return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); -} -XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -}; - -XXH64_state_t* XXH64_createState(void) -{ - XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t)); // A compilation error here means XXH64_state_t is not large enough - return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); -} -XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -}; - - -/*** Hash feed ***/ - -XXH_errorcode XXH32_reset(XXH32_state_t* state_in, U32 seed) -{ - XXH_istate32_t* state = (XXH_istate32_t*) state_in; - state->seed = seed; - state->v1 = seed + PRIME32_1 + PRIME32_2; - state->v2 = seed + PRIME32_2; - state->v3 = seed + 0; - state->v4 = seed - PRIME32_1; - state->total_len = 0; - state->memsize = 0; - return XXH_OK; -} - -XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed) -{ - XXH_istate64_t* state = (XXH_istate64_t*) state_in; - state->seed = seed; - state->v1 = seed + PRIME64_1 + PRIME64_2; - state->v2 = seed + PRIME64_2; - state->v3 = seed + 0; - state->v4 = seed - PRIME64_1; - state->total_len = 0; - state->memsize = 0; - return XXH_OK; -} - - -FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian) -{ - XXH_istate32_t* state = (XXH_istate32_t *) state_in; - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (input==NULL) return XXH_ERROR; -#endif - - state->total_len += len; - - if 
(state->memsize + len < 16) // fill in tmp buffer - { - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); - state->memsize += (U32)len; - return XXH_OK; - } - - if (state->memsize) // some data left from previous update - { - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); - { - const U32* p32 = state->mem32; - state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; - state->v1 = XXH_rotl32(state->v1, 13); - state->v1 *= PRIME32_1; - p32++; - state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; - state->v2 = XXH_rotl32(state->v2, 13); - state->v2 *= PRIME32_1; - p32++; - state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; - state->v3 = XXH_rotl32(state->v3, 13); - state->v3 *= PRIME32_1; - p32++; - state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; - state->v4 = XXH_rotl32(state->v4, 13); - state->v4 *= PRIME32_1; - p32++; - } - p += 16-state->memsize; - state->memsize = 0; - } - - if (p <= bEnd-16) - { - const BYTE* const limit = bEnd - 16; - U32 v1 = state->v1; - U32 v2 = state->v2; - U32 v3 = state->v3; - U32 v4 = state->v4; - - do - { - v1 += XXH_readLE32(p, endian) * PRIME32_2; - v1 = XXH_rotl32(v1, 13); - v1 *= PRIME32_1; - p+=4; - v2 += XXH_readLE32(p, endian) * PRIME32_2; - v2 = XXH_rotl32(v2, 13); - v2 *= PRIME32_1; - p+=4; - v3 += XXH_readLE32(p, endian) * PRIME32_2; - v3 = XXH_rotl32(v3, 13); - v3 *= PRIME32_1; - p+=4; - v4 += XXH_readLE32(p, endian) * PRIME32_2; - v4 = XXH_rotl32(v4, 13); - v4 *= PRIME32_1; - p+=4; - } - while (p<=limit); - - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - - if (p < bEnd) - { - XXH_memcpy(state->mem32, p, bEnd-p); - state->memsize = (int)(bEnd-p); - } - - return XXH_OK; -} - -XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_update_endian(state_in, input, len, 
XXH_littleEndian); - else - return XXH32_update_endian(state_in, input, len, XXH_bigEndian); -} - - - -FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian) -{ - XXH_istate32_t* state = (XXH_istate32_t*) state_in; - const BYTE * p = (const BYTE*)state->mem32; - BYTE* bEnd = (BYTE*)(state->mem32) + state->memsize; - U32 h32; - - if (state->total_len >= 16) - { - h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); - } - else - { - h32 = state->seed + PRIME32_5; - } - - h32 += (U32) state->total_len; - - while (p+4<=bEnd) - { - h32 += XXH_readLE32(p, endian) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4; - p+=4; - } - - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} - - -U32 XXH32_digest (const XXH32_state_t* state_in) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_digest_endian(state_in, XXH_littleEndian); - else - return XXH32_digest_endian(state_in, XXH_bigEndian); -} - - -FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian) -{ - XXH_istate64_t * state = (XXH_istate64_t *) state_in; - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (input==NULL) return XXH_ERROR; -#endif - - state->total_len += len; - - if (state->memsize + len < 32) // fill in tmp buffer - { - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); - state->memsize += (U32)len; - return XXH_OK; - } - - if (state->memsize) // some data left from previous update - { - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); - { - const U64* p64 = state->mem64; - state->v1 += XXH_readLE64(p64, endian) * PRIME64_2; - state->v1 = XXH_rotl64(state->v1, 
31); - state->v1 *= PRIME64_1; - p64++; - state->v2 += XXH_readLE64(p64, endian) * PRIME64_2; - state->v2 = XXH_rotl64(state->v2, 31); - state->v2 *= PRIME64_1; - p64++; - state->v3 += XXH_readLE64(p64, endian) * PRIME64_2; - state->v3 = XXH_rotl64(state->v3, 31); - state->v3 *= PRIME64_1; - p64++; - state->v4 += XXH_readLE64(p64, endian) * PRIME64_2; - state->v4 = XXH_rotl64(state->v4, 31); - state->v4 *= PRIME64_1; - p64++; - } - p += 32-state->memsize; - state->memsize = 0; - } - - if (p+32 <= bEnd) - { - const BYTE* const limit = bEnd - 32; - U64 v1 = state->v1; - U64 v2 = state->v2; - U64 v3 = state->v3; - U64 v4 = state->v4; - - do - { - v1 += XXH_readLE64(p, endian) * PRIME64_2; - v1 = XXH_rotl64(v1, 31); - v1 *= PRIME64_1; - p+=8; - v2 += XXH_readLE64(p, endian) * PRIME64_2; - v2 = XXH_rotl64(v2, 31); - v2 *= PRIME64_1; - p+=8; - v3 += XXH_readLE64(p, endian) * PRIME64_2; - v3 = XXH_rotl64(v3, 31); - v3 *= PRIME64_1; - p+=8; - v4 += XXH_readLE64(p, endian) * PRIME64_2; - v4 = XXH_rotl64(v4, 31); - v4 *= PRIME64_1; - p+=8; - } - while (p<=limit); - - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - - if (p < bEnd) - { - XXH_memcpy(state->mem64, p, bEnd-p); - state->memsize = (int)(bEnd-p); - } - - return XXH_OK; -} - -XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_update_endian(state_in, input, len, XXH_littleEndian); - else - return XXH64_update_endian(state_in, input, len, XXH_bigEndian); -} - - - -FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian) -{ - XXH_istate64_t * state = (XXH_istate64_t *) state_in; - const BYTE * p = (const BYTE*)state->mem64; - BYTE* bEnd = (BYTE*)state->mem64 + state->memsize; - U64 h64; - - if (state->total_len >= 32) - { - U64 v1 = state->v1; - U64 v2 = state->v2; - 
U64 v3 = state->v3; - U64 v4 = state->v4; - - h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - - v1 *= PRIME64_2; - v1 = XXH_rotl64(v1, 31); - v1 *= PRIME64_1; - h64 ^= v1; - h64 = h64*PRIME64_1 + PRIME64_4; - - v2 *= PRIME64_2; - v2 = XXH_rotl64(v2, 31); - v2 *= PRIME64_1; - h64 ^= v2; - h64 = h64*PRIME64_1 + PRIME64_4; - - v3 *= PRIME64_2; - v3 = XXH_rotl64(v3, 31); - v3 *= PRIME64_1; - h64 ^= v3; - h64 = h64*PRIME64_1 + PRIME64_4; - - v4 *= PRIME64_2; - v4 = XXH_rotl64(v4, 31); - v4 *= PRIME64_1; - h64 ^= v4; - h64 = h64*PRIME64_1 + PRIME64_4; - } - else - { - h64 = state->seed + PRIME64_5; - } - - h64 += (U64) state->total_len; - - while (p+8<=bEnd) - { - U64 k1 = XXH_readLE64(p, endian); - k1 *= PRIME64_2; - k1 = XXH_rotl64(k1,31); - k1 *= PRIME64_1; - h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; - } - - if (p+4<=bEnd) - { - h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - p+=4; - } - - while (p> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; - - return h64; -} - - -unsigned long long XXH64_digest (const XXH64_state_t* state_in) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_digest_endian(state_in, XXH_littleEndian); - else - return XXH64_digest_endian(state_in, XXH_bigEndian); -} - - diff --git a/third_party/xxhash/xxhash.h b/third_party/xxhash/xxhash.h deleted file mode 100644 index 55b45015a..000000000 --- a/third_party/xxhash/xxhash.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - xxHash - Extremely Fast Hash algorithm - Header File - Copyright (C) 2012-2014, Yann Collet. 
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - xxHash source repository : http://code.google.com/p/xxhash/ -*/ - -/* Notice extracted from xxHash homepage : - -xxHash is an extremely fast Hash algorithm, running at RAM speed limits. -It also successfully passes all tests from the SMHasher suite. 
- -Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) - -Name Speed Q.Score Author -xxHash 5.4 GB/s 10 -CrapWow 3.2 GB/s 2 Andrew -MumurHash 3a 2.7 GB/s 10 Austin Appleby -SpookyHash 2.0 GB/s 10 Bob Jenkins -SBox 1.4 GB/s 9 Bret Mulvey -Lookup3 1.2 GB/s 9 Bob Jenkins -SuperFastHash 1.2 GB/s 1 Paul Hsieh -CityHash64 1.05 GB/s 10 Pike & Alakuijala -FNV 0.55 GB/s 5 Fowler, Noll, Vo -CRC32 0.43 GB/s 9 -MD5-32 0.33 GB/s 10 Ronald L. Rivest -SHA1-32 0.28 GB/s 10 - -Q.Score is a measure of quality of the hash function. -It depends on successfully passing SMHasher test set. -10 is a perfect score. -*/ - -#pragma once - -#if defined (__cplusplus) -extern "C" { -#endif - - -/***************************** - Includes -*****************************/ -#include /* size_t */ - - -/***************************** - Type -*****************************/ -typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; - - - -/***************************** - Simple Hash Functions -*****************************/ - -unsigned int XXH32 (const void* input, size_t length, unsigned seed); -unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed); - -/* -XXH32() : - Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". - The memory between input & input+length must be valid (allocated and read-accessible). - "seed" can be used to alter the result predictably. - This function successfully passes all SMHasher tests. - Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s -XXH64() : - Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". -*/ - - - -/***************************** - Advanced Hash Functions -*****************************/ -typedef struct { long long ll[ 6]; } XXH32_state_t; -typedef struct { long long ll[11]; } XXH64_state_t; - -/* -These structures allow static allocation of XXH states. 
-States must then be initialized using XXHnn_reset() before first use. - -If you prefer dynamic allocation, please refer to functions below. -*/ - -XXH32_state_t* XXH32_createState(void); -XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); - -XXH64_state_t* XXH64_createState(void); -XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); - -/* -These functions create and release memory for XXH state. -States must then be initialized using XXHnn_reset() before first use. -*/ - - -XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned seed); -XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); -unsigned int XXH32_digest (const XXH32_state_t* statePtr); - -XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); -XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); -unsigned long long XXH64_digest (const XXH64_state_t* statePtr); - -/* -These functions calculate the xxHash of an input provided in multiple smaller packets, -as opposed to an input provided as a single block. - -XXH state space must first be allocated, using either static or dynamic method provided above. - -Start a new hash by initializing state with a seed, using XXHnn_reset(). - -Then, feed the hash state by calling XXHnn_update() as many times as necessary. -Obviously, input must be valid, meaning allocated and read accessible. -The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. - -Finally, you can produce a hash anytime, by using XXHnn_digest(). -This function returns the final nn-bits hash. -You can nonetheless continue feeding the hash state with more input, -and therefore get some new hashes, by calling again XXHnn_digest(). - -When you are done, don't forget to free XXH state space, using typically XXHnn_freeState(). 
-*/ - - -#if defined (__cplusplus) -} -#endif diff --git a/third_party/xxhash/xxhsum.c b/third_party/xxhash/xxhsum.c deleted file mode 100644 index e090e5111..000000000 --- a/third_party/xxhash/xxhsum.c +++ /dev/null @@ -1,689 +0,0 @@ -/* -bench.c - Demo program to benchmark open-source algorithm -Copyright (C) Yann Collet 2012-2014 - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- -You can contact the author at : -- Blog homepage : http://fastcompression.blogspot.com/ -- Discussion group : https://groups.google.com/forum/?fromgroups#!forum/lz4c -*/ - -/************************************** - * Compiler Options - *************************************/ -/* MS Visual */ -#if defined(_MSC_VER) || defined(_WIN32) -# define _CRT_SECURE_NO_WARNINGS /* removes visual warnings */ -# define BMK_LEGACY_TIMER 1 /* gettimeofday() not supported by MSVC */ -#endif - -/* Under Linux at least, pull in the *64 commands */ -#define _LARGEFILE64_SOURCE - - -/************************************** - * Includes - *************************************/ -#include // malloc -#include // fprintf, fopen, ftello64, fread, stdin, stdout; when present : _fileno -#include // strcmp -#include // stat64 -#include // stat64 - -#include "xxhash.h" - - -/************************************** - * OS-Specific Includes - *************************************/ -// Use ftime() if gettimeofday() is not available on your target -#if defined(BMK_LEGACY_TIMER) -# include // timeb, ftime -#else -# include // gettimeofday -#endif - -#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__) -# include // _O_BINARY -# include // _setmode, _isatty -# ifdef __MINGW32__ - int _fileno(FILE *stream); // MINGW somehow forgets to include this windows declaration into -# endif -# define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY) -# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream)) -#else -# include // isatty, STDIN_FILENO -# define SET_BINARY_MODE(file) -# define IS_CONSOLE(stdStream) isatty(STDIN_FILENO) -#endif - -#if !defined(S_ISREG) -# define S_ISREG(x) (((x) & S_IFMT) == S_IFREG) -#endif - - -/************************************** - * Basic Types - *************************************/ -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef 
uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -#else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; -#endif - - -/************************************** - * Constants - *************************************/ -#define PROGRAM_NAME exename -#define PROGRAM_VERSION "" -#define COMPILED __DATE__ -#define AUTHOR "Yann Collet" -#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", PROGRAM_NAME, (int)(sizeof(void*)*8), PROGRAM_VERSION, AUTHOR, COMPILED - -#define NBLOOPS 3 // Default number of benchmark iterations -#define TIMELOOP 2500 // Minimum timing per iteration -#define PRIME 2654435761U - -#define KB *(1<<10) -#define MB *(1<<20) -#define GB *(1U<<30) - -#define MAX_MEM (2 GB - 64 MB) - -static const char stdinName[] = "-"; - - -//************************************** -// Display macros -//************************************** -#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) -#define DISPLAYRESULT(...) fprintf(stdout, __VA_ARGS__) -#define DISPLAYLEVEL(l, ...) 
if (g_displayLevel>=l) DISPLAY(__VA_ARGS__); -static unsigned g_displayLevel = 1; - - -//************************************** -// Unit variables -//************************************** -static int g_nbIterations = NBLOOPS; -static int g_fn_selection = 1; // required within main() & usage() - - -//********************************************************* -// Benchmark Functions -//********************************************************* - -#if defined(BMK_LEGACY_TIMER) - -static int BMK_GetMilliStart(void) -{ - // Based on Legacy ftime() - // Rolls over every ~ 12.1 days (0x100000/24/60/60) - // Use GetMilliSpan to correct for rollover - struct timeb tb; - int nCount; - ftime( &tb ); - nCount = (int) (tb.millitm + (tb.time & 0xfffff) * 1000); - return nCount; -} - -#else - -static int BMK_GetMilliStart(void) -{ - // Based on newer gettimeofday() - // Use GetMilliSpan to correct for rollover - struct timeval tv; - int nCount; - gettimeofday(&tv, NULL); - nCount = (int) (tv.tv_usec/1000 + (tv.tv_sec & 0xfffff) * 1000); - return nCount; -} - -#endif - -static int BMK_GetMilliSpan( int nTimeStart ) -{ - int nSpan = BMK_GetMilliStart() - nTimeStart; - if ( nSpan < 0 ) - nSpan += 0x100000 * 1000; - return nSpan; -} - - -static size_t BMK_findMaxMem(U64 requestedMem) -{ - size_t step = (64 MB); - size_t allocatedMemory; - BYTE* testmem=NULL; - - requestedMem += 3*step; - requestedMem -= (size_t)requestedMem & (step-1); - if (requestedMem > MAX_MEM) requestedMem = MAX_MEM; - allocatedMemory = (size_t)requestedMem; - - while (!testmem) - { - allocatedMemory -= step; - testmem = (BYTE*) malloc((size_t)allocatedMemory); - } - free (testmem); - - return (size_t) (allocatedMemory - step); -} - - -static U64 BMK_GetFileSize(char* infilename) -{ - int r; -#if defined(_MSC_VER) - struct _stat64 statbuf; - r = _stat64(infilename, &statbuf); -#else - struct stat statbuf; - r = stat(infilename, &statbuf); -#endif - if (r || !S_ISREG(statbuf.st_mode)) return 0; // No good... 
- return (U64)statbuf.st_size; -} - - -static int BMK_benchFile(char** fileNamesTable, int nbFiles) -{ - int fileIdx=0; - U32 hashResult=0; - - U64 totals = 0; - double totalc = 0.; - - - // Loop for each file - while (fileIdx inFileSize) benchedSize = (size_t)inFileSize; - if (benchedSize < inFileSize) - { - DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20)); - } - - buffer = (char*)malloc((size_t )benchedSize+16); - if(!buffer) - { - DISPLAY("\nError: not enough memory!\n"); - fclose(inFile); - return 12; - } - alignedBuffer = (buffer+15) - (((size_t)(buffer+15)) & 0xF); // align on next 16 bytes boundaries - - // Fill input buffer - DISPLAY("\rLoading %s... \n", inFileName); - readSize = fread(alignedBuffer, 1, benchedSize, inFile); - fclose(inFile); - - if(readSize != benchedSize) - { - DISPLAY("\nError: problem reading file '%s' !! \n", inFileName); - free(buffer); - return 13; - } - - - // Bench XXH32 - { - int interationNb; - double fastestC = 100000000.; - - DISPLAY("\r%79s\r", ""); // Clean display line - for (interationNb = 1; interationNb <= g_nbIterations; interationNb++) - { - int nbHashes = 0; - int milliTime; - - DISPLAY("%1i-%-14.14s : %10i ->\r", interationNb, "XXH32", (int)benchedSize); - - // Hash loop - milliTime = BMK_GetMilliStart(); - while(BMK_GetMilliStart() == milliTime); - milliTime = BMK_GetMilliStart(); - while(BMK_GetMilliSpan(milliTime) < TIMELOOP) - { - int i; - for (i=0; i<100; i++) - { - hashResult = XXH32(alignedBuffer, benchedSize, 0); - nbHashes++; - } - } - milliTime = BMK_GetMilliSpan(milliTime); - if ((double)milliTime < fastestC*nbHashes) fastestC = (double)milliTime/nbHashes; - DISPLAY("%1i-%-14.14s : %10i -> %7.1f MB/s\r", interationNb, "XXH32", (int)benchedSize, (double)benchedSize / fastestC / 1000.); - } - DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08X\n", "XXH32", (int)benchedSize, (double)benchedSize / fastestC / 1000., hashResult); - - totals += benchedSize; - 
totalc += fastestC; - } - - // Bench Unaligned XXH32 - { - int interationNb; - double fastestC = 100000000.; - - DISPLAY("\r%79s\r", ""); // Clean display line - for (interationNb = 1; (interationNb <= g_nbIterations) && ((benchedSize>1)); interationNb++) - { - int nbHashes = 0; - int milliTime; - - DISPLAY("%1i-%-14.14s : %10i ->\r", interationNb, "(unaligned)", (int)benchedSize); - // Hash loop - milliTime = BMK_GetMilliStart(); - while(BMK_GetMilliStart() == milliTime); - milliTime = BMK_GetMilliStart(); - while(BMK_GetMilliSpan(milliTime) < TIMELOOP) - { - int i; - for (i=0; i<100; i++) - { - hashResult = XXH32(alignedBuffer+1, benchedSize-1, 0); - nbHashes++; - } - } - milliTime = BMK_GetMilliSpan(milliTime); - if ((double)milliTime < fastestC*nbHashes) fastestC = (double)milliTime/nbHashes; - DISPLAY("%1i-%-14.14s : %10i -> %7.1f MB/s\r", interationNb, "XXH32 (unaligned)", (int)(benchedSize-1), (double)(benchedSize-1) / fastestC / 1000.); - } - DISPLAY("%-16.16s : %10i -> %7.1f MB/s \n", "XXH32 (unaligned)", (int)benchedSize-1, (double)(benchedSize-1) / fastestC / 1000.); - } - - // Bench XXH64 - { - int interationNb; - double fastestC = 100000000.; - unsigned long long h64 = 0; - - DISPLAY("\r%79s\r", ""); // Clean display line - for (interationNb = 1; interationNb <= g_nbIterations; interationNb++) - { - int nbHashes = 0; - int milliTime; - - DISPLAY("%1i-%-14.14s : %10i ->\r", interationNb, "XXH64", (int)benchedSize); - - // Hash loop - milliTime = BMK_GetMilliStart(); - while(BMK_GetMilliStart() == milliTime); - milliTime = BMK_GetMilliStart(); - while(BMK_GetMilliSpan(milliTime) < TIMELOOP) - { - int i; - for (i=0; i<100; i++) - { - h64 = XXH64(alignedBuffer, benchedSize, 0); - nbHashes++; - } - } - milliTime = BMK_GetMilliSpan(milliTime); - if ((double)milliTime < fastestC*nbHashes) fastestC = (double)milliTime/nbHashes; - DISPLAY("%1i-%-14.14s : %10i -> %7.1f MB/s\r", interationNb, "XXH64", (int)benchedSize, (double)benchedSize / fastestC / 1000.); - } 
- DISPLAY("%-16.16s : %10i -> %7.1f MB/s 0x%08X%08X\n", "XXH64", (int)benchedSize, (double)benchedSize / fastestC / 1000., (U32)(h64>>32), (U32)(h64)); - - totals += benchedSize; - totalc += fastestC; - } - - free(buffer); - } - - if (nbFiles > 1) - printf("%-16.16s :%11llu -> %7.1f MB/s\n", " TOTAL", (long long unsigned int)totals, (double)totals/totalc/1000.); - - return 0; -} - - - -static void BMK_checkResult(U32 r1, U32 r2) -{ - static int nbTests = 1; - - if (r1==r2) DISPLAY("\rTest%3i : %08X == %08X ok ", nbTests, r1, r2); - else - { - DISPLAY("\rERROR : Test%3i : %08X <> %08X !!!!! \n", nbTests, r1, r2); - exit(1); - } - nbTests++; -} - - -static void BMK_checkResult64(U64 r1, U64 r2) -{ - static int nbTests = 1; - - if (r1!=r2) - { - DISPLAY("\rERROR : Test%3i : 64-bits values non equals !!!!! \n", nbTests); - DISPLAY("\r %08X%08X != %08X%08X \n", (U32)(r1>>32), (U32)r1, (U32)(r2<<32), (U32)r2); - exit(1); - } - nbTests++; -} - - -static void BMK_testSequence64(void* sentence, int len, U64 seed, U64 Nresult) -{ - U64 Dresult; - XXH64_state_t state; - int index; - - Dresult = XXH64(sentence, len, seed); - BMK_checkResult64(Dresult, Nresult); - - XXH64_reset(&state, seed); - XXH64_update(&state, sentence, len); - Dresult = XXH64_digest(&state); - BMK_checkResult64(Dresult, Nresult); - - XXH64_reset(&state, seed); - for (index=0; index>24); - prime *= prime; - } - - BMK_testSequence(NULL, 0, 0, 0x02CC5D05); - BMK_testSequence(NULL, 0, PRIME, 0x36B78AE7); - BMK_testSequence(sanityBuffer, 1, 0, 0xB85CBEE5); - BMK_testSequence(sanityBuffer, 1, PRIME, 0xD5845D64); - BMK_testSequence(sanityBuffer, 14, 0, 0xE5AA0AB4); - BMK_testSequence(sanityBuffer, 14, PRIME, 0x4481951D); - BMK_testSequence(sanityBuffer, SANITY_BUFFER_SIZE, 0, 0x1F1AA412); - BMK_testSequence(sanityBuffer, SANITY_BUFFER_SIZE, PRIME, 0x498EC8E2); - - BMK_testSequence64(NULL , 0, 0, 0xEF46DB3751D8E999ULL); - BMK_testSequence64(NULL , 0, PRIME, 0xAC75FDA2929B17EFULL); - 
BMK_testSequence64(sanityBuffer, 1, 0, 0x4FCE394CC88952D8ULL); - BMK_testSequence64(sanityBuffer, 1, PRIME, 0x739840CB819FA723ULL); - BMK_testSequence64(sanityBuffer, 14, 0, 0xCFFA8DB881BC3A3DULL); - BMK_testSequence64(sanityBuffer, 14, PRIME, 0x5B9611585EFCC9CBULL); - BMK_testSequence64(sanityBuffer, SANITY_BUFFER_SIZE, 0, 0x0EAB543384F878ADULL); - BMK_testSequence64(sanityBuffer, SANITY_BUFFER_SIZE, PRIME, 0xCAA65939306F1E21ULL); - - DISPLAY("\r%79s\r", ""); // Clean display line - DISPLAYLEVEL(2, "Sanity check -- all tests ok\n"); -} - - -static int BMK_hash(const char* fileName, U32 hashNb) -{ - FILE* inFile; - size_t const blockSize = 64 KB; - size_t readSize; - char* buffer; - XXH64_state_t state; - - // Check file existence - if (fileName == stdinName) - { - inFile = stdin; - SET_BINARY_MODE(stdin); - } - else - inFile = fopen( fileName, "rb" ); - if (inFile==NULL) - { - DISPLAY( "Pb opening %s\n", fileName); - return 11; - } - - // Memory allocation & restrictions - buffer = (char*)malloc(blockSize); - if(!buffer) - { - DISPLAY("\nError: not enough memory!\n"); - fclose(inFile); - return 12; - } - - // Init - switch(hashNb) - { - case 0: - XXH32_reset((XXH32_state_t*)&state, 0); - break; - case 1: - XXH64_reset(&state, 0); - break; - default: - DISPLAY("Error : bad hash algorithm ID\n"); - fclose(inFile); - free(buffer); - return -1; - } - - - // Load file & update hash - DISPLAY("\rLoading %s... 
\r", fileName); - readSize = 1; - while (readSize) - { - readSize = fread(buffer, 1, blockSize, inFile); - switch(hashNb) - { - case 0: - XXH32_update((XXH32_state_t*)&state, buffer, readSize); - break; - case 1: - XXH64_update(&state, buffer, readSize); - break; - default: - break; - } - } - fclose(inFile); - free(buffer); - - // display Hash - switch(hashNb) - { - case 0: - { - U32 h32 = XXH32_digest((XXH32_state_t*)&state); - DISPLAYRESULT("%08x %s \n", h32, fileName); - break; - } - case 1: - { - U64 h64 = XXH64_digest(&state); - DISPLAYRESULT("%08x%08x %s \n", (U32)(h64>>32), (U32)(h64), fileName); - break; - } - default: - break; - } - - return 0; -} - - -//********************************************************* -// Main -//********************************************************* - -static int usage(const char* exename) -{ - DISPLAY( WELCOME_MESSAGE ); - DISPLAY( "Usage :\n"); - DISPLAY( " %s [arg] [filename]\n", exename); - DISPLAY( "When no filename provided, or - provided : use stdin as input\n"); - DISPLAY( "Arguments :\n"); - DISPLAY( " -H# : hash selection : 0=32bits, 1=64bits (default %i)\n", g_fn_selection); - DISPLAY( " -b : benchmark mode \n"); - DISPLAY( " -i# : number of iterations (benchmark mode; default %i)\n", g_nbIterations); - DISPLAY( " -h : help (this text)\n"); - return 0; -} - - -static int badusage(const char* exename) -{ - DISPLAY("Wrong parameters\n"); - usage(exename); - return 1; -} - - -int main(int argc, char** argv) -{ - int i, filenamesStart=0; - const char* input_filename = (char*)stdinName; - const char* exename = argv[0]; - U32 benchmarkMode = 0; - - // xxh32sum default to 32 bits checksum - if (strstr(exename, "xxh32sum")!=NULL) g_fn_selection=0; - - for(i=1; i 1) return badusage(exename); - - return BMK_hash(input_filename, g_fn_selection); -} From 2800f6180a354e32c11298dfe0dd274d08c9844e Mon Sep 17 00:00:00 2001 From: Triang3l Date: Wed, 9 Jun 2021 20:54:31 +0300 Subject: [PATCH 069/123] [Vulkan] Primitive processor --- 
.../gpu/vulkan/vulkan_command_processor.cc | 142 +++++++---- .../gpu/vulkan/vulkan_command_processor.h | 6 + src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 50 ++-- src/xenia/gpu/vulkan/vulkan_pipeline_cache.h | 14 +- .../gpu/vulkan/vulkan_primitive_processor.cc | 236 ++++++++++++++++++ .../gpu/vulkan/vulkan_primitive_processor.h | 92 +++++++ 6 files changed, 474 insertions(+), 66 deletions(-) create mode 100644 src/xenia/gpu/vulkan/vulkan_primitive_processor.cc create mode 100644 src/xenia/gpu/vulkan/vulkan_primitive_processor.h diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 0f8ea1075..9936f182f 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2021 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ @@ -13,6 +13,7 @@ #include #include #include +#include #include "xenia/base/assert.h" #include "xenia/base/logging.h" @@ -44,7 +45,10 @@ VulkanCommandProcessor::VulkanCommandProcessor( VulkanCommandProcessor::~VulkanCommandProcessor() = default; void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, - uint32_t length) {} + uint32_t length) { + shared_memory_->MemoryInvalidationCallback(base_ptr, length, true); + primitive_processor_->MemoryInvalidationCallback(base_ptr, length, true); +} void VulkanCommandProcessor::RestoreEdramSnapshot(const void* snapshot) {} @@ -182,6 +186,13 @@ bool VulkanCommandProcessor::SetupContext() { return false; } + primitive_processor_ = std::make_unique( + *register_file_, *memory_, trace_writer_, *shared_memory_, *this); + if (!primitive_processor_->Initialize()) { + XELOGE("Failed to initialize the geometric primitive processor"); + return false; + } + render_target_cache_ = std::make_unique(*this, *register_file_); if (!render_target_cache_->Initialize()) { @@ -285,6 +296,8 @@ void VulkanCommandProcessor::ShutdownContext() { render_target_cache_.reset(); + primitive_processor_.reset(); + shared_memory_.reset(); for (const auto& pipeline_layout_pair : pipeline_layouts_) { @@ -617,7 +630,13 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES - BeginSubmission(true); + const RegisterFile& regs = *register_file_; + + xenos::ModeControl edram_mode = regs.Get().edram_mode; + if (edram_mode == xenos::ModeControl::kCopy) { + // Special copy handling. + return IssueCopy(); + } // Vertex shader analysis. 
auto vertex_shader = static_cast(active_vertex_shader()); @@ -627,13 +646,30 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, } pipeline_cache_->AnalyzeShaderUcode(*vertex_shader); + BeginSubmission(true); + + // Process primitives. + PrimitiveProcessor::ProcessingResult primitive_processing_result; + if (!primitive_processor_->Process(primitive_processing_result)) { + return false; + } + if (!primitive_processing_result.host_draw_vertex_count) { + // Nothing to draw. + return true; + } + // TODO(Triang3l): Tessellation. + if (primitive_processing_result.host_vertex_shader_type != + Shader::HostVertexShaderType::kVertex) { + return false; + } + // TODO(Triang3l): Get a pixel shader. VulkanShader* pixel_shader = nullptr; // Shader modifications. SpirvShaderTranslator::Modification vertex_shader_modification = pipeline_cache_->GetCurrentVertexShaderModification( - *vertex_shader, Shader::HostVertexShaderType::kVertex); + *vertex_shader, primitive_processing_result.host_vertex_shader_type); SpirvShaderTranslator::Modification pixel_shader_modification = SpirvShaderTranslator::Modification(0); @@ -664,10 +700,10 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // current_graphics_pipeline_layout_. 
VkPipeline pipeline; const VulkanPipelineCache::PipelineLayoutProvider* pipeline_layout_provider; - if (!pipeline_cache_->ConfigurePipeline(vertex_shader_translation, - pixel_shader_translation, - framebuffer_key.render_pass_key, - pipeline, pipeline_layout_provider)) { + if (!pipeline_cache_->ConfigurePipeline( + vertex_shader_translation, pixel_shader_translation, + primitive_processing_result, framebuffer_key.render_pass_key, + pipeline, pipeline_layout_provider)) { return false; } deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, @@ -703,7 +739,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, current_graphics_pipeline_layout_ = pipeline_layout; } - const RegisterFile& regs = *register_file_; const ui::vulkan::VulkanProvider& provider = GetVulkanContext().GetVulkanProvider(); const VkPhysicalDeviceProperties& device_properties = @@ -718,7 +753,7 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // offset and is between maxViewportDimensions and viewportBoundsRange[1], // GetHostViewportInfo will adjust ndc_scale/ndc_offset to clamp it, and the // clamped range will be outside the largest possible framebuffer anyway. - // TODO(Triang3l): Possibly handle maxViewportDimensions and + // FIXME(Triang3l): Possibly handle maxViewportDimensions and // viewportBoundsRange separately because when using fragment shader // interlocks, framebuffers are not used, while the range may be wider than // dimensions? Though viewport bigger than 4096 - the smallest possible @@ -793,29 +828,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, << (vfetch_index & 63); } - // Set up the geometry. - if (indexed) { - uint32_t index_size = - index_buffer_info->format == xenos::IndexFormat::kInt32 - ? 
sizeof(uint32_t) - : sizeof(uint16_t); - assert_false(index_buffer_info->guest_base & (index_size - 1)); - uint32_t index_base = - index_buffer_info->guest_base & 0x1FFFFFFF & ~(index_size - 1); - uint32_t index_buffer_size = index_buffer_info->count * index_size; - if (!shared_memory_->RequestRange(index_base, index_buffer_size)) { - XELOGE( - "Failed to request index buffer at 0x{:08X} (size {}) in the shared " - "memory", - index_base, index_buffer_size); - return false; - } - deferred_command_buffer_.CmdVkBindIndexBuffer( - shared_memory_->buffer(), index_base, - index_buffer_info->format == xenos::IndexFormat::kInt32 - ? VK_INDEX_TYPE_UINT32 - : VK_INDEX_TYPE_UINT16); - } + // Insert the shared memory barrier if needed. + // TODO(Triang3l): Memory export. shared_memory_->Use(VulkanSharedMemory::Usage::kRead); // After all commands that may dispatch or copy, enter the render pass before @@ -843,10 +857,35 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, } // Draw. 
- if (indexed) { - deferred_command_buffer_.CmdVkDrawIndexed(index_count, 1, 0, 0, 0); + if (primitive_processing_result.index_buffer_type == + PrimitiveProcessor::ProcessedIndexBufferType::kNone) { + deferred_command_buffer_.CmdVkDraw( + primitive_processing_result.host_draw_vertex_count, 1, 0, 0); } else { - deferred_command_buffer_.CmdVkDraw(index_count, 1, 0, 0); + std::pair index_buffer; + switch (primitive_processing_result.index_buffer_type) { + case PrimitiveProcessor::ProcessedIndexBufferType::kGuest: + index_buffer.first = shared_memory_->buffer(); + index_buffer.second = primitive_processing_result.guest_index_base; + break; + case PrimitiveProcessor::ProcessedIndexBufferType::kHostConverted: + index_buffer = primitive_processor_->GetConvertedIndexBuffer( + primitive_processing_result.host_index_buffer_handle); + break; + case PrimitiveProcessor::ProcessedIndexBufferType::kHostBuiltin: + index_buffer = primitive_processor_->GetBuiltinIndexBuffer( + primitive_processing_result.host_index_buffer_handle); + break; + default: + assert_unhandled_case(primitive_processing_result.index_buffer_type); + return false; + } + deferred_command_buffer_.CmdVkBindIndexBuffer( + index_buffer.first, index_buffer.second, + index_buffer_info->format == xenos::IndexFormat::kInt16 + ? 
VK_INDEX_TYPE_UINT16 + : VK_INDEX_TYPE_UINT32); + deferred_command_buffer_.CmdVkDrawIndexed(index_count, 1, 0, 0, 0); } return true; @@ -952,6 +991,8 @@ void VulkanCommandProcessor::CheckSubmissionFence(uint64_t await_submission) { } shared_memory_->CompletedSubmissionUpdated(); + + primitive_processor_->CompletedSubmissionUpdated(); } void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { @@ -1006,6 +1047,8 @@ void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { current_graphics_pipeline_ = VK_NULL_HANDLE; current_graphics_pipeline_layout_ = nullptr; current_graphics_descriptor_sets_bound_up_to_date_ = 0; + + primitive_processor_->BeginSubmission(); } if (is_opening_frame) { @@ -1029,6 +1072,8 @@ void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { // may be reused. transient_descriptor_pool_uniform_buffers_->Reclaim(frame_completed_); uniform_buffer_pool_->Reclaim(frame_completed_); + + primitive_processor_->BeginFrame(); } } @@ -1100,9 +1145,15 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { bool is_closing_frame = is_swap && frame_open_; + if (is_closing_frame) { + primitive_processor_->EndFrame(); + } + if (submission_open_) { EndRenderPass(); + primitive_processor_->EndSubmission(); + shared_memory_->EndSubmission(); uniform_buffer_pool_->FlushWrites(); @@ -1255,6 +1306,8 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { device, descriptor_set_layout_pair.second, nullptr); } descriptor_set_layouts_textures_.clear(); + + primitive_processor_->ClearCache(); } } @@ -1288,20 +1341,21 @@ void VulkanCommandProcessor::UpdateFixedFunctionState( // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c auto pa_sc_window_offset = regs.Get(); - uint32_t pixel_size_x = 1, pixel_size_y = 1; - // Viewport. 
VkViewport viewport; - if (!viewport_info.xy_extent[0] || !viewport_info.xy_extent[1]) { - viewport.x = -1; - viewport.y = -1; - viewport.width = 1; - viewport.height = 1; - } else { + if (viewport_info.xy_extent[0] && viewport_info.xy_extent[1]) { viewport.x = float(viewport_info.xy_offset[0]); viewport.y = float(viewport_info.xy_offset[1]); viewport.width = float(viewport_info.xy_extent[0]); viewport.height = float(viewport_info.xy_extent[1]); + } else { + // Vulkan viewport width must be greater than 0.0f, but the Xenia viewport + // may be empty for various reasons - set the viewport to outside the + // framebuffer. + viewport.x = -1.0f; + viewport.y = -1.0f; + viewport.width = 1.0f; + viewport.height = 1.0f; } viewport.minDepth = viewport_info.z_min; viewport.maxDepth = viewport_info.z_max; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index e083b3755..0dba7caea 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -24,6 +24,7 @@ #include "xenia/gpu/vulkan/deferred_command_buffer.h" #include "xenia/gpu/vulkan/vulkan_graphics_system.h" #include "xenia/gpu/vulkan/vulkan_pipeline_cache.h" +#include "xenia/gpu/vulkan/vulkan_primitive_processor.h" #include "xenia/gpu/vulkan/vulkan_render_target_cache.h" #include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/vulkan/vulkan_shared_memory.h" @@ -74,6 +75,9 @@ class VulkanCommandProcessor : public CommandProcessor { const VkSparseMemoryBind* binds, VkPipelineStageFlags wait_stage_mask); + uint64_t GetCurrentFrame() const { return frame_current_; } + uint64_t GetCompletedFrame() const { return frame_completed_; } + // Must be called before doing anything outside the render pass scope, // including adding pipeline barriers that are not a part of the render pass // scope. Submission must be open. 
@@ -247,6 +251,8 @@ class VulkanCommandProcessor : public CommandProcessor { std::unique_ptr shared_memory_; + std::unique_ptr primitive_processor_; + std::unique_ptr pipeline_cache_; std::unique_ptr render_target_cache_; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 631098fcf..5ce43edc7 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -133,6 +133,7 @@ VulkanPipelineCache::GetCurrentPixelShaderModification( bool VulkanPipelineCache::ConfigurePipeline( VulkanShader::VulkanTranslation* vertex_shader, VulkanShader::VulkanTranslation* pixel_shader, + const PrimitiveProcessor::ProcessingResult& primitive_processing_result, VulkanRenderTargetCache::RenderPassKey render_pass_key, VkPipeline& pipeline_out, const PipelineLayoutProvider*& pipeline_layout_out) { @@ -173,7 +174,8 @@ bool VulkanPipelineCache::ConfigurePipeline( } PipelineDescription description; - if (!GetCurrentStateDescription(vertex_shader, pixel_shader, render_pass_key, + if (!GetCurrentStateDescription(vertex_shader, pixel_shader, + primitive_processing_result, render_pass_key, description)) { return false; } @@ -232,13 +234,13 @@ bool VulkanPipelineCache::TranslateAnalyzedShader( bool VulkanPipelineCache::GetCurrentStateDescription( const VulkanShader::VulkanTranslation* vertex_shader, const VulkanShader::VulkanTranslation* pixel_shader, + const PrimitiveProcessor::ProcessingResult& primitive_processing_result, VulkanRenderTargetCache::RenderPassKey render_pass_key, PipelineDescription& description_out) const { description_out.Reset(); const RegisterFile& regs = register_file_; auto pa_su_sc_mode_cntl = regs.Get(); - auto vgt_draw_initiator = regs.Get(); description_out.vertex_shader_hash = vertex_shader->shader().ucode_data_hash(); @@ -250,13 +252,8 @@ bool VulkanPipelineCache::GetCurrentStateDescription( } description_out.render_pass_key = render_pass_key; - 
xenos::PrimitiveType primitive_type = vgt_draw_initiator.prim_type; PipelinePrimitiveTopology primitive_topology; - // Vulkan explicitly allows primitive restart only for specific primitive - // types, unlike Direct3D where it's valid for non-strips, but has - // implementation-defined behavior. - bool primitive_restart_allowed = false; - switch (primitive_type) { + switch (primitive_processing_result.host_primitive_type) { case xenos::PrimitiveType::kPointList: primitive_topology = PipelinePrimitiveTopology::kPointList; break; @@ -265,23 +262,19 @@ bool VulkanPipelineCache::GetCurrentStateDescription( break; case xenos::PrimitiveType::kLineStrip: primitive_topology = PipelinePrimitiveTopology::kLineStrip; - primitive_restart_allowed = true; break; case xenos::PrimitiveType::kTriangleList: case xenos::PrimitiveType::kRectangleList: primitive_topology = PipelinePrimitiveTopology::kTriangleList; break; case xenos::PrimitiveType::kTriangleFan: - if (device_pipeline_features_.triangle_fans) { - primitive_topology = PipelinePrimitiveTopology::kTriangleFan; - primitive_restart_allowed = true; - } else { - primitive_topology = PipelinePrimitiveTopology::kTriangleList; - } + primitive_topology = PipelinePrimitiveTopology::kTriangleFan; break; case xenos::PrimitiveType::kTriangleStrip: primitive_topology = PipelinePrimitiveTopology::kTriangleStrip; - primitive_restart_allowed = true; + break; + case xenos::PrimitiveType::kQuadList: + primitive_topology = PipelinePrimitiveTopology::kLineListWithAdjacency; break; default: // TODO(Triang3l): All primitive types and tessellation. @@ -289,7 +282,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription( } description_out.primitive_topology = primitive_topology; description_out.primitive_restart = - primitive_restart_allowed && pa_su_sc_mode_cntl.multi_prim_ib_ena; + primitive_processing_result.host_primitive_reset_enabled; // TODO(Triang3l): Tessellation. 
bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs); @@ -313,6 +306,9 @@ bool VulkanPipelineCache::GetCurrentStateDescription( polygon_type = std::min(polygon_type, pa_su_sc_mode_cntl.polymode_back_ptype); } + if (pa_su_sc_mode_cntl.poly_mode != xenos::PolygonModeEnable::kDualMode) { + polygon_type = xenos::PolygonType::kTriangles; + } switch (polygon_type) { case xenos::PolygonType::kPoints: // When points are not supported, use lines instead, preserving @@ -418,15 +414,27 @@ bool VulkanPipelineCache::EnsurePipelineCreated( switch (description.primitive_topology) { case PipelinePrimitiveTopology::kPointList: input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + assert_false(description.primitive_restart); + if (description.primitive_restart) { + return false; + } break; case PipelinePrimitiveTopology::kLineList: input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; + assert_false(description.primitive_restart); + if (description.primitive_restart) { + return false; + } break; case PipelinePrimitiveTopology::kLineStrip: input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; break; case PipelinePrimitiveTopology::kTriangleList: input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + assert_false(description.primitive_restart); + if (description.primitive_restart) { + return false; + } break; case PipelinePrimitiveTopology::kTriangleStrip: input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; @@ -441,9 +449,17 @@ bool VulkanPipelineCache::EnsurePipelineCreated( case PipelinePrimitiveTopology::kLineListWithAdjacency: input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY; + assert_false(description.primitive_restart); + if (description.primitive_restart) { + return false; + } break; case PipelinePrimitiveTopology::kPatchList: input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; + assert_false(description.primitive_restart); + if 
(description.primitive_restart) { + return false; + } break; default: assert_unhandled_case(description.primitive_topology); diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h index 9eb5ed2d3..60654a99d 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h @@ -19,6 +19,7 @@ #include "xenia/base/hash.h" #include "xenia/base/platform.h" #include "xenia/base/xxhash.h" +#include "xenia/gpu/primitive_processor.h" #include "xenia/gpu/register_file.h" #include "xenia/gpu/spirv_shader_translator.h" #include "xenia/gpu/vulkan/vulkan_render_target_cache.h" @@ -69,11 +70,13 @@ class VulkanPipelineCache { const Shader& shader) const; // TODO(Triang3l): Return a deferred creation handle. - bool ConfigurePipeline(VulkanShader::VulkanTranslation* vertex_shader, - VulkanShader::VulkanTranslation* pixel_shader, - VulkanRenderTargetCache::RenderPassKey render_pass_key, - VkPipeline& pipeline_out, - const PipelineLayoutProvider*& pipeline_layout_out); + bool ConfigurePipeline( + VulkanShader::VulkanTranslation* vertex_shader, + VulkanShader::VulkanTranslation* pixel_shader, + const PrimitiveProcessor::ProcessingResult& primitive_processing_result, + VulkanRenderTargetCache::RenderPassKey render_pass_key, + VkPipeline& pipeline_out, + const PipelineLayoutProvider*& pipeline_layout_out); private: // Can only load pipeline storage if features of the device it was created on @@ -168,6 +171,7 @@ class VulkanPipelineCache { bool GetCurrentStateDescription( const VulkanShader::VulkanTranslation* vertex_shader, const VulkanShader::VulkanTranslation* pixel_shader, + const PrimitiveProcessor::ProcessingResult& primitive_processing_result, VulkanRenderTargetCache::RenderPassKey render_pass_key, PipelineDescription& description_out) const; diff --git a/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc new file mode 100644 index 
000000000..deeef270f --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc @@ -0,0 +1,236 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2021 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/vulkan_primitive_processor.h" + +#include +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/gpu/vulkan/deferred_command_buffer.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include "xenia/ui/vulkan/vulkan_provider.h" +#include "xenia/ui/vulkan/vulkan_util.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +VulkanPrimitiveProcessor::~VulkanPrimitiveProcessor() { Shutdown(true); } + +bool VulkanPrimitiveProcessor::Initialize() { + // TODO(Triang3l): fullDrawIndexUint32 feature check and indirect index fetch. + // TODO(Triang3l): Portability subset triangleFans check when portability + // subset support is added. + // TODO(Triang3l): geometryShader check for quads when geometry shaders are + // added. 
+ if (!InitializeCommon(true, true, false, false)) { + Shutdown(); + return false; + } + frame_index_buffer_pool_ = + std::make_unique( + command_processor_.GetVulkanContext().GetVulkanProvider(), + VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + std::max(size_t(kMinRequiredConvertedIndexBufferSize), + ui::GraphicsUploadBufferPool::kDefaultPageSize)); + return true; +} + +void VulkanPrimitiveProcessor::Shutdown(bool from_destructor) { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + frame_index_buffers_.clear(); + frame_index_buffer_pool_.reset(); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, + builtin_index_buffer_upload_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, + builtin_index_buffer_upload_memory_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, + builtin_index_buffer_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, + builtin_index_buffer_memory_); + + if (!from_destructor) { + ShutdownCommon(); + } +} + +void VulkanPrimitiveProcessor::CompletedSubmissionUpdated() { + if (builtin_index_buffer_upload_ != VK_NULL_HANDLE && + command_processor_.GetCompletedSubmission() >= + builtin_index_buffer_upload_submission_) { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, + builtin_index_buffer_upload_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, + builtin_index_buffer_upload_memory_); + } +} + +void VulkanPrimitiveProcessor::BeginSubmission() { + if (builtin_index_buffer_upload_ != VK_NULL_HANDLE && + builtin_index_buffer_upload_submission_ == 
UINT64_MAX) { + // No need to submit deferred barriers - builtin_index_buffer_ has never + // been used yet, and builtin_index_buffer_upload_ is written before + // submitting commands reading it. + + DeferredCommandBuffer& command_buffer = + command_processor_.deferred_command_buffer(); + + VkBufferCopy* copy_region = command_buffer.CmdCopyBufferEmplace( + builtin_index_buffer_upload_, builtin_index_buffer_, 1); + copy_region->srcOffset = 0; + copy_region->dstOffset = 0; + copy_region->size = builtin_index_buffer_size_; + + VkBufferMemoryBarrier builtin_index_buffer_memory_barrier; + builtin_index_buffer_memory_barrier.sType = + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + builtin_index_buffer_memory_barrier.pNext = nullptr; + builtin_index_buffer_memory_barrier.srcAccessMask = + VK_ACCESS_TRANSFER_WRITE_BIT; + builtin_index_buffer_memory_barrier.dstAccessMask = + VK_ACCESS_INDEX_READ_BIT; + builtin_index_buffer_memory_barrier.srcQueueFamilyIndex = + VK_QUEUE_FAMILY_IGNORED; + builtin_index_buffer_memory_barrier.dstQueueFamilyIndex = + VK_QUEUE_FAMILY_IGNORED; + builtin_index_buffer_memory_barrier.buffer = builtin_index_buffer_; + builtin_index_buffer_memory_barrier.offset = 0; + builtin_index_buffer_memory_barrier.size = VK_WHOLE_SIZE; + command_buffer.CmdVkPipelineBarrier( + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, + 0, nullptr, 1, &builtin_index_buffer_memory_barrier, 0, nullptr); + + builtin_index_buffer_upload_submission_ = + command_processor_.GetCurrentSubmission(); + } +} + +void VulkanPrimitiveProcessor::BeginFrame() { + frame_index_buffer_pool_->Reclaim(command_processor_.GetCompletedFrame()); +} + +void VulkanPrimitiveProcessor::EndSubmission() { + frame_index_buffer_pool_->FlushWrites(); +} + +void VulkanPrimitiveProcessor::EndFrame() { + ClearPerFrameCache(); + frame_index_buffers_.clear(); +} + +bool VulkanPrimitiveProcessor::InitializeBuiltin16BitIndexBuffer( + uint32_t index_count, std::function fill_callback) { + 
assert_not_zero(index_count); + assert_true(builtin_index_buffer_ == VK_NULL_HANDLE); + assert_true(builtin_index_buffer_memory_ == VK_NULL_HANDLE); + assert_true(builtin_index_buffer_upload_ == VK_NULL_HANDLE); + assert_true(builtin_index_buffer_upload_memory_ == VK_NULL_HANDLE); + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + builtin_index_buffer_size_ = VkDeviceSize(sizeof(uint16_t) * index_count); + if (!ui::vulkan::util::CreateDedicatedAllocationBuffer( + provider, builtin_index_buffer_size_, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + ui::vulkan::util::MemoryPurpose::kDeviceLocal, builtin_index_buffer_, + builtin_index_buffer_memory_)) { + XELOGE( + "Vulkan primitive processor: Failed to create the built-in index " + "buffer GPU resource with {} 16-bit indices", + index_count); + return false; + } + uint32_t upload_memory_type; + if (!ui::vulkan::util::CreateDedicatedAllocationBuffer( + provider, builtin_index_buffer_size_, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + ui::vulkan::util::MemoryPurpose::kUpload, + builtin_index_buffer_upload_, builtin_index_buffer_upload_memory_, + &upload_memory_type)) { + XELOGE( + "Vulkan primitive processor: Failed to create the built-in index " + "buffer upload resource with {} 16-bit indices", + index_count); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, + builtin_index_buffer_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, + builtin_index_buffer_memory_); + return false; + } + + void* mapping; + if (dfn.vkMapMemory(device, builtin_index_buffer_upload_memory_, 0, + VK_WHOLE_SIZE, 0, &mapping) != VK_SUCCESS) { + XELOGE( + "Vulkan primitive processor: Failed to map the built-in index buffer " + "upload resource with {} 16-bit indices", + index_count); + 
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, + builtin_index_buffer_upload_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, + builtin_index_buffer_upload_memory_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, + builtin_index_buffer_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, + builtin_index_buffer_memory_); + return false; + } + fill_callback(reinterpret_cast(mapping)); + ui::vulkan::util::FlushMappedMemoryRange( + provider, builtin_index_buffer_memory_, upload_memory_type); + dfn.vkUnmapMemory(device, builtin_index_buffer_memory_); + + // Schedule uploading in the first submission. + builtin_index_buffer_upload_submission_ = UINT64_MAX; + return true; +} + +void* VulkanPrimitiveProcessor::RequestHostConvertedIndexBufferForCurrentFrame( + xenos::IndexFormat format, uint32_t index_count, bool coalign_for_simd, + uint32_t coalignment_original_address, size_t& backend_handle_out) { + size_t index_size = format == xenos::IndexFormat::kInt16 ? sizeof(uint16_t) + : sizeof(uint32_t); + VkBuffer buffer; + VkDeviceSize offset; + uint8_t* mapping = frame_index_buffer_pool_->Request( + command_processor_.GetCurrentFrame(), + index_size * index_count + + (coalign_for_simd ? 
XE_GPU_PRIMITIVE_PROCESSOR_SIMD_SIZE : 0), + index_size, buffer, offset); + if (!mapping) { + return false; + } + if (coalign_for_simd) { + ptrdiff_t coalignment_offset = + GetSimdCoalignmentOffset(mapping, coalignment_original_address); + mapping += coalignment_offset; + offset = VkDeviceSize(offset + coalignment_offset); + } + backend_handle_out = frame_index_buffers_.size(); + frame_index_buffers_.emplace_back(buffer, offset); + return mapping; +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_primitive_processor.h b/src/xenia/gpu/vulkan/vulkan_primitive_processor.h new file mode 100644 index 000000000..50e729577 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_primitive_processor.h @@ -0,0 +1,92 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2021 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_VULKAN_PRIMITIVE_PROCESSOR_H_ +#define XENIA_GPU_VULKAN_VULKAN_PRIMITIVE_PROCESSOR_H_ + +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/gpu/primitive_processor.h" +#include "xenia/ui/vulkan/vulkan_provider.h" +#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +class VulkanCommandProcessor; + +class VulkanPrimitiveProcessor final : public PrimitiveProcessor { + public: + VulkanPrimitiveProcessor(const RegisterFile& register_file, Memory& memory, + TraceWriter& trace_writer, + SharedMemory& shared_memory, + VulkanCommandProcessor& command_processor) + : PrimitiveProcessor(register_file, memory, trace_writer, shared_memory), + command_processor_(command_processor) {} + ~VulkanPrimitiveProcessor(); + + bool Initialize(); + void Shutdown(bool from_destructor = false); + void ClearCache() { frame_index_buffer_pool_->ClearCache(); } + + void CompletedSubmissionUpdated(); + void BeginSubmission(); + void BeginFrame(); + void EndSubmission(); + void EndFrame(); + + std::pair GetBuiltinIndexBuffer(size_t handle) const { + assert_not_null(builtin_index_buffer_); + return std::make_pair( + builtin_index_buffer_, + VkDeviceSize(GetBuiltinIndexBufferOffsetBytes(handle))); + } + std::pair GetConvertedIndexBuffer( + size_t handle) const { + return frame_index_buffers_[handle]; + } + + protected: + bool InitializeBuiltin16BitIndexBuffer( + uint32_t index_count, + std::function fill_callback) override; + + void* RequestHostConvertedIndexBufferForCurrentFrame( + xenos::IndexFormat format, uint32_t index_count, bool coalign_for_simd, + uint32_t coalignment_original_address, + size_t& backend_handle_out) override; + + private: + VulkanCommandProcessor& command_processor_; + + VkDeviceSize builtin_index_buffer_size_ = 0; + VkBuffer builtin_index_buffer_ = VK_NULL_HANDLE; + VkDeviceMemory 
builtin_index_buffer_memory_ = VK_NULL_HANDLE; + // Temporary buffer copied in the beginning of the first submission for + // uploading to builtin_index_buffer_, destroyed when the submission when it + // was uploaded is completed. + VkBuffer builtin_index_buffer_upload_ = VK_NULL_HANDLE; + VkDeviceMemory builtin_index_buffer_upload_memory_ = VK_NULL_HANDLE; + // UINT64_MAX means not uploaded yet and needs uploading in the first + // submission (if the upload buffer exists at all). + uint64_t builtin_index_buffer_upload_submission_ = UINT64_MAX; + + std::unique_ptr frame_index_buffer_pool_; + // Indexed by the backend handles. + std::deque> frame_index_buffers_; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_VULKAN_PRIMITIVE_PROCESSOR_H_ From bee95d9e638dd96bb6627917a477f318a0ac0e0f Mon Sep 17 00:00:00 2001 From: Triang3l Date: Wed, 9 Jun 2021 21:10:27 +0300 Subject: [PATCH 070/123] [Vulkan] Remove remaining IndexBufferInfo references --- src/xenia/gpu/vulkan/vulkan_command_processor.cc | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 9936f182f..14edfe4ab 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -770,12 +770,9 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // Update fixed-function dynamic state. UpdateFixedFunctionState(viewport_info); - bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base; - // Update system constants before uploading them. - UpdateSystemConstantValues( - indexed ? index_buffer_info->endianness : xenos::Endian::kNone, - viewport_info); + UpdateSystemConstantValues(primitive_processing_result.host_index_endian, + viewport_info); // Update uniform buffers and descriptor sets after binding the pipeline with // the new layout. 
@@ -882,10 +879,12 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, } deferred_command_buffer_.CmdVkBindIndexBuffer( index_buffer.first, index_buffer.second, - index_buffer_info->format == xenos::IndexFormat::kInt16 + primitive_processing_result.host_index_format == + xenos::IndexFormat::kInt16 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32); - deferred_command_buffer_.CmdVkDrawIndexed(index_count, 1, 0, 0, 0); + deferred_command_buffer_.CmdVkDrawIndexed( + primitive_processing_result.host_draw_vertex_count, 1, 0, 0, 0); } return true; From 270469d4d4c13b02bd31bf12e1ef6a470d2dab52 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 19 Jun 2021 14:16:24 +0300 Subject: [PATCH 071/123] [Vulkan] Basic framebuffer output --- src/xenia/gpu/spirv_shader_translator.cc | 108 ++- src/xenia/gpu/spirv_shader_translator.h | 15 +- .../gpu/vulkan/vulkan_command_processor.cc | 82 +- src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 33 +- .../gpu/vulkan/vulkan_primitive_processor.cc | 1 + .../gpu/vulkan/vulkan_render_target_cache.cc | 736 ++++++++++++++++-- .../gpu/vulkan/vulkan_render_target_cache.h | 224 +++++- .../ui/vulkan/vulkan_immediate_drawer.cc | 4 +- 8 files changed, 1081 insertions(+), 122 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 617cc76b4..4ee35cf11 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2021 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -63,6 +64,9 @@ SpirvShaderTranslator::Features::Features( } } +const std::string SpirvShaderTranslator::kInterpolatorNamePrefix = + "xe_interpolator_"; + SpirvShaderTranslator::SpirvShaderTranslator(const Features& features) : features_(features) {} @@ -363,6 +367,8 @@ void SpirvShaderTranslator::StartTranslation() { if (is_vertex_shader()) { StartVertexOrTessEvalShaderBeforeMain(); + } else if (is_pixel_shader()) { + StartFragmentShaderBeforeMain(); } // Begin the main function. @@ -394,8 +400,9 @@ void SpirvShaderTranslator::StartTranslation() { if (register_array_size) { id_vector_temp_.clear(); id_vector_temp_.reserve(register_array_size); - // TODO(Triang3l): In PS, only initialize starting from the interpolators, - // probably manually. But not very important. + // TODO(Triang3l): In PS, only need to initialize starting from the + // interpolators, probably manually. But likely not very important - the + // compiler in the driver will likely eliminate that write. for (uint32_t i = 0; i < register_array_size; ++i) { id_vector_temp_.push_back(const_float4_0_); } @@ -411,6 +418,8 @@ void SpirvShaderTranslator::StartTranslation() { // main function. if (is_vertex_shader()) { StartVertexOrTessEvalShaderInMain(); + } else if (is_pixel_shader()) { + StartFragmentShaderInMain(); } // Open the main loop. @@ -921,6 +930,16 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { main_interface_.push_back(input_vertex_index_); } + // Create the Xenia-specific outputs. 
+ for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { + spv::Id interpolator = builder_->createVariable( + spv::NoPrecision, spv::StorageClassOutput, type_float4_, + (kInterpolatorNamePrefix + std::to_string(i)).c_str()); + input_output_interpolators_[i] = interpolator; + builder_->addDecoration(interpolator, spv::DecorationLocation, int(i)); + main_interface_.push_back(interpolator); + } + // Create the entire GLSL 4.50 gl_PerVertex output similar to what glslang // does. Members (like gl_PointSize) don't need to be used, and also // ClipDistance and CullDistance may exist even if the device doesn't support @@ -978,6 +997,11 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { spv::NoPrecision, spv::StorageClassFunction, type_float3_, "xe_var_point_size_edge_flag_kill_vertex"); + // Zero the interpolators. + for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { + builder_->createStore(const_float4_0_, input_output_interpolators_[i]); + } + // Load the vertex index or the tessellation parameters. if (register_count()) { // TODO(Triang3l): Barycentric coordinates and patch index. @@ -1167,6 +1191,73 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { id_vector_temp_)); } +void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { + // Interpolator inputs. + uint32_t interpolator_count = + std::min(xenos::kMaxInterpolators, register_count()); + for (uint32_t i = 0; i < interpolator_count; ++i) { + spv::Id interpolator = builder_->createVariable( + spv::NoPrecision, spv::StorageClassInput, type_float4_, + (kInterpolatorNamePrefix + std::to_string(i)).c_str()); + input_output_interpolators_[i] = interpolator; + builder_->addDecoration(interpolator, spv::DecorationLocation, int(i)); + main_interface_.push_back(interpolator); + } + + // Framebuffer attachment outputs. 
+ std::fill(output_fragment_data_.begin(), output_fragment_data_.end(), + spv::NoResult); + static const char* const kFragmentDataNames[] = { + "xe_out_fragment_data_0", + "xe_out_fragment_data_1", + "xe_out_fragment_data_2", + "xe_out_fragment_data_3", + }; + uint32_t shader_writes_color_targets = + current_shader().writes_color_targets(); + for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { + if (!(shader_writes_color_targets & (uint32_t(1) << i))) { + continue; + } + spv::Id output_fragment_data_rt = + builder_->createVariable(spv::NoPrecision, spv::StorageClassOutput, + type_float4_, kFragmentDataNames[i]); + output_fragment_data_[i] = output_fragment_data_rt; + builder_->addDecoration(output_fragment_data_rt, spv::DecorationLocation, + int(i)); + // Make invariant as pixel shaders may be used for various precise + // computations. + builder_->addDecoration(output_fragment_data_rt, spv::DecorationInvariant); + main_interface_.push_back(output_fragment_data_rt); + } +} + +void SpirvShaderTranslator::StartFragmentShaderInMain() { + // Copy the interpolators to general-purpose registers. + // TODO(Triang3l): Centroid. + // TODO(Triang3l): ps_param_gen. + uint32_t interpolator_count = + std::min(xenos::kMaxInterpolators, register_count()); + for (uint32_t i = 0; i < interpolator_count; ++i) { + id_vector_temp_.clear(); + // Register array element. + id_vector_temp_.push_back(builder_->makeIntConstant(int(i))); + builder_->createStore( + builder_->createLoad(input_output_interpolators_[i], spv::NoPrecision), + builder_->createAccessChain(spv::StorageClassFunction, + var_main_registers_, id_vector_temp_)); + } + + // Initialize the colors for safety. 
+ uint32_t shader_writes_color_targets = + current_shader().writes_color_targets(); + for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { + if (shader_writes_color_targets & (uint32_t(1) << i)) { + builder_->createStore(const_float4_0_, output_fragment_data_[i]); + } + } +} + void SpirvShaderTranslator::UpdateExecConditionals( ParsedExecInstruction::Type type, uint32_t bool_constant_index, bool condition) { @@ -1507,6 +1598,10 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, target_pointer = builder_->createAccessChain( spv::StorageClassFunction, var_main_registers_, id_vector_temp_util_); } break; + case InstructionStorageTarget::kInterpolator: + assert_true(is_vertex_shader()); + target_pointer = input_output_interpolators_[result.storage_index]; + break; case InstructionStorageTarget::kPosition: assert_true(is_vertex_shader()); id_vector_temp_util_.clear(); @@ -1515,6 +1610,13 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, target_pointer = builder_->createAccessChain( spv::StorageClassOutput, output_per_vertex_, id_vector_temp_util_); break; + case InstructionStorageTarget::kColor: + assert_true(is_pixel_shader()); + assert_not_zero(used_write_mask); + assert_true(current_shader().writes_color_target(result.storage_index)); + target_pointer = output_fragment_data_[result.storage_index]; + assert_true(target_pointer != spv::NoResult); + break; default: // TODO(Triang3l): All storage targets. break; diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index ef350b85e..94c58a976 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. 
* + * Copyright 2021 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -10,8 +10,10 @@ #ifndef XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_ #define XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_ +#include #include #include +#include #include #include @@ -225,6 +227,9 @@ class SpirvShaderTranslator : public ShaderTranslator { void StartVertexOrTessEvalShaderInMain(); void CompleteVertexOrTessEvalShaderInMain(); + void StartFragmentShaderBeforeMain(); + void StartFragmentShaderInMain(); + // Updates the current flow control condition (to be called in the beginning // of exec and in jumps), closing the previous conditionals if needed. // However, if the condition is not different, the instruction-level predicate @@ -405,6 +410,12 @@ class SpirvShaderTranslator : public ShaderTranslator { // VS as TES only - int. spv::Id input_primitive_id_; + // In vertex or tessellation evaluation shaders - outputs, always + // xenos::kMaxInterpolators. + // In pixel shaders - inputs, min(xenos::kMaxInterpolators, register_count()). + spv::Id input_output_interpolators_[xenos::kMaxInterpolators]; + static const std::string kInterpolatorNamePrefix; + enum OutputPerVertexMember : unsigned int { kOutputPerVertexMemberPosition, kOutputPerVertexMemberPointSize, @@ -414,6 +425,8 @@ class SpirvShaderTranslator : public ShaderTranslator { }; spv::Id output_per_vertex_; + std::array output_fragment_data_; + std::vector main_interface_; spv::Function* function_main_; // bool. 
diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 14edfe4ab..6313254d0 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -645,6 +645,35 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, return false; } pipeline_cache_->AnalyzeShaderUcode(*vertex_shader); + bool memexport_used_vertex = vertex_shader->is_valid_memexport_used(); + + // Pixel shader analysis. + bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs); + bool is_rasterization_done = + draw_util::IsRasterizationPotentiallyDone(regs, primitive_polygonal); + VulkanShader* pixel_shader = nullptr; + if (is_rasterization_done) { + // See xenos::ModeControl for explanation why the pixel shader is only used + // when it's kColorDepth here. + if (edram_mode == xenos::ModeControl::kColorDepth) { + pixel_shader = static_cast(active_pixel_shader()); + if (pixel_shader) { + pipeline_cache_->AnalyzeShaderUcode(*pixel_shader); + if (!draw_util::IsPixelShaderNeededWithRasterization(*pixel_shader, + regs)) { + pixel_shader = nullptr; + } + } + } + } else { + // Disabling pixel shader for this case is also required by the pipeline + // cache. + if (!memexport_used_vertex) { + // This draw has no effect. + return true; + } + } + // TODO(Triang3l): Memory export. BeginSubmission(true); @@ -663,28 +692,20 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, return false; } - // TODO(Triang3l): Get a pixel shader. - VulkanShader* pixel_shader = nullptr; - // Shader modifications. SpirvShaderTranslator::Modification vertex_shader_modification = pipeline_cache_->GetCurrentVertexShaderModification( *vertex_shader, primitive_processing_result.host_vertex_shader_type); SpirvShaderTranslator::Modification pixel_shader_modification = - SpirvShaderTranslator::Modification(0); + pixel_shader + ? 
pipeline_cache_->GetCurrentPixelShaderModification(*pixel_shader) + : SpirvShaderTranslator::Modification(0); - VulkanRenderTargetCache::FramebufferKey framebuffer_key; - if (!render_target_cache_->UpdateRenderTargets(framebuffer_key)) { - return false; - } - VkFramebuffer framebuffer = - render_target_cache_->GetFramebuffer(framebuffer_key); - if (framebuffer == VK_NULL_HANDLE) { - return false; - } - VkRenderPass render_pass = - render_target_cache_->GetRenderPass(framebuffer_key.render_pass_key); - if (render_pass == VK_NULL_HANDLE) { + // Set up the render targets - this may perform dispatches and draws. + uint32_t pixel_shader_writes_color_targets = + pixel_shader ? pixel_shader->writes_color_targets() : 0; + if (!render_target_cache_->Update(is_rasterization_done, + pixel_shader_writes_color_targets)) { return false; } @@ -693,7 +714,11 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, static_cast( vertex_shader->GetOrCreateTranslation( vertex_shader_modification.value)); - VulkanShader::VulkanTranslation* pixel_shader_translation = nullptr; + VulkanShader::VulkanTranslation* pixel_shader_translation = + pixel_shader ? 
static_cast( + pixel_shader->GetOrCreateTranslation( + pixel_shader_modification.value)) + : nullptr; // Update the graphics pipeline, and if the new graphics pipeline has a // different layout, invalidate incompatible descriptor sets before updating @@ -702,8 +727,9 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, const VulkanPipelineCache::PipelineLayoutProvider* pipeline_layout_provider; if (!pipeline_cache_->ConfigurePipeline( vertex_shader_translation, pixel_shader_translation, - primitive_processing_result, framebuffer_key.render_pass_key, - pipeline, pipeline_layout_provider)) { + primitive_processing_result, + render_target_cache_->last_update_render_pass_key(), pipeline, + pipeline_layout_provider)) { return false; } deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, @@ -829,24 +855,28 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // TODO(Triang3l): Memory export. shared_memory_->Use(VulkanSharedMemory::Usage::kRead); - // After all commands that may dispatch or copy, enter the render pass before - // drawing. + // After all commands that may dispatch, copy or insert barriers, enter the + // render pass before drawing. 
+ VkRenderPass render_pass = render_target_cache_->last_update_render_pass(); + const VulkanRenderTargetCache::Framebuffer* framebuffer = + render_target_cache_->last_update_framebuffer(); if (current_render_pass_ != render_pass || - current_framebuffer_ != framebuffer) { + current_framebuffer_ != framebuffer->framebuffer) { if (current_render_pass_ != VK_NULL_HANDLE) { deferred_command_buffer_.CmdVkEndRenderPass(); } current_render_pass_ = render_pass; - current_framebuffer_ = framebuffer; + current_framebuffer_ = framebuffer->framebuffer; VkRenderPassBeginInfo render_pass_begin_info; render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; render_pass_begin_info.pNext = nullptr; render_pass_begin_info.renderPass = render_pass; - render_pass_begin_info.framebuffer = framebuffer; + render_pass_begin_info.framebuffer = framebuffer->framebuffer; render_pass_begin_info.renderArea.offset.x = 0; render_pass_begin_info.renderArea.offset.y = 0; - render_pass_begin_info.renderArea.extent.width = 1280; - render_pass_begin_info.renderArea.extent.height = 720; + // TODO(Triang3l): Actual dirty width / height in the deferred command + // buffer. 
+ render_pass_begin_info.renderArea.extent = framebuffer->host_extent; render_pass_begin_info.clearValueCount = 0; render_pass_begin_info.pClearValues = nullptr; deferred_command_buffer_.CmdVkBeginRenderPass(&render_pass_begin_info, diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 5ce43edc7..30892d079 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -513,7 +513,34 @@ bool VulkanPipelineCache::EnsurePipelineCreated( VkPipelineMultisampleStateCreateInfo multisample_state = {}; multisample_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - multisample_state.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + multisample_state.rasterizationSamples = VkSampleCountFlagBits( + uint32_t(1) << uint32_t(description.render_pass_key.msaa_samples)); + + // TODO(Triang3l): Depth / stencil state. + VkPipelineDepthStencilStateCreateInfo depth_stencil_state = {}; + depth_stencil_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + depth_stencil_state.pNext = nullptr; + + // TODO(Triang3l): Color blend state. + // TODO(Triang3l): Handle disabled separate blending. 
+ VkPipelineColorBlendAttachmentState + color_blend_attachments[xenos::kMaxColorRenderTargets] = {}; + for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { + if (!(description.render_pass_key.depth_and_color_used & (1 << (1 + i)))) { + continue; + } + color_blend_attachments[i].colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + } + VkPipelineColorBlendStateCreateInfo color_blend_state = {}; + color_blend_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + color_blend_state.attachmentCount = + 32 - xe::lzcnt( + uint32_t(description.render_pass_key.depth_and_color_used >> 1)); + color_blend_state.pAttachments = color_blend_attachments; static const VkDynamicState dynamic_states[] = { VK_DYNAMIC_STATE_VIEWPORT, @@ -538,8 +565,8 @@ bool VulkanPipelineCache::EnsurePipelineCreated( pipeline_create_info.pViewportState = &viewport_state; pipeline_create_info.pRasterizationState = &rasterization_state; pipeline_create_info.pMultisampleState = &multisample_state; - pipeline_create_info.pDepthStencilState = nullptr; - pipeline_create_info.pColorBlendState = nullptr; + pipeline_create_info.pDepthStencilState = &depth_stencil_state; + pipeline_create_info.pColorBlendState = &color_blend_state; pipeline_create_info.pDynamicState = &dynamic_state; pipeline_create_info.layout = creation_arguments.pipeline->second.pipeline_layout->GetPipelineLayout(); diff --git a/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc index deeef270f..f78a65d7c 100644 --- a/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc @@ -113,6 +113,7 @@ void VulkanPrimitiveProcessor::BeginSubmission() { builtin_index_buffer_memory_barrier.buffer = builtin_index_buffer_; builtin_index_buffer_memory_barrier.offset = 0; builtin_index_buffer_memory_barrier.size = VK_WHOLE_SIZE; + 
command_processor_.EndRenderPass(); command_buffer.CmdVkPipelineBarrier( VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 1, &builtin_index_buffer_memory_barrier, 0, nullptr); diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index e85b6ea8b..bb9058a82 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -2,15 +2,26 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2021 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ #include "xenia/gpu/vulkan/vulkan_render_target_cache.h" +#include +#include +#include +#include +#include +#include + +#include "xenia/base/assert.h" #include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/gpu/registers.h" #include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include "xenia/ui/vulkan/vulkan_util.h" namespace xe { namespace gpu { @@ -19,13 +30,38 @@ namespace vulkan { VulkanRenderTargetCache::VulkanRenderTargetCache( VulkanCommandProcessor& command_processor, const RegisterFile& register_file) - : command_processor_(command_processor), register_file_(register_file) {} + : RenderTargetCache(register_file), command_processor_(command_processor) {} -VulkanRenderTargetCache::~VulkanRenderTargetCache() { Shutdown(); } +VulkanRenderTargetCache::~VulkanRenderTargetCache() { Shutdown(true); } -bool VulkanRenderTargetCache::Initialize() { return true; } +bool VulkanRenderTargetCache::Initialize() { + InitializeCommon(); + return true; +} -void VulkanRenderTargetCache::Shutdown() { 
ClearCache(); } +void VulkanRenderTargetCache::Shutdown(bool from_destructor) { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + last_update_framebuffer_ = VK_NULL_HANDLE; + for (const auto& framebuffer_pair : framebuffers_) { + dfn.vkDestroyFramebuffer(device, framebuffer_pair.second.framebuffer, + nullptr); + } + framebuffers_.clear(); + + last_update_render_pass_ = VK_NULL_HANDLE; + for (const auto& render_pass_pair : render_passes_) { + dfn.vkDestroyRenderPass(device, render_pass_pair.second, nullptr); + } + render_passes_.clear(); + + if (!from_destructor) { + ShutdownCommon(); + } +} void VulkanRenderTargetCache::ClearCache() { const ui::vulkan::VulkanProvider& provider = @@ -33,15 +69,190 @@ void VulkanRenderTargetCache::ClearCache() { const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + // Framebuffer objects must be destroyed because they reference views of + // attachment images, which may be removed by the common ClearCache. 
+ last_update_framebuffer_ = VK_NULL_HANDLE; for (const auto& framebuffer_pair : framebuffers_) { - dfn.vkDestroyFramebuffer(device, framebuffer_pair.second, nullptr); + dfn.vkDestroyFramebuffer(device, framebuffer_pair.second.framebuffer, + nullptr); } framebuffers_.clear(); + last_update_render_pass_ = VK_NULL_HANDLE; for (const auto& render_pass_pair : render_passes_) { dfn.vkDestroyRenderPass(device, render_pass_pair.second, nullptr); } render_passes_.clear(); + + RenderTargetCache::ClearCache(); +} + +bool VulkanRenderTargetCache::Update(bool is_rasterization_done, + uint32_t shader_writes_color_targets) { + if (!RenderTargetCache::Update(is_rasterization_done, + shader_writes_color_targets)) { + return false; + } + + auto rb_surface_info = register_file().Get(); + RenderTarget* const* depth_and_color_render_targets = + last_update_accumulated_render_targets(); + uint32_t render_targets_are_srgb = + gamma_render_target_as_srgb_ + ? last_update_accumulated_color_targets_are_gamma() + : 0; + + RenderPassKey render_pass_key; + render_pass_key.msaa_samples = rb_surface_info.msaa_samples; + // TODO(Triang3l): 2x MSAA as 4x. + if (depth_and_color_render_targets[0]) { + render_pass_key.depth_and_color_used |= 1 << 0; + render_pass_key.depth_format = + depth_and_color_render_targets[0]->key().GetDepthFormat(); + } + if (depth_and_color_render_targets[1]) { + render_pass_key.depth_and_color_used |= 1 << 1; + render_pass_key.color_0_view_format = + (render_targets_are_srgb & (1 << 0)) + ? xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA + : depth_and_color_render_targets[1]->key().GetColorFormat(); + } + if (depth_and_color_render_targets[2]) { + render_pass_key.depth_and_color_used |= 1 << 2; + render_pass_key.color_1_view_format = + (render_targets_are_srgb & (1 << 1)) + ? 
xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA + : depth_and_color_render_targets[2]->key().GetColorFormat(); + } + if (depth_and_color_render_targets[3]) { + render_pass_key.depth_and_color_used |= 1 << 3; + render_pass_key.color_2_view_format = + (render_targets_are_srgb & (1 << 2)) + ? xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA + : depth_and_color_render_targets[3]->key().GetColorFormat(); + } + if (depth_and_color_render_targets[4]) { + render_pass_key.depth_and_color_used |= 1 << 4; + render_pass_key.color_3_view_format = + (render_targets_are_srgb & (1 << 3)) + ? xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA + : depth_and_color_render_targets[4]->key().GetColorFormat(); + } + + const Framebuffer* framebuffer = last_update_framebuffer_; + VkRenderPass render_pass = last_update_render_pass_key_ == render_pass_key + ? last_update_render_pass_ + : VK_NULL_HANDLE; + if (render_pass == VK_NULL_HANDLE) { + render_pass = GetRenderPass(render_pass_key); + if (render_pass == VK_NULL_HANDLE) { + return false; + } + // Framebuffer for a different render pass needed now. + framebuffer = nullptr; + } + + uint32_t pitch_tiles_at_32bpp = + ((rb_surface_info.surface_pitch + << uint32_t(rb_surface_info.msaa_samples >= xenos::MsaaSamples::k4X)) + + (xenos::kEdramTileWidthSamples - 1)) / + xenos::kEdramTileWidthSamples; + if (framebuffer) { + if (last_update_framebuffer_pitch_tiles_at_32bpp_ != pitch_tiles_at_32bpp || + std::memcmp(last_update_framebuffer_attachments_, + depth_and_color_render_targets, + sizeof(last_update_framebuffer_attachments_))) { + framebuffer = nullptr; + } + } + if (!framebuffer) { + framebuffer = GetFramebuffer(render_pass_key, pitch_tiles_at_32bpp, + depth_and_color_render_targets); + if (!framebuffer) { + return false; + } + } + + // Successful update - write the new configuration. 
+ last_update_render_pass_key_ = render_pass_key; + last_update_render_pass_ = render_pass; + last_update_framebuffer_pitch_tiles_at_32bpp_ = pitch_tiles_at_32bpp; + std::memcpy(last_update_framebuffer_attachments_, + depth_and_color_render_targets, + sizeof(last_update_framebuffer_attachments_)); + last_update_framebuffer_ = framebuffer; + + // Transition the used render targets. + VkPipelineStageFlags barrier_src_stage_mask = 0; + VkPipelineStageFlags barrier_dst_stage_mask = 0; + VkImageMemoryBarrier barrier_image_memory[1 + xenos::kMaxColorRenderTargets]; + uint32_t barrier_image_memory_count = 0; + for (uint32_t i = 0; i < 1 + xenos::kMaxColorRenderTargets; ++i) { + RenderTarget* rt = depth_and_color_render_targets[i]; + if (!rt) { + continue; + } + auto& vulkan_rt = *static_cast(rt); + VkPipelineStageFlags rt_src_stage_mask = vulkan_rt.current_stage_mask(); + VkAccessFlags rt_src_access_mask = vulkan_rt.current_access_mask(); + VkImageLayout rt_old_layout = vulkan_rt.current_layout(); + VkPipelineStageFlags rt_dst_stage_mask; + VkAccessFlags rt_dst_access_mask; + VkImageLayout rt_new_layout; + if (i) { + rt_dst_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + rt_dst_access_mask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + rt_new_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + } else { + rt_dst_stage_mask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + rt_dst_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + rt_new_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + } + bool rt_image_memory_barrier_needed = + rt_src_access_mask != rt_dst_access_mask || + rt_old_layout != rt_new_layout; + if (rt_image_memory_barrier_needed || + rt_src_stage_mask != rt_dst_stage_mask) { + barrier_src_stage_mask |= rt_src_stage_mask; + barrier_dst_stage_mask |= rt_dst_stage_mask; + if 
(rt_image_memory_barrier_needed) { + VkImageMemoryBarrier& rt_image_memory_barrier = + barrier_image_memory[barrier_image_memory_count++]; + rt_image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + rt_image_memory_barrier.pNext = nullptr; + rt_image_memory_barrier.srcAccessMask = rt_src_access_mask; + rt_image_memory_barrier.dstAccessMask = rt_dst_access_mask; + rt_image_memory_barrier.oldLayout = rt_old_layout; + rt_image_memory_barrier.newLayout = rt_new_layout; + rt_image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + rt_image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + rt_image_memory_barrier.image = vulkan_rt.image(); + ui::vulkan::util::InitializeSubresourceRange( + rt_image_memory_barrier.subresourceRange, + i ? VK_IMAGE_ASPECT_COLOR_BIT + : (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)); + } + vulkan_rt.SetUsage(rt_dst_stage_mask, rt_dst_access_mask, rt_new_layout); + } + } + if (barrier_src_stage_mask || barrier_dst_stage_mask || + barrier_image_memory_count) { + if (!barrier_src_stage_mask) { + barrier_src_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + } + if (!barrier_dst_stage_mask) { + barrier_dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + command_processor_.EndRenderPass(); + command_processor_.deferred_command_buffer().CmdVkPipelineBarrier( + barrier_src_stage_mask, barrier_dst_stage_mask, 0, 0, nullptr, 0, + nullptr, barrier_image_memory_count, barrier_image_memory); + } + + return true; } VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) { @@ -50,30 +261,128 @@ VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) { return it->second; } - // TODO(Triang3l): Attachments and dependencies. 
+ VkSampleCountFlagBits samples; + switch (key.msaa_samples) { + case xenos::MsaaSamples::k1X: + samples = VK_SAMPLE_COUNT_1_BIT; + break; + case xenos::MsaaSamples::k2X: + // Using unconditionally because if 2x is emulated as 4x, the key will + // also contain 4x. + samples = VK_SAMPLE_COUNT_2_BIT; + break; + case xenos::MsaaSamples::k4X: + samples = VK_SAMPLE_COUNT_4_BIT; + break; + default: + return VK_NULL_HANDLE; + } - VkSubpassDescription subpass_description; - subpass_description.flags = 0; - subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - subpass_description.inputAttachmentCount = 0; - subpass_description.pInputAttachments = nullptr; - subpass_description.colorAttachmentCount = 0; - subpass_description.pColorAttachments = nullptr; - subpass_description.pResolveAttachments = nullptr; - subpass_description.pDepthStencilAttachment = nullptr; - subpass_description.preserveAttachmentCount = 0; - subpass_description.pPreserveAttachments = nullptr; + VkAttachmentDescription attachments[1 + xenos::kMaxColorRenderTargets]; + if (key.depth_and_color_used & 0b1) { + VkAttachmentDescription& attachment = attachments[0]; + attachment.flags = 0; + attachment.format = GetDepthVulkanFormat(key.depth_format); + attachment.samples = samples; + attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + } + VkAttachmentReference color_attachments[xenos::kMaxColorRenderTargets]; + xenos::ColorRenderTargetFormat color_formats[] = { + key.color_0_view_format, + key.color_1_view_format, + key.color_2_view_format, + key.color_3_view_format, + }; + for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { + VkAttachmentReference& 
color_attachment = color_attachments[i]; + color_attachment.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + uint32_t attachment_bit = uint32_t(1) << (1 + i); + if (!(key.depth_and_color_used & attachment_bit)) { + color_attachment.attachment = VK_ATTACHMENT_UNUSED; + continue; + } + uint32_t attachment_index = + xe::bit_count(key.depth_and_color_used & (attachment_bit - 1)); + color_attachment.attachment = attachment_index; + VkAttachmentDescription& attachment = attachments[attachment_index]; + attachment.flags = 0; + attachment.format = GetColorVulkanFormat(color_formats[i]); + attachment.samples = samples; + attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + attachment.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + } + + VkAttachmentReference depth_stencil_attachment; + depth_stencil_attachment.attachment = + (key.depth_and_color_used & 0b1) ? 0 : VK_ATTACHMENT_UNUSED; + depth_stencil_attachment.layout = + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + VkSubpassDescription subpass; + subpass.flags = 0; + subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass.inputAttachmentCount = 0; + subpass.pInputAttachments = nullptr; + subpass.colorAttachmentCount = + 32 - xe::lzcnt(uint32_t(key.depth_and_color_used >> 1)); + subpass.pColorAttachments = color_attachments; + subpass.pResolveAttachments = nullptr; + subpass.pDepthStencilAttachment = + (key.depth_and_color_used & 0b1) ? 
&depth_stencil_attachment : nullptr; + subpass.preserveAttachmentCount = 0; + subpass.pPreserveAttachments = nullptr; + + VkPipelineStageFlags dependency_stage_mask = 0; + VkAccessFlags dependency_access_mask = 0; + if (key.depth_and_color_used & 0b1) { + dependency_stage_mask |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + dependency_access_mask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } + if (key.depth_and_color_used >> 1) { + dependency_stage_mask |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency_access_mask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + VkSubpassDependency subpass_dependencies[2]; + subpass_dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL; + subpass_dependencies[0].dstSubpass = 0; + subpass_dependencies[0].srcStageMask = dependency_stage_mask; + subpass_dependencies[0].dstStageMask = dependency_stage_mask; + subpass_dependencies[0].srcAccessMask = dependency_access_mask; + subpass_dependencies[0].dstAccessMask = dependency_access_mask; + subpass_dependencies[0].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; + subpass_dependencies[1].srcSubpass = 0; + subpass_dependencies[1].dstSubpass = VK_SUBPASS_EXTERNAL; + subpass_dependencies[1].srcStageMask = dependency_stage_mask; + subpass_dependencies[1].dstStageMask = dependency_stage_mask; + subpass_dependencies[1].srcAccessMask = dependency_access_mask; + subpass_dependencies[1].dstAccessMask = dependency_access_mask; + subpass_dependencies[1].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; VkRenderPassCreateInfo render_pass_create_info; render_pass_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; render_pass_create_info.pNext = nullptr; render_pass_create_info.flags = 0; - render_pass_create_info.attachmentCount = 0; - render_pass_create_info.pAttachments = nullptr; + render_pass_create_info.attachmentCount = + 
xe::bit_count(key.depth_and_color_used); + render_pass_create_info.pAttachments = attachments; render_pass_create_info.subpassCount = 1; - render_pass_create_info.pSubpasses = &subpass_description; - render_pass_create_info.dependencyCount = 0; - render_pass_create_info.pDependencies = nullptr; + render_pass_create_info.pSubpasses = &subpass; + render_pass_create_info.dependencyCount = + key.depth_and_color_used ? uint32_t(xe::countof(subpass_dependencies)) + : 0; + render_pass_create_info.pDependencies = subpass_dependencies; const ui::vulkan::VulkanProvider& provider = command_processor_.GetVulkanContext().GetVulkanProvider(); @@ -89,15 +398,343 @@ VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) { return render_pass; } -VkFramebuffer VulkanRenderTargetCache::GetFramebuffer(FramebufferKey key) { - auto it = framebuffers_.find(key); - if (it != framebuffers_.end()) { - return it->second; +VkFormat VulkanRenderTargetCache::GetDepthVulkanFormat( + xenos::DepthRenderTargetFormat format) const { + // TODO(Triang3l): Conditional 24-bit depth. + return VK_FORMAT_D32_SFLOAT_S8_UINT; +} + +VkFormat VulkanRenderTargetCache::GetColorVulkanFormat( + xenos::ColorRenderTargetFormat format) const { + switch (format) { + case xenos::ColorRenderTargetFormat::k_8_8_8_8: + return VK_FORMAT_R8G8B8A8_UNORM; + case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: + return gamma_render_target_as_srgb_ ? 
VK_FORMAT_R8G8B8A8_SRGB + : VK_FORMAT_R8G8B8A8_UNORM; + case xenos::ColorRenderTargetFormat::k_2_10_10_10: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: + return VK_FORMAT_A8B8G8R8_UNORM_PACK32; + case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16: + return VK_FORMAT_R16G16B16A16_SFLOAT; + case xenos::ColorRenderTargetFormat::k_16_16: + // TODO(Triang3l): Fallback to float16 (disregarding clearing correctness + // likely) - possibly on render target gathering, treating them entirely + // as float16. + return VK_FORMAT_R16G16_SNORM; + case xenos::ColorRenderTargetFormat::k_16_16_16_16: + // TODO(Triang3l): Fallback to float16 (disregarding clearing correctness + // likely) - possibly on render target gathering, treating them entirely + // as float16. + return VK_FORMAT_R16G16B16A16_SNORM; + case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: + return VK_FORMAT_R16G16_SFLOAT; + case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: + return VK_FORMAT_R16G16B16A16_SFLOAT; + case xenos::ColorRenderTargetFormat::k_32_FLOAT: + return VK_FORMAT_R32_SFLOAT; + case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: + return VK_FORMAT_R32G32_SFLOAT; + default: + assert_unhandled_case(format); + return VK_FORMAT_UNDEFINED; + } +} + +VkFormat VulkanRenderTargetCache::GetColorOwnershipTransferVulkanFormat( + xenos::ColorRenderTargetFormat format, bool* is_integer_out) const { + if (is_integer_out) { + *is_integer_out = true; + } + // Floating-point numbers have NaNs that need to be propagated without + // modifications to the bit representation, and SNORM has two representations + // of -1. 
+ switch (format) { + case xenos::ColorRenderTargetFormat::k_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: + return VK_FORMAT_R16G16_UINT; + case xenos::ColorRenderTargetFormat::k_16_16_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: + return VK_FORMAT_R16G16B16A16_UINT; + case xenos::ColorRenderTargetFormat::k_32_FLOAT: + return VK_FORMAT_R32_UINT; + case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: + return VK_FORMAT_R32G32_UINT; + default: + if (is_integer_out) { + *is_integer_out = false; + } + return GetColorVulkanFormat(format); + } +} + +VulkanRenderTargetCache::VulkanRenderTarget::~VulkanRenderTarget() { + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + if (view_color_transfer_separate_ != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_color_transfer_separate_, nullptr); + } + if (view_srgb_ != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_srgb_, nullptr); + } + if (view_stencil_ != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_stencil_, nullptr); + } + if (view_depth_stencil_ != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_depth_stencil_, nullptr); + } + dfn.vkDestroyImageView(device, view_depth_color_, nullptr); + dfn.vkDestroyImage(device, image_, nullptr); + dfn.vkFreeMemory(device, memory_, nullptr); +} + +uint32_t VulkanRenderTargetCache::GetMaxRenderTargetWidth() const { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + return provider.device_properties().limits.maxFramebufferWidth; +} + +uint32_t VulkanRenderTargetCache::GetMaxRenderTargetHeight() const { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + return provider.device_properties().limits.maxFramebufferHeight; +} + +RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget( + RenderTargetKey key) { + const 
ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + // Create the image. + + VkImageCreateInfo image_create_info; + image_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + image_create_info.pNext = nullptr; + image_create_info.flags = 0; + image_create_info.imageType = VK_IMAGE_TYPE_2D; + // TODO(Triang3l): Resolution scaling. + image_create_info.extent.width = key.GetWidth(); + image_create_info.extent.height = + GetRenderTargetHeight(key.pitch_tiles_at_32bpp, key.msaa_samples); + image_create_info.extent.depth = 1; + image_create_info.mipLevels = 1; + image_create_info.arrayLayers = 1; + // TODO(Triang3l): 2x MSAA as 4x. + image_create_info.samples = + VkSampleCountFlagBits(uint32_t(1) << uint32_t(key.msaa_samples)); + image_create_info.tiling = VK_IMAGE_TILING_OPTIMAL; + image_create_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + image_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + image_create_info.queueFamilyIndexCount = 0; + image_create_info.pQueueFamilyIndices = nullptr; + image_create_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + VkFormat transfer_format; + bool is_srgb_view_needed = false; + if (key.is_depth) { + image_create_info.format = GetDepthVulkanFormat(key.GetDepthFormat()); + transfer_format = image_create_info.format; + image_create_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + } else { + xenos::ColorRenderTargetFormat color_format = key.GetColorFormat(); + image_create_info.format = GetColorVulkanFormat(color_format); + transfer_format = GetColorOwnershipTransferVulkanFormat(color_format); + is_srgb_view_needed = + gamma_render_target_as_srgb_ && + (color_format == xenos::ColorRenderTargetFormat::k_8_8_8_8 || + color_format == xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA); + if (image_create_info.format != transfer_format || is_srgb_view_needed) { 
+ image_create_info.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + } + image_create_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + } + if (image_create_info.format == VK_FORMAT_UNDEFINED) { + XELOGE("VulkanRenderTargetCache: Unknown {} render target format {}", + key.is_depth ? "depth" : "color", key.resource_format); + return nullptr; + } + VkImage image; + if (dfn.vkCreateImage(device, &image_create_info, nullptr, &image) != + VK_SUCCESS) { + // TODO(Triang3l): Error message. + return nullptr; } - VkRenderPass render_pass = GetRenderPass(key.render_pass_key); + // Allocate and bind the memory. + + VkMemoryAllocateInfo memory_allocate_info; + VkMemoryRequirements memory_requirements; + dfn.vkGetImageMemoryRequirements(device, image, &memory_requirements); + if (!xe::bit_scan_forward(memory_requirements.memoryTypeBits & + provider.memory_types_device_local(), + &memory_allocate_info.memoryTypeIndex)) { + dfn.vkDestroyImage(device, image, nullptr); + return nullptr; + } + memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + VkMemoryDedicatedAllocateInfoKHR memory_dedicated_allocate_info; + if (provider.device_extensions().khr_dedicated_allocation) { + memory_dedicated_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; + memory_dedicated_allocate_info.pNext = nullptr; + memory_dedicated_allocate_info.image = image; + memory_dedicated_allocate_info.buffer = VK_NULL_HANDLE; + memory_allocate_info.pNext = &memory_dedicated_allocate_info; + } else { + memory_allocate_info.pNext = nullptr; + } + memory_allocate_info.allocationSize = memory_requirements.size; + VkDeviceMemory memory; + if (dfn.vkAllocateMemory(device, &memory_allocate_info, nullptr, &memory) != + VK_SUCCESS) { + // TODO(Triang3l): Error message. + dfn.vkDestroyImage(device, image, nullptr); + return nullptr; + } + if (dfn.vkBindImageMemory(device, image, memory, 0) != VK_SUCCESS) { + // TODO(Triang3l): Error message. 
+ dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + + // Create the image views. + + VkImageViewCreateInfo view_create_info; + view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view_create_info.pNext = nullptr; + view_create_info.flags = 0; + view_create_info.image = image; + view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_create_info.format = image_create_info.format; + view_create_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; + view_create_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; + view_create_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; + view_create_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; + ui::vulkan::util::InitializeSubresourceRange( + view_create_info.subresourceRange, + key.is_depth ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT); + VkImageView view_depth_color; + if (dfn.vkCreateImageView(device, &view_create_info, nullptr, + &view_depth_color) != VK_SUCCESS) { + // TODO(Triang3l): Error message. + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + VkImageView view_depth_stencil = VK_NULL_HANDLE; + VkImageView view_stencil = VK_NULL_HANDLE; + VkImageView view_srgb = VK_NULL_HANDLE; + VkImageView view_color_transfer_separate = VK_NULL_HANDLE; + if (key.is_depth) { + view_create_info.subresourceRange.aspectMask = + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + if (dfn.vkCreateImageView(device, &view_create_info, nullptr, + &view_depth_stencil) != VK_SUCCESS) { + // TODO(Triang3l): Error message. 
+ dfn.vkDestroyImageView(device, view_depth_color, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; + if (dfn.vkCreateImageView(device, &view_create_info, nullptr, + &view_stencil) != VK_SUCCESS) { + // TODO(Triang3l): Error message. + dfn.vkDestroyImageView(device, view_depth_stencil, nullptr); + dfn.vkDestroyImageView(device, view_depth_color, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + } else { + if (is_srgb_view_needed) { + view_create_info.format = VK_FORMAT_R8G8B8A8_SRGB; + if (dfn.vkCreateImageView(device, &view_create_info, nullptr, + &view_srgb) != VK_SUCCESS) { + // TODO(Triang3l): Error message. + dfn.vkDestroyImageView(device, view_depth_color, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + } + if (transfer_format != image_create_info.format) { + view_create_info.format = transfer_format; + if (dfn.vkCreateImageView(device, &view_create_info, nullptr, + &view_color_transfer_separate) != VK_SUCCESS) { + // TODO(Triang3l): Error message. 
+ if (view_srgb != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_srgb, nullptr); + } + dfn.vkDestroyImageView(device, view_depth_color, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + } + } + + VkImageView view_transfer_separate = VK_NULL_HANDLE; + + return new VulkanRenderTarget(key, provider, image, memory, view_depth_color, + view_depth_stencil, view_stencil, view_srgb, + view_color_transfer_separate); +} + +const VulkanRenderTargetCache::Framebuffer* +VulkanRenderTargetCache::GetFramebuffer( + RenderPassKey render_pass_key, uint32_t pitch_tiles_at_32bpp, + const RenderTarget* const* depth_and_color_render_targets) { + FramebufferKey key; + key.render_pass_key = render_pass_key; + key.pitch_tiles_at_32bpp = pitch_tiles_at_32bpp; + if (render_pass_key.depth_and_color_used & (1 << 0)) { + key.depth_base_tiles = depth_and_color_render_targets[0]->key().base_tiles; + } + if (render_pass_key.depth_and_color_used & (1 << 1)) { + key.color_0_base_tiles = + depth_and_color_render_targets[1]->key().base_tiles; + } + if (render_pass_key.depth_and_color_used & (1 << 2)) { + key.color_1_base_tiles = + depth_and_color_render_targets[2]->key().base_tiles; + } + if (render_pass_key.depth_and_color_used & (1 << 3)) { + key.color_2_base_tiles = + depth_and_color_render_targets[3]->key().base_tiles; + } + if (render_pass_key.depth_and_color_used & (1 << 4)) { + key.color_3_base_tiles = + depth_and_color_render_targets[4]->key().base_tiles; + } + auto it = framebuffers_.find(key); + if (it != framebuffers_.end()) { + return &it->second; + } + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + VkRenderPass render_pass = GetRenderPass(render_pass_key); if (render_pass == VK_NULL_HANDLE) { - return VK_NULL_HANDLE; + return 
nullptr; + } + + VkImageView attachments[1 + xenos::kMaxColorRenderTargets]; + uint32_t attachment_count = 0; + uint32_t depth_and_color_rts_remaining = render_pass_key.depth_and_color_used; + uint32_t rt_index; + while (xe::bit_scan_forward(depth_and_color_rts_remaining, &rt_index)) { + depth_and_color_rts_remaining &= ~(uint32_t(1) << rt_index); + const auto& vulkan_rt = *static_cast( + depth_and_color_render_targets[rt_index]); + attachments[attachment_count++] = rt_index ? vulkan_rt.view_depth_color() + : vulkan_rt.view_depth_stencil(); } VkFramebufferCreateInfo framebuffer_create_info; @@ -105,30 +742,33 @@ VkFramebuffer VulkanRenderTargetCache::GetFramebuffer(FramebufferKey key) { framebuffer_create_info.pNext = nullptr; framebuffer_create_info.flags = 0; framebuffer_create_info.renderPass = render_pass; - framebuffer_create_info.attachmentCount = 0; - framebuffer_create_info.pAttachments = nullptr; - framebuffer_create_info.width = 1280; - framebuffer_create_info.height = 720; + framebuffer_create_info.attachmentCount = attachment_count; + framebuffer_create_info.pAttachments = attachments; + VkExtent2D host_extent; + if (pitch_tiles_at_32bpp) { + host_extent.width = RenderTargetKey::GetWidth(pitch_tiles_at_32bpp, + render_pass_key.msaa_samples); + host_extent.height = GetRenderTargetHeight(pitch_tiles_at_32bpp, + render_pass_key.msaa_samples); + } else { + assert_zero(render_pass_key.depth_and_color_used); + host_extent.width = 0; + host_extent.height = 0; + } + // Vulkan requires width and height greater than 0. 
+ framebuffer_create_info.width = std::max(host_extent.width, uint32_t(1)); + framebuffer_create_info.height = std::max(host_extent.height, uint32_t(1)); framebuffer_create_info.layers = 1; - - const ui::vulkan::VulkanProvider& provider = - command_processor_.GetVulkanContext().GetVulkanProvider(); - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); - VkDevice device = provider.device(); VkFramebuffer framebuffer; if (dfn.vkCreateFramebuffer(device, &framebuffer_create_info, nullptr, &framebuffer) != VK_SUCCESS) { - XELOGE("Failed to create a Vulkan framebuffer"); - return VK_NULL_HANDLE; + return nullptr; } - framebuffers_.emplace(key, framebuffer); - return framebuffer; -} - -bool VulkanRenderTargetCache::UpdateRenderTargets( - FramebufferKey& framebuffer_key_out) { - framebuffer_key_out = FramebufferKey(); - return true; + // Creates at a persistent location - safe to use pointers. + return &framebuffers_ + .emplace(std::piecewise_construct, std::forward_as_tuple(key), + std::forward_as_tuple(framebuffer, host_extent)) + .first->second; } } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h index 11be41612..080724ceb 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2021 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ @@ -14,8 +14,8 @@ #include #include -#include "xenia/base/xxhash.h" -#include "xenia/gpu/register_file.h" +#include "xenia/base/hash.h" +#include "xenia/gpu/render_target_cache.h" #include "xenia/ui/vulkan/vulkan_provider.h" namespace xe { @@ -24,18 +24,183 @@ namespace vulkan { class VulkanCommandProcessor; -// TODO(Triang3l): Create a common base for both the Vulkan and the Direct3D -// implementations. -class VulkanRenderTargetCache { +class VulkanRenderTargetCache final : public RenderTargetCache { public: union RenderPassKey { + struct { + // If emulating 2x as 4x, set this to 4x for 2x not to create unnecessary + // render pass objects. + xenos::MsaaSamples msaa_samples : xenos::kMsaaSamplesBits; // 2 + // << 0 is depth, << 1...4 is color. + uint32_t depth_and_color_used : 1 + xenos::kMaxColorRenderTargets; // 7 + // 0 for unused attachments. + // If VK_FORMAT_D24_UNORM_S8_UINT is not supported, this must be kD24FS8 + // even for kD24S8. + xenos::DepthRenderTargetFormat depth_format + : xenos::kDepthRenderTargetFormatBits; // 8 + // Linear or sRGB included if host sRGB is used. 
+ xenos::ColorRenderTargetFormat color_0_view_format + : xenos::kColorRenderTargetFormatBits; // 12 + xenos::ColorRenderTargetFormat color_1_view_format + : xenos::kColorRenderTargetFormatBits; // 16 + xenos::ColorRenderTargetFormat color_2_view_format + : xenos::kColorRenderTargetFormatBits; // 20 + xenos::ColorRenderTargetFormat color_3_view_format + : xenos::kColorRenderTargetFormatBits; // 24 + }; uint32_t key = 0; + struct Hasher { + size_t operator()(const RenderPassKey& key) const { + return std::hash{}(key.key); + } + }; + bool operator==(const RenderPassKey& other_key) const { + return key == other_key.key; + } + bool operator!=(const RenderPassKey& other_key) const { + return !(*this == other_key); + } }; - static_assert(sizeof(RenderPassKey) == sizeof(uint32_t)); + static_assert_size(RenderPassKey, sizeof(uint32_t)); + + struct Framebuffer { + VkFramebuffer framebuffer; + VkExtent2D host_extent; + Framebuffer(VkFramebuffer framebuffer, const VkExtent2D& host_extent) + : framebuffer(framebuffer), host_extent(host_extent) {} + }; + + VulkanRenderTargetCache(VulkanCommandProcessor& command_processor, + const RegisterFile& register_file); + ~VulkanRenderTargetCache(); + + bool Initialize(); + void Shutdown(bool from_destructor = false); + void ClearCache() override; + + // TOOD(Triang3l): Fragment shader interlock. + Path GetPath() const override { return Path::kHostRenderTargets; } + + // TODO(Triang3l): Resolution scaling. + uint32_t GetResolutionScale() const override { return 1; } + + bool Update(bool is_rasterization_done, + uint32_t shader_writes_color_targets) override; + // Binding information for the last successful update. 
+ RenderPassKey last_update_render_pass_key() const { + return last_update_render_pass_key_; + } + VkRenderPass last_update_render_pass() const { + return last_update_render_pass_; + } + const Framebuffer* last_update_framebuffer() const { + return last_update_framebuffer_; + } + + // Returns the render pass object, or VK_NULL_HANDLE if failed to create. + // A render pass managed by the render target cache may be ended and resumed + // at any time (to allow for things like copying and texture loading). + VkRenderPass GetRenderPass(RenderPassKey key); + + VkFormat GetDepthVulkanFormat(xenos::DepthRenderTargetFormat format) const; + VkFormat GetColorVulkanFormat(xenos::ColorRenderTargetFormat format) const; + VkFormat GetColorOwnershipTransferVulkanFormat( + xenos::ColorRenderTargetFormat format, + bool* is_integer_out = nullptr) const; + + protected: + // Can only be destroyed when framebuffers referencing it are destroyed! + class VulkanRenderTarget final : public RenderTarget { + public: + // Takes ownership of the Vulkan objects passed to the constructor. 
+ VulkanRenderTarget(RenderTargetKey key, + const ui::vulkan::VulkanProvider& provider, + VkImage image, VkDeviceMemory memory, + VkImageView view_depth_color, + VkImageView view_depth_stencil, VkImageView view_stencil, + VkImageView view_srgb, + VkImageView view_color_transfer_separate) + : RenderTarget(key), + provider_(provider), + image_(image), + memory_(memory), + view_depth_color_(view_depth_color), + view_depth_stencil_(view_depth_stencil), + view_stencil_(view_stencil), + view_srgb_(view_srgb), + view_color_transfer_separate_(view_color_transfer_separate) {} + ~VulkanRenderTarget(); + + VkImage image() const { return image_; } + + VkImageView view_depth_color() const { return view_depth_color_; } + VkImageView view_depth_stencil() const { return view_depth_stencil_; } + + VkPipelineStageFlags current_stage_mask() const { + return current_stage_mask_; + } + VkAccessFlags current_access_mask() const { return current_access_mask_; } + VkImageLayout current_layout() const { return current_layout_; } + void SetUsage(VkPipelineStageFlags stage_mask, VkAccessFlags access_mask, + VkImageLayout layout) { + current_stage_mask_ = stage_mask; + current_access_mask_ = access_mask; + current_layout_ = layout; + } + + private: + const ui::vulkan::VulkanProvider& provider_; + + VkImage image_; + VkDeviceMemory memory_; + + // TODO(Triang3l): Per-format drawing views for mutable formats with EDRAM + // aliasing without transfers. + VkImageView view_depth_color_; + // Optional views. 
+ VkImageView view_depth_stencil_; + VkImageView view_stencil_; + VkImageView view_srgb_; + VkImageView view_color_transfer_separate_; + + VkPipelineStageFlags current_stage_mask_ = 0; + VkAccessFlags current_access_mask_ = 0; + VkImageLayout current_layout_ = VK_IMAGE_LAYOUT_UNDEFINED; + }; + + uint32_t GetMaxRenderTargetWidth() const override; + uint32_t GetMaxRenderTargetHeight() const override; + + RenderTarget* CreateRenderTarget(RenderTargetKey key) override; + + // TODO(Triang3l): Check actual unorm24 support. + bool IsHostDepthEncodingDifferent( + xenos::DepthRenderTargetFormat format) const override { + return true; + } + + private: + VulkanCommandProcessor& command_processor_; + + // RenderPassKey::key -> VkRenderPass. + std::unordered_map render_passes_; + + // For host render targets. struct FramebufferKey { RenderPassKey render_pass_key; + // Same as RenderTargetKey::pitch_tiles_at_32bpp. + uint32_t pitch_tiles_at_32bpp : 8; // 8 + // [0, 2047]. + uint32_t depth_base_tiles : xenos::kEdramBaseTilesBits - 1; // 19 + uint32_t color_0_base_tiles : xenos::kEdramBaseTilesBits - 1; // 30 + + uint32_t color_1_base_tiles : xenos::kEdramBaseTilesBits - 1; // 43 + uint32_t color_2_base_tiles : xenos::kEdramBaseTilesBits - 1; // 54 + + uint32_t color_3_base_tiles : xenos::kEdramBaseTilesBits - 1; // 75 + // Including all the padding, for a stable hash. 
FramebufferKey() { Reset(); } FramebufferKey(const FramebufferKey& key) { @@ -48,44 +213,27 @@ class VulkanRenderTargetCache { bool operator==(const FramebufferKey& key) const { return std::memcmp(this, &key, sizeof(*this)) == 0; } + using Hasher = xe::hash::XXHasher; void Reset() { std::memset(this, 0, sizeof(*this)); } - uint64_t GetHash() const { return XXH3_64bits(this, sizeof(*this)); } - struct Hasher { - size_t operator()(const FramebufferKey& description) const { - return size_t(description.GetHash()); - } - }; }; - static_assert(sizeof(FramebufferKey) == sizeof(uint32_t)); - - VulkanRenderTargetCache(VulkanCommandProcessor& command_processor, - const RegisterFile& register_file); - ~VulkanRenderTargetCache(); - - bool Initialize(); - void Shutdown(); - void ClearCache(); - - // Returns the render pass object, or VK_NULL_HANDLE if failed to create. - // A render pass managed by the render target cache may be ended and resumed - // at any time (to allow for things like copying and texture loading). - VkRenderPass GetRenderPass(RenderPassKey key); // Returns the framebuffer object, or VK_NULL_HANDLE if failed to create. - VkFramebuffer GetFramebuffer(FramebufferKey key); + const Framebuffer* GetFramebuffer( + RenderPassKey render_pass_key, uint32_t pitch_tiles_at_32bpp, + const RenderTarget* const* depth_and_color_render_targets); - // May dispatch computations. - bool UpdateRenderTargets(FramebufferKey& framebuffer_key_out); + bool gamma_render_target_as_srgb_ = false; - private: - VulkanCommandProcessor& command_processor_; - const RegisterFile& register_file_; - - // RenderPassKey::key -> VkRenderPass. 
- std::unordered_map render_passes_; - - std::unordered_map + std::unordered_map framebuffers_; + + RenderPassKey last_update_render_pass_key_; + VkRenderPass last_update_render_pass_ = VK_NULL_HANDLE; + uint32_t last_update_framebuffer_pitch_tiles_at_32bpp_ = 0; + const RenderTarget* const* + last_update_framebuffer_attachments_[1 + xenos::kMaxColorRenderTargets] = + {}; + const Framebuffer* last_update_framebuffer_ = VK_NULL_HANDLE; }; } // namespace vulkan diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index b30386793..245fbb684 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -834,8 +834,6 @@ bool VulkanImmediateDrawer::CreateTextureResource( const VulkanProvider& provider = context_.GetVulkanProvider(); const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - bool dedicated_allocation_supported = - provider.device_extensions().khr_dedicated_allocation; // Create the image and the descriptor. 
@@ -882,7 +880,7 @@ bool VulkanImmediateDrawer::CreateTextureResource( } image_memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; VkMemoryDedicatedAllocateInfoKHR image_memory_dedicated_allocate_info; - if (dedicated_allocation_supported) { + if (provider.device_extensions().khr_dedicated_allocation) { image_memory_dedicated_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; image_memory_dedicated_allocate_info.pNext = nullptr; From 8ccb00d03d91214dcfc9f9296db330b282a9da85 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 7 Feb 2022 23:00:23 +0300 Subject: [PATCH 072/123] [SPIR-V] Store vfetch_full address in a variable --- src/xenia/gpu/spirv_shader_translator.cc | 5 +- src/xenia/gpu/spirv_shader_translator.h | 5 +- .../gpu/spirv_shader_translator_fetch.cc | 115 ++++++++++-------- 3 files changed, 74 insertions(+), 51 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 4ee35cf11..aee9ec161 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2021 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ @@ -396,6 +396,9 @@ void SpirvShaderTranslator::StartTranslation() { var_main_previous_scalar_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_float_, "xe_var_previous_scalar", const_float_0_); + var_main_vfetch_address_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_int_, + "xe_var_vfetch_address", const_int_0_); uint32_t register_array_size = register_count(); if (register_array_size) { id_vector_temp_.clear(); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 94c58a976..c5f41df09 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2021 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -439,6 +439,9 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id var_main_address_absolute_; // float. spv::Id var_main_previous_scalar_; + // `base + index * stride` in dwords from the last vfetch_full as it may be + // needed by vfetch_mini - int. + spv::Id var_main_vfetch_address_; // float4[register_count()]. spv::Id var_main_registers_; // VS only - float3 (special exports). 
diff --git a/src/xenia/gpu/spirv_shader_translator_fetch.cc b/src/xenia/gpu/spirv_shader_translator_fetch.cc index 51a4d720f..23dc33765 100644 --- a/src/xenia/gpu/spirv_shader_translator_fetch.cc +++ b/src/xenia/gpu/spirv_shader_translator_fetch.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -27,7 +27,9 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( uint32_t used_result_components = instr.result.GetUsedResultComponents(); uint32_t needed_words = xenos::GetVertexFormatNeededWords( instr.attributes.data_format, used_result_components); - if (!needed_words) { + // If this is vfetch_full, the address may still be needed for vfetch_mini - + // don't exit before calculating the address. + if (!needed_words && instr.is_mini_fetch) { // Nothing to load - just constant 0/1 writes, or the swizzle includes only // components that don't exist in the format (writing zero instead of them). // Unpacking assumes at least some word is needed. @@ -37,56 +39,71 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( EnsureBuildPointAvailable(); - // Get the base address in dwords from the bits 2:31 of the first fetch - // constant word. uint32_t fetch_constant_word_0_index = instr.operands[1].storage_index << 1; - id_vector_temp_.clear(); - id_vector_temp_.reserve(3); - // The only element of the fetch constant buffer. - id_vector_temp_.push_back(const_int_0_); - // Vector index. - id_vector_temp_.push_back( - builder_->makeIntConstant(int(fetch_constant_word_0_index >> 2))); - // Component index. 
- id_vector_temp_.push_back( - builder_->makeIntConstant(int(fetch_constant_word_0_index & 3))); - spv::Id fetch_constant_word_0 = builder_->createLoad( - builder_->createAccessChain(spv::StorageClassUniform, - uniform_fetch_constants_, id_vector_temp_), - spv::NoPrecision); - // TODO(Triang3l): Verify the fetch constant type (that it's a vertex fetch, - // not a texture fetch) here instead of dropping draws with invalid vertex - // fetch constants on the CPU when proper bound checks are added - vfetch may - // be conditional, so fetch constants may also be used conditionally. - spv::Id address = builder_->createUnaryOp( - spv::OpBitcast, type_int_, - builder_->createBinOp(spv::OpShiftRightLogical, type_uint_, - fetch_constant_word_0, - builder_->makeUintConstant(2))); - if (instr.attributes.stride) { - // Convert the index to an integer by flooring or by rounding to the nearest - // (as floor(index + 0.5) because rounding to the nearest even makes no - // sense for addressing, both 1.5 and 2.5 would be 2). - // http://web.archive.org/web/20100302145413/http://msdn.microsoft.com:80/en-us/library/bb313960.aspx - spv::Id index = GetOperandComponents(LoadOperandStorage(instr.operands[0]), - instr.operands[0], 0b0001); - if (instr.attributes.is_index_rounded) { - index = builder_->createBinOp(spv::OpFAdd, type_float_, index, - builder_->makeFloatConstant(0.5f)); - builder_->addDecoration(index, spv::DecorationNoContraction); - } + + spv::Id address; + if (instr.is_mini_fetch) { + // `base + index * stride` loaded by vfetch_full. + address = builder_->createLoad(var_main_vfetch_address_, spv::NoPrecision); + } else { + // Get the base address in dwords from the bits 2:31 of the first fetch + // constant word. 
id_vector_temp_.clear(); - id_vector_temp_.push_back(index); - index = builder_->createUnaryOp( - spv::OpConvertFToS, type_int_, - builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, - GLSLstd450Floor, id_vector_temp_)); - if (instr.attributes.stride > 1) { - index = builder_->createBinOp( - spv::OpIMul, type_int_, index, - builder_->makeIntConstant(int(instr.attributes.stride))); + id_vector_temp_.reserve(3); + // The only element of the fetch constant buffer. + id_vector_temp_.push_back(const_int_0_); + // Vector index. + id_vector_temp_.push_back( + builder_->makeIntConstant(int(fetch_constant_word_0_index >> 2))); + // Component index. + id_vector_temp_.push_back( + builder_->makeIntConstant(int(fetch_constant_word_0_index & 3))); + spv::Id fetch_constant_word_0 = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_fetch_constants_, id_vector_temp_), + spv::NoPrecision); + // TODO(Triang3l): Verify the fetch constant type (that it's a vertex fetch, + // not a texture fetch) here instead of dropping draws with invalid vertex + // fetch constants on the CPU when proper bound checks are added - vfetch + // may be conditional, so fetch constants may also be used conditionally. + address = builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createBinOp(spv::OpShiftRightLogical, type_uint_, + fetch_constant_word_0, + builder_->makeUintConstant(2))); + if (instr.attributes.stride) { + // Convert the index to an integer by flooring or by rounding to the + // nearest (as floor(index + 0.5) because rounding to the nearest even + // makes no sense for addressing, both 1.5 and 2.5 would be 2). 
+ spv::Id index = GetOperandComponents( + LoadOperandStorage(instr.operands[0]), instr.operands[0], 0b0001); + if (instr.attributes.is_index_rounded) { + index = builder_->createBinOp(spv::OpFAdd, type_float_, index, + builder_->makeFloatConstant(0.5f)); + builder_->addDecoration(index, spv::DecorationNoContraction); + } + id_vector_temp_.clear(); + id_vector_temp_.push_back(index); + index = builder_->createUnaryOp( + spv::OpConvertFToS, type_int_, + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450Floor, id_vector_temp_)); + if (instr.attributes.stride > 1) { + index = builder_->createBinOp( + spv::OpIMul, type_int_, index, + builder_->makeIntConstant(int(instr.attributes.stride))); + } + address = builder_->createBinOp(spv::OpIAdd, type_int_, address, index); } - address = builder_->createBinOp(spv::OpIAdd, type_int_, address, index); + // Store the address for the subsequent vfetch_mini. + builder_->createStore(address, var_main_vfetch_address_); + } + + if (!needed_words) { + // The vfetch_full address has been loaded for the subsequent vfetch_mini, + // but there's no data to load. + StoreResult(instr.result, spv::NoResult); + return; } // Load the needed words. 
From c75e0dd19e164e00936cea62faf693dac2172eb2 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 15 Feb 2022 23:00:21 +0300 Subject: [PATCH 073/123] [Vulkan] Blend and depth/stencil state, small pipeline cleanup --- src/xenia/gpu/spirv_shader_translator.cc | 19 +- src/xenia/gpu/spirv_shader_translator.h | 7 +- .../gpu/vulkan/deferred_command_buffer.cc | 33 ++ .../gpu/vulkan/deferred_command_buffer.h | 61 +++ .../gpu/vulkan/vulkan_command_processor.cc | 352 +++++++++--- .../gpu/vulkan/vulkan_command_processor.h | 87 ++- src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 515 ++++++++++++++++-- src/xenia/gpu/vulkan/vulkan_pipeline_cache.h | 85 ++- .../gpu/vulkan/vulkan_primitive_processor.cc | 12 +- src/xenia/ui/vulkan/functions/device_1_0.inc | 5 + 10 files changed, 1018 insertions(+), 158 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index aee9ec161..ce940da49 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -1216,10 +1216,11 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { "xe_out_fragment_data_2", "xe_out_fragment_data_3", }; - uint32_t shader_writes_color_targets = - current_shader().writes_color_targets(); + uint32_t fragment_data_outputs_written = + current_shader().writes_color_targets() & + ~GetSpirvShaderModification().pixel.color_outputs_disabled; for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { - if (!(shader_writes_color_targets & (uint32_t(1) << i))) { + if (!(fragment_data_outputs_written & (uint32_t(1) << i))) { continue; } spv::Id output_fragment_data_rt = @@ -1252,11 +1253,10 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() { } // Initialize the colors for safety. 
- uint32_t shader_writes_color_targets = - current_shader().writes_color_targets(); for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { - if (shader_writes_color_targets & (uint32_t(1) << i)) { - builder_->createStore(const_float4_0_, output_fragment_data_[i]); + spv::Id output_fragment_data_rt = output_fragment_data_[i]; + if (output_fragment_data_rt != spv::NoResult) { + builder_->createStore(const_float4_0_, output_fragment_data_rt); } } } @@ -1618,7 +1618,10 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, assert_not_zero(used_write_mask); assert_true(current_shader().writes_color_target(result.storage_index)); target_pointer = output_fragment_data_[result.storage_index]; - assert_true(target_pointer != spv::NoResult); + // May be spv::NoResult if the color output is explicitly removed due to + // an empty write mask without independent blending. + // TODO(Triang3l): Store the alpha of the first output in this case for + // alpha test and alpha to coverage. break; default: // TODO(Triang3l): All storage targets. diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index c5f41df09..932bd608f 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -34,7 +34,7 @@ class SpirvShaderTranslator : public ShaderTranslator { // TODO(Triang3l): Change to 0xYYYYMMDD once it's out of the rapid // prototyping stage (easier to do small granular updates with an // incremental counter). - static constexpr uint32_t kVersion = 2; + static constexpr uint32_t kVersion = 3; struct { // Dynamically indexable register count from SQ_PROGRAM_CNTL. @@ -46,6 +46,11 @@ class SpirvShaderTranslator : public ShaderTranslator { struct PixelShaderModification { // Dynamically indexable register count from SQ_PROGRAM_CNTL. 
uint32_t dynamic_addressable_register_count : 8; + // Color outputs removed from the shader to implement a zero color write + // mask when independent blending (and thus independent write masks) is + // not supported without switching to a render pass with some attachments + // actually excluded. + uint32_t color_outputs_disabled : 4; } pixel; uint64_t value = 0; diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.cc b/src/xenia/gpu/vulkan/deferred_command_buffer.cc index efb34d252..470d8adde 100644 --- a/src/xenia/gpu/vulkan/deferred_command_buffer.cc +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.cc @@ -168,6 +168,18 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { args.image_memory_barrier_count, image_memory_barriers); } break; + case Command::kVkSetBlendConstants: { + auto& args = *reinterpret_cast(stream); + dfn.vkCmdSetBlendConstants(command_buffer, args.blend_constants); + } break; + + case Command::kVkSetDepthBias: { + auto& args = *reinterpret_cast(stream); + dfn.vkCmdSetDepthBias(command_buffer, args.depth_bias_constant_factor, + args.depth_bias_clamp, + args.depth_bias_slope_factor); + } break; + case Command::kVkSetScissor: { auto& args = *reinterpret_cast(stream); dfn.vkCmdSetScissor( @@ -177,6 +189,27 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { xe::align(sizeof(ArgsVkSetScissor), alignof(VkRect2D)))); } break; + case Command::kVkSetStencilCompareMask: { + auto& args = + *reinterpret_cast(stream); + dfn.vkCmdSetStencilCompareMask(command_buffer, args.face_mask, + args.mask_reference); + } break; + + case Command::kVkSetStencilReference: { + auto& args = + *reinterpret_cast(stream); + dfn.vkCmdSetStencilReference(command_buffer, args.face_mask, + args.mask_reference); + } break; + + case Command::kVkSetStencilWriteMask: { + auto& args = + *reinterpret_cast(stream); + dfn.vkCmdSetStencilWriteMask(command_buffer, args.face_mask, + args.mask_reference); + } break; + case 
Command::kVkSetViewport: { auto& args = *reinterpret_cast(stream); dfn.vkCmdSetViewport( diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.h b/src/xenia/gpu/vulkan/deferred_command_buffer.h index 9ed39557b..ac4c88f85 100644 --- a/src/xenia/gpu/vulkan/deferred_command_buffer.h +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.h @@ -162,6 +162,22 @@ class DeferredCommandBuffer { uint32_t image_memory_barrier_count, const VkImageMemoryBarrier* image_memory_barriers); + void CmdVkSetBlendConstants(const float* blend_constants) { + auto& args = *reinterpret_cast(WriteCommand( + Command::kVkSetBlendConstants, sizeof(ArgsVkSetBlendConstants))); + std::memcpy(args.blend_constants, blend_constants, sizeof(float) * 4); + } + + void CmdVkSetDepthBias(float depth_bias_constant_factor, + float depth_bias_clamp, + float depth_bias_slope_factor) { + auto& args = *reinterpret_cast( + WriteCommand(Command::kVkSetDepthBias, sizeof(ArgsVkSetDepthBias))); + args.depth_bias_constant_factor = depth_bias_constant_factor; + args.depth_bias_clamp = depth_bias_clamp; + args.depth_bias_slope_factor = depth_bias_slope_factor; + } + void CmdVkSetScissor(uint32_t first_scissor, uint32_t scissor_count, const VkRect2D* scissors) { const size_t header_size = @@ -176,6 +192,31 @@ class DeferredCommandBuffer { sizeof(VkRect2D) * scissor_count); } + void CmdVkSetStencilCompareMask(VkStencilFaceFlags face_mask, + uint32_t compare_mask) { + auto& args = *reinterpret_cast( + WriteCommand(Command::kVkSetStencilCompareMask, + sizeof(ArgsSetStencilMaskReference))); + args.face_mask = face_mask; + args.mask_reference = compare_mask; + } + + void CmdVkSetStencilReference(VkStencilFaceFlags face_mask, + uint32_t reference) { + auto& args = *reinterpret_cast(WriteCommand( + Command::kVkSetStencilReference, sizeof(ArgsSetStencilMaskReference))); + args.face_mask = face_mask; + args.mask_reference = reference; + } + + void CmdVkSetStencilWriteMask(VkStencilFaceFlags face_mask, + uint32_t write_mask) { + 
auto& args = *reinterpret_cast(WriteCommand( + Command::kVkSetStencilWriteMask, sizeof(ArgsSetStencilMaskReference))); + args.face_mask = face_mask; + args.mask_reference = write_mask; + } + void CmdVkSetViewport(uint32_t first_viewport, uint32_t viewport_count, const VkViewport* viewports) { const size_t header_size = @@ -201,7 +242,12 @@ class DeferredCommandBuffer { kVkDrawIndexed, kVkEndRenderPass, kVkPipelineBarrier, + kVkSetBlendConstants, + kVkSetDepthBias, kVkSetScissor, + kVkSetStencilCompareMask, + kVkSetStencilReference, + kVkSetStencilWriteMask, kVkSetViewport, }; @@ -280,6 +326,16 @@ class DeferredCommandBuffer { static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t)); }; + struct ArgsVkSetBlendConstants { + float blend_constants[4]; + }; + + struct ArgsVkSetDepthBias { + float depth_bias_constant_factor; + float depth_bias_clamp; + float depth_bias_slope_factor; + }; + struct ArgsVkSetScissor { uint32_t first_scissor; uint32_t scissor_count; @@ -287,6 +343,11 @@ class DeferredCommandBuffer { static_assert(alignof(VkRect2D) <= alignof(uintmax_t)); }; + struct ArgsSetStencilMaskReference { + VkStencilFaceFlags face_mask; + uint32_t mask_reference; + }; + struct ArgsVkSetViewport { uint32_t first_viewport; uint32_t viewport_count; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 820bcc7eb..afdb32b03 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "xenia/base/assert.h" @@ -530,7 +531,7 @@ void VulkanCommandProcessor::ShutdownContext() { for (const auto& pipeline_layout_pair : pipeline_layouts_) { dfn.vkDestroyPipelineLayout( - device, pipeline_layout_pair.second.pipeline_layout, nullptr); + device, pipeline_layout_pair.second.GetPipelineLayout(), nullptr); } pipeline_layouts_.clear(); for (const auto& descriptor_set_layout_pair : @@ 
-824,8 +825,8 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, deferred_command_buffer_.CmdVkBeginRenderPass( &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE); - ff_viewport_update_needed_ = true; - ff_scissor_update_needed_ = true; + dynamic_viewport_update_needed_ = true; + dynamic_scissor_update_needed_ = true; VkViewport viewport; viewport.x = 0.0f; viewport.y = 0.0f; @@ -841,11 +842,7 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, scissor_rect.extent.height = scaled_height; deferred_command_buffer_.CmdVkSetScissor(0, 1, &scissor_rect); - // Bind a non-emulation graphics pipeline and invalidate the bindings. - current_graphics_pipeline_ = VK_NULL_HANDLE; - current_graphics_pipeline_layout_ = nullptr; - deferred_command_buffer_.CmdVkBindPipeline( - VK_PIPELINE_BIND_POINT_GRAPHICS, swap_pipeline_); + BindExternalGraphicsPipeline(swap_pipeline_); deferred_command_buffer_.CmdVkDraw(3, 1, 0, 0); @@ -1043,18 +1040,42 @@ VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, texture_count_pixel, texture_count_vertex); return nullptr; } - PipelineLayout pipeline_layout_entry; - pipeline_layout_entry.pipeline_layout = pipeline_layout; - pipeline_layout_entry.descriptor_set_layout_textures_pixel_ref = - descriptor_set_layout_textures_pixel; - pipeline_layout_entry.descriptor_set_layout_textures_vertex_ref = - descriptor_set_layout_textures_vertex; - auto emplaced_pair = - pipeline_layouts_.emplace(pipeline_layout_key.key, pipeline_layout_entry); + auto emplaced_pair = pipeline_layouts_.emplace( + std::piecewise_construct, std::forward_as_tuple(pipeline_layout_key.key), + std::forward_as_tuple(pipeline_layout, + descriptor_set_layout_textures_vertex, + descriptor_set_layout_textures_pixel)); // unordered_map insertion doesn't invalidate element references. 
return &emplaced_pair.first->second; } +void VulkanCommandProcessor::BindExternalGraphicsPipeline( + VkPipeline pipeline, bool keep_dynamic_depth_bias, + bool keep_dynamic_blend_constants, bool keep_dynamic_stencil_mask_ref) { + if (!keep_dynamic_depth_bias) { + dynamic_depth_bias_update_needed_ = true; + } + if (!keep_dynamic_blend_constants) { + dynamic_blend_constants_update_needed_ = true; + } + if (!keep_dynamic_stencil_mask_ref) { + dynamic_stencil_compare_mask_front_update_needed_ = true; + dynamic_stencil_compare_mask_back_update_needed_ = true; + dynamic_stencil_write_mask_front_update_needed_ = true; + dynamic_stencil_write_mask_back_update_needed_ = true; + dynamic_stencil_reference_front_update_needed_ = true; + dynamic_stencil_reference_back_update_needed_ = true; + } + if (current_external_graphics_pipeline_ == pipeline) { + return; + } + deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline); + current_external_graphics_pipeline_ = pipeline; + current_guest_graphics_pipeline_ = VK_NULL_HANDLE; + current_guest_graphics_pipeline_layout_ = VK_NULL_HANDLE; +} + Shader* VulkanCommandProcessor::LoadShader(xenos::ShaderType shader_type, uint32_t guest_address, const uint32_t* host_address, @@ -1134,20 +1155,23 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, return false; } + uint32_t normalized_color_mask = + pixel_shader ? draw_util::GetNormalizedColorMask( + regs, pixel_shader->writes_color_targets()) + : 0; + // Shader modifications. SpirvShaderTranslator::Modification vertex_shader_modification = pipeline_cache_->GetCurrentVertexShaderModification( *vertex_shader, primitive_processing_result.host_vertex_shader_type); SpirvShaderTranslator::Modification pixel_shader_modification = - pixel_shader - ? pipeline_cache_->GetCurrentPixelShaderModification(*pixel_shader) - : SpirvShaderTranslator::Modification(0); + pixel_shader ? 
pipeline_cache_->GetCurrentPixelShaderModification( + *pixel_shader, normalized_color_mask) + : SpirvShaderTranslator::Modification(0); // Set up the render targets - this may perform dispatches and draws. - uint32_t pixel_shader_writes_color_targets = - pixel_shader ? pixel_shader->writes_color_targets() : 0; if (!render_target_cache_->Update(is_rasterization_done, - pixel_shader_writes_color_targets)) { + normalized_color_mask)) { return false; } @@ -1164,37 +1188,41 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // Update the graphics pipeline, and if the new graphics pipeline has a // different layout, invalidate incompatible descriptor sets before updating - // current_graphics_pipeline_layout_. + // current_guest_graphics_pipeline_layout_. VkPipeline pipeline; const VulkanPipelineCache::PipelineLayoutProvider* pipeline_layout_provider; if (!pipeline_cache_->ConfigurePipeline( vertex_shader_translation, pixel_shader_translation, - primitive_processing_result, + primitive_processing_result, normalized_color_mask, render_target_cache_->last_update_render_pass_key(), pipeline, pipeline_layout_provider)) { return false; } - deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline); + if (current_guest_graphics_pipeline_ != pipeline) { + deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline); + current_guest_graphics_pipeline_ = pipeline; + current_external_graphics_pipeline_ = VK_NULL_HANDLE; + } auto pipeline_layout = static_cast(pipeline_layout_provider); - if (current_graphics_pipeline_layout_ != pipeline_layout) { - if (current_graphics_pipeline_layout_) { + if (current_guest_graphics_pipeline_layout_ != pipeline_layout) { + if (current_guest_graphics_pipeline_layout_) { // Keep descriptor set layouts for which the new pipeline layout is // compatible with the previous one (pipeline layouts are compatible for // set N if set layouts 0 through N are compatible). 
uint32_t descriptor_sets_kept = uint32_t(SpirvShaderTranslator::kDescriptorSetCount); - if (current_graphics_pipeline_layout_ - ->descriptor_set_layout_textures_vertex_ref != - pipeline_layout->descriptor_set_layout_textures_vertex_ref) { + if (current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_vertex_ref() != + pipeline_layout->descriptor_set_layout_textures_vertex_ref()) { descriptor_sets_kept = std::min( descriptor_sets_kept, uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesVertex)); } - if (current_graphics_pipeline_layout_ - ->descriptor_set_layout_textures_pixel_ref != - pipeline_layout->descriptor_set_layout_textures_pixel_ref) { + if (current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_pixel_ref() != + pipeline_layout->descriptor_set_layout_textures_pixel_ref()) { descriptor_sets_kept = std::min( descriptor_sets_kept, uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesPixel)); @@ -1204,7 +1232,7 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // indeterminate state. current_graphics_descriptor_sets_bound_up_to_date_ = 0; } - current_graphics_pipeline_layout_ = pipeline_layout; + current_guest_graphics_pipeline_layout_ = pipeline_layout; } const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); @@ -1234,8 +1262,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, device_properties.limits.maxViewportDimensions[1], true, false, false, false, viewport_info); - // Update fixed-function dynamic state. - UpdateFixedFunctionState(viewport_info); + // Update dynamic graphics pipeline state. + UpdateDynamicState(viewport_info, primitive_polygonal); // Update system constants before uploading them. UpdateSystemConstantValues(primitive_processing_result.host_index_endian, @@ -1550,12 +1578,21 @@ bool VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { deferred_command_buffer_.Reset(); // Reset cached state of the command buffer. 
- ff_viewport_update_needed_ = true; - ff_scissor_update_needed_ = true; + dynamic_viewport_update_needed_ = true; + dynamic_scissor_update_needed_ = true; + dynamic_depth_bias_update_needed_ = true; + dynamic_blend_constants_update_needed_ = true; + dynamic_stencil_compare_mask_front_update_needed_ = true; + dynamic_stencil_compare_mask_back_update_needed_ = true; + dynamic_stencil_write_mask_front_update_needed_ = true; + dynamic_stencil_write_mask_back_update_needed_ = true; + dynamic_stencil_reference_front_update_needed_ = true; + dynamic_stencil_reference_back_update_needed_ = true; current_render_pass_ = VK_NULL_HANDLE; current_framebuffer_ = VK_NULL_HANDLE; - current_graphics_pipeline_ = VK_NULL_HANDLE; - current_graphics_pipeline_layout_ = nullptr; + current_guest_graphics_pipeline_ = VK_NULL_HANDLE; + current_external_graphics_pipeline_ = VK_NULL_HANDLE; + current_guest_graphics_pipeline_layout_ = nullptr; current_graphics_descriptor_sets_bound_up_to_date_ = 0; primitive_processor_->BeginSubmission(); @@ -1825,7 +1862,7 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { for (const auto& pipeline_layout_pair : pipeline_layouts_) { dfn.vkDestroyPipelineLayout( - device, pipeline_layout_pair.second.pipeline_layout, nullptr); + device, pipeline_layout_pair.second.GetPipelineLayout(), nullptr); } pipeline_layouts_.clear(); for (const auto& descriptor_set_layout_pair : @@ -1859,8 +1896,8 @@ VkShaderStageFlags VulkanCommandProcessor::GetGuestVertexShaderStageFlags() return stages; } -void VulkanCommandProcessor::UpdateFixedFunctionState( - const draw_util::ViewportInfo& viewport_info) { +void VulkanCommandProcessor::UpdateDynamicState( + const draw_util::ViewportInfo& viewport_info, bool primitive_polygonal) { #if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES @@ -1891,16 +1928,19 @@ void VulkanCommandProcessor::UpdateFixedFunctionState( } viewport.minDepth = viewport_info.z_min; 
viewport.maxDepth = viewport_info.z_max; - ff_viewport_update_needed_ |= ff_viewport_.x != viewport.x; - ff_viewport_update_needed_ |= ff_viewport_.y != viewport.y; - ff_viewport_update_needed_ |= ff_viewport_.width != viewport.width; - ff_viewport_update_needed_ |= ff_viewport_.height != viewport.height; - ff_viewport_update_needed_ |= ff_viewport_.minDepth != viewport.minDepth; - ff_viewport_update_needed_ |= ff_viewport_.maxDepth != viewport.maxDepth; - if (ff_viewport_update_needed_) { - ff_viewport_ = viewport; - deferred_command_buffer_.CmdVkSetViewport(0, 1, &viewport); - ff_viewport_update_needed_ = false; + dynamic_viewport_update_needed_ |= dynamic_viewport_.x != viewport.x; + dynamic_viewport_update_needed_ |= dynamic_viewport_.y != viewport.y; + dynamic_viewport_update_needed_ |= dynamic_viewport_.width != viewport.width; + dynamic_viewport_update_needed_ |= + dynamic_viewport_.height != viewport.height; + dynamic_viewport_update_needed_ |= + dynamic_viewport_.minDepth != viewport.minDepth; + dynamic_viewport_update_needed_ |= + dynamic_viewport_.maxDepth != viewport.maxDepth; + if (dynamic_viewport_update_needed_) { + dynamic_viewport_ = viewport; + deferred_command_buffer_.CmdVkSetViewport(0, 1, &dynamic_viewport_); + dynamic_viewport_update_needed_ = false; } // Scissor. 
@@ -1911,17 +1951,191 @@ void VulkanCommandProcessor::UpdateFixedFunctionState( scissor_rect.offset.y = int32_t(scissor.offset[1]); scissor_rect.extent.width = scissor.extent[0]; scissor_rect.extent.height = scissor.extent[1]; - ff_scissor_update_needed_ |= ff_scissor_.offset.x != scissor_rect.offset.x; - ff_scissor_update_needed_ |= ff_scissor_.offset.y != scissor_rect.offset.y; - ff_scissor_update_needed_ |= - ff_scissor_.extent.width != scissor_rect.extent.width; - ff_scissor_update_needed_ |= - ff_scissor_.extent.height != scissor_rect.extent.height; - if (ff_scissor_update_needed_) { - ff_scissor_ = scissor_rect; - deferred_command_buffer_.CmdVkSetScissor(0, 1, &scissor_rect); - ff_scissor_update_needed_ = false; + dynamic_scissor_update_needed_ |= + dynamic_scissor_.offset.x != scissor_rect.offset.x; + dynamic_scissor_update_needed_ |= + dynamic_scissor_.offset.y != scissor_rect.offset.y; + dynamic_scissor_update_needed_ |= + dynamic_scissor_.extent.width != scissor_rect.extent.width; + dynamic_scissor_update_needed_ |= + dynamic_scissor_.extent.height != scissor_rect.extent.height; + if (dynamic_scissor_update_needed_) { + dynamic_scissor_ = scissor_rect; + deferred_command_buffer_.CmdVkSetScissor(0, 1, &dynamic_scissor_); + dynamic_scissor_update_needed_ = false; } + + // Depth bias. + // TODO(Triang3l): Disable the depth bias for the fragment shader interlock RB + // implementation. + float depth_bias_constant_factor, depth_bias_slope_factor; + draw_util::GetPreferredFacePolygonOffset(regs, primitive_polygonal, + depth_bias_slope_factor, + depth_bias_constant_factor); + depth_bias_constant_factor *= draw_util::GetD3D10PolygonOffsetFactor( + regs.Get().depth_format, true); + // With non-square resolution scaling, make sure the worst-case impact is + // reverted (slope only along the scaled axis), thus max. More bias is better + // than less bias, because less bias means Z fighting with the background is + // more likely. 
+ depth_bias_slope_factor *= + xenos::kPolygonOffsetScaleSubpixelUnit * + float(std::max(render_target_cache_->GetResolutionScaleX(), + render_target_cache_->GetResolutionScaleY())); + // std::memcmp instead of != so in case of NaN, every draw won't be + // invalidating it. + dynamic_depth_bias_update_needed_ |= + std::memcmp(&dynamic_depth_bias_constant_factor_, + &depth_bias_constant_factor, sizeof(float)) != 0; + dynamic_depth_bias_update_needed_ |= + std::memcmp(&dynamic_depth_bias_slope_factor_, &depth_bias_slope_factor, + sizeof(float)) != 0; + if (dynamic_depth_bias_update_needed_) { + dynamic_depth_bias_constant_factor_ = depth_bias_constant_factor; + dynamic_depth_bias_slope_factor_ = depth_bias_slope_factor; + deferred_command_buffer_.CmdVkSetDepthBias( + dynamic_depth_bias_constant_factor_, 0.0f, + dynamic_depth_bias_slope_factor_); + dynamic_depth_bias_update_needed_ = false; + } + + // Blend constants. + float blend_constants[] = { + regs[XE_GPU_REG_RB_BLEND_RED].f32, + regs[XE_GPU_REG_RB_BLEND_GREEN].f32, + regs[XE_GPU_REG_RB_BLEND_BLUE].f32, + regs[XE_GPU_REG_RB_BLEND_ALPHA].f32, + }; + dynamic_blend_constants_update_needed_ |= + std::memcmp(dynamic_blend_constants_, blend_constants, + sizeof(float) * 4) != 0; + if (dynamic_blend_constants_update_needed_) { + std::memcpy(dynamic_blend_constants_, blend_constants, sizeof(float) * 4); + deferred_command_buffer_.CmdVkSetBlendConstants(dynamic_blend_constants_); + dynamic_blend_constants_update_needed_ = false; + } + + // Stencil masks and references. + // Due to pretty complex conditions involving registers not directly related + // to stencil (primitive type, culling), changing the values only when stencil + // is actually needed. 
However, due to the way dynamic state needs to be set + // in Vulkan, which doesn't take into account whether the state actually has + // effect on drawing, and because the masks and the references are always + // dynamic in Xenia guest pipelines, they must be set in the command buffer + // before any draw. + auto rb_depthcontrol = draw_util::GetDepthControlForCurrentEdramMode(regs); + if (rb_depthcontrol.stencil_enable) { + Register stencil_ref_mask_front_reg, stencil_ref_mask_back_reg; + if (primitive_polygonal && rb_depthcontrol.backface_enable) { + const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); + const VkPhysicalDevicePortabilitySubsetFeaturesKHR* + device_portability_subset_features = + provider.device_portability_subset_features(); + if (!device_portability_subset_features || + device_portability_subset_features->separateStencilMaskRef) { + // Choose the back face values only if drawing only back faces. + stencil_ref_mask_front_reg = + regs.Get().cull_front + ? XE_GPU_REG_RB_STENCILREFMASK_BF + : XE_GPU_REG_RB_STENCILREFMASK; + stencil_ref_mask_back_reg = stencil_ref_mask_front_reg; + } else { + stencil_ref_mask_front_reg = XE_GPU_REG_RB_STENCILREFMASK; + stencil_ref_mask_back_reg = XE_GPU_REG_RB_STENCILREFMASK_BF; + } + } else { + stencil_ref_mask_front_reg = XE_GPU_REG_RB_STENCILREFMASK; + stencil_ref_mask_back_reg = XE_GPU_REG_RB_STENCILREFMASK; + } + auto stencil_ref_mask_front = + regs.Get(stencil_ref_mask_front_reg); + auto stencil_ref_mask_back = + regs.Get(stencil_ref_mask_back_reg); + // Compare mask. + dynamic_stencil_compare_mask_front_update_needed_ |= + dynamic_stencil_compare_mask_front_ != + stencil_ref_mask_front.stencilmask; + dynamic_stencil_compare_mask_front_ = stencil_ref_mask_front.stencilmask; + dynamic_stencil_compare_mask_back_update_needed_ |= + dynamic_stencil_compare_mask_back_ != stencil_ref_mask_back.stencilmask; + dynamic_stencil_compare_mask_back_ = stencil_ref_mask_back.stencilmask; + // Write mask. 
+ dynamic_stencil_write_mask_front_update_needed_ |= + dynamic_stencil_write_mask_front_ != + stencil_ref_mask_front.stencilwritemask; + dynamic_stencil_write_mask_front_ = stencil_ref_mask_front.stencilwritemask; + dynamic_stencil_write_mask_back_update_needed_ |= + dynamic_stencil_write_mask_back_ != + stencil_ref_mask_back.stencilwritemask; + dynamic_stencil_write_mask_back_ = stencil_ref_mask_back.stencilwritemask; + // Reference. + dynamic_stencil_reference_front_update_needed_ |= + dynamic_stencil_reference_front_ != stencil_ref_mask_front.stencilref; + dynamic_stencil_reference_front_ = stencil_ref_mask_front.stencilref; + dynamic_stencil_reference_back_update_needed_ |= + dynamic_stencil_reference_back_ != stencil_ref_mask_back.stencilref; + dynamic_stencil_reference_back_ = stencil_ref_mask_back.stencilref; + } + // Using VK_STENCIL_FACE_FRONT_AND_BACK for higher safety when running on the + // Vulkan portability subset without separateStencilMaskRef. + if (dynamic_stencil_compare_mask_front_update_needed_ || + dynamic_stencil_compare_mask_back_update_needed_) { + if (dynamic_stencil_compare_mask_front_ == + dynamic_stencil_compare_mask_back_) { + deferred_command_buffer_.CmdVkSetStencilCompareMask( + VK_STENCIL_FACE_FRONT_AND_BACK, dynamic_stencil_compare_mask_front_); + } else { + if (dynamic_stencil_compare_mask_front_update_needed_) { + deferred_command_buffer_.CmdVkSetStencilCompareMask( + VK_STENCIL_FACE_FRONT_BIT, dynamic_stencil_compare_mask_front_); + } + if (dynamic_stencil_compare_mask_back_update_needed_) { + deferred_command_buffer_.CmdVkSetStencilCompareMask( + VK_STENCIL_FACE_BACK_BIT, dynamic_stencil_compare_mask_back_); + } + } + dynamic_stencil_compare_mask_front_update_needed_ = false; + dynamic_stencil_compare_mask_back_update_needed_ = false; + } + if (dynamic_stencil_write_mask_front_update_needed_ || + dynamic_stencil_write_mask_back_update_needed_) { + if (dynamic_stencil_write_mask_front_ == dynamic_stencil_write_mask_back_) { + 
deferred_command_buffer_.CmdVkSetStencilWriteMask( + VK_STENCIL_FACE_FRONT_AND_BACK, dynamic_stencil_write_mask_front_); + } else { + if (dynamic_stencil_write_mask_front_update_needed_) { + deferred_command_buffer_.CmdVkSetStencilWriteMask( + VK_STENCIL_FACE_FRONT_BIT, dynamic_stencil_write_mask_front_); + } + if (dynamic_stencil_write_mask_back_update_needed_) { + deferred_command_buffer_.CmdVkSetStencilWriteMask( + VK_STENCIL_FACE_BACK_BIT, dynamic_stencil_write_mask_back_); + } + } + dynamic_stencil_write_mask_front_update_needed_ = false; + dynamic_stencil_write_mask_back_update_needed_ = false; + } + if (dynamic_stencil_reference_front_update_needed_ || + dynamic_stencil_reference_back_update_needed_) { + if (dynamic_stencil_reference_front_ == dynamic_stencil_reference_back_) { + deferred_command_buffer_.CmdVkSetStencilReference( + VK_STENCIL_FACE_FRONT_AND_BACK, dynamic_stencil_reference_front_); + } else { + if (dynamic_stencil_reference_front_update_needed_) { + deferred_command_buffer_.CmdVkSetStencilReference( + VK_STENCIL_FACE_FRONT_BIT, dynamic_stencil_reference_front_); + } + if (dynamic_stencil_reference_back_update_needed_) { + deferred_command_buffer_.CmdVkSetStencilReference( + VK_STENCIL_FACE_BACK_BIT, dynamic_stencil_reference_back_); + } + } + dynamic_stencil_reference_front_update_needed_ = false; + dynamic_stencil_reference_back_update_needed_ = false; + } + + // TODO(Triang3l): VK_EXT_extended_dynamic_state and + // VK_EXT_extended_dynamic_state2. } void VulkanCommandProcessor::UpdateSystemConstantValues( @@ -2201,14 +2415,14 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, // Bind the new descriptor sets. 
uint32_t descriptor_sets_needed = (uint32_t(1) << SpirvShaderTranslator::kDescriptorSetCount) - 1; - if (current_graphics_pipeline_layout_ - ->descriptor_set_layout_textures_vertex_ref == + if (current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_vertex_ref() == descriptor_set_layout_empty_) { descriptor_sets_needed &= ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex); } - if (current_graphics_pipeline_layout_ - ->descriptor_set_layout_textures_pixel_ref == + if (current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_pixel_ref() == descriptor_set_layout_empty_) { descriptor_sets_needed &= ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel); @@ -2226,7 +2440,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, // geometry shaders. deferred_command_buffer_.CmdVkBindDescriptorSets( VK_PIPELINE_BIND_POINT_GRAPHICS, - current_graphics_pipeline_layout_->pipeline_layout, + current_guest_graphics_pipeline_layout_->GetPipelineLayout(), descriptor_set_index, descriptor_set_mask_tzcnt - descriptor_set_index, current_graphics_descriptor_sets_ + descriptor_set_index, 0, nullptr); if (descriptor_set_mask_tzcnt >= 32) { diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index bc43d9ad6..a01f14feb 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -90,6 +90,17 @@ class VulkanCommandProcessor : public CommandProcessor { const VulkanPipelineCache::PipelineLayoutProvider* GetPipelineLayout( uint32_t texture_count_pixel, uint32_t texture_count_vertex); + // Binds a graphics pipeline for host-specific purposes, invalidating the + // affected state. 
keep_dynamic_* must be false (to invalidate the dynamic + // state after binding the pipeline with the same state being static, or if + // the caller changes the dynamic state bypassing the VulkanCommandProcessor) + // unless the caller has these state variables as dynamic and uses the + // tracking in VulkanCommandProcessor to modify them. + void BindExternalGraphicsPipeline(VkPipeline pipeline, + bool keep_dynamic_depth_bias = false, + bool keep_dynamic_blend_constants = false, + bool keep_dynamic_stencil_mask_ref = false); + protected: bool SetupContext() override; void ShutdownContext() override; @@ -146,12 +157,29 @@ class VulkanCommandProcessor : public CommandProcessor { class PipelineLayout : public VulkanPipelineCache::PipelineLayoutProvider { public: + PipelineLayout( + VkPipelineLayout pipeline_layout, + VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref, + VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref) + : pipeline_layout_(pipeline_layout), + descriptor_set_layout_textures_vertex_ref_( + descriptor_set_layout_textures_vertex_ref), + descriptor_set_layout_textures_pixel_ref_( + descriptor_set_layout_textures_pixel_ref) {} VkPipelineLayout GetPipelineLayout() const override { - return pipeline_layout; + return pipeline_layout_; } - VkPipelineLayout pipeline_layout; - VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref; - VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref; + VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref() const { + return descriptor_set_layout_textures_vertex_ref_; + } + VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref() const { + return descriptor_set_layout_textures_pixel_ref_; + } + + private: + VkPipelineLayout pipeline_layout_; + VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref_; + VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref_; }; // BeginSubmission and EndSubmission may be called at any time. 
If there's an @@ -179,7 +207,8 @@ class VulkanCommandProcessor : public CommandProcessor { VkShaderStageFlags GetGuestVertexShaderStageFlags() const; - void UpdateFixedFunctionState(const draw_util::ViewportInfo& viewport_info); + void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info, + bool primitive_polygonal); void UpdateSystemConstantValues(xenos::Endian index_endian, const draw_util::ViewportInfo& viewport_info); bool UpdateBindings(const VulkanShader* vertex_shader, @@ -285,22 +314,52 @@ class VulkanCommandProcessor : public CommandProcessor { swap_framebuffers_; std::deque> swap_framebuffers_outdated_; - // The current fixed-function drawing state. - VkViewport ff_viewport_; - VkRect2D ff_scissor_; - bool ff_viewport_update_needed_; - bool ff_scissor_update_needed_; + // The current dynamic state of the graphics pipeline bind point. Note that + // binding any pipeline to the bind point with static state (even if it's + // unused, like depth bias being disabled, but the values themselves still not + // declared as dynamic in the pipeline) invalidates such dynamic state. + VkViewport dynamic_viewport_; + VkRect2D dynamic_scissor_; + float dynamic_depth_bias_constant_factor_; + float dynamic_depth_bias_slope_factor_; + float dynamic_blend_constants_[4]; + // The stencil values are pre-initialized (to D3D11_DEFAULT_STENCIL_*, and the + // initial values for front and back are the same for portability subset + // safety) because they're updated conditionally to avoid changing the back + // face values when stencil is disabled and the primitive type is changed + // between polygonal and non-polygonal. 
+ uint32_t dynamic_stencil_compare_mask_front_ = UINT8_MAX; + uint32_t dynamic_stencil_compare_mask_back_ = UINT8_MAX; + uint32_t dynamic_stencil_write_mask_front_ = UINT8_MAX; + uint32_t dynamic_stencil_write_mask_back_ = UINT8_MAX; + uint32_t dynamic_stencil_reference_front_ = 0; + uint32_t dynamic_stencil_reference_back_ = 0; + bool dynamic_viewport_update_needed_; + bool dynamic_scissor_update_needed_; + bool dynamic_depth_bias_update_needed_; + bool dynamic_blend_constants_update_needed_; + bool dynamic_stencil_compare_mask_front_update_needed_; + bool dynamic_stencil_compare_mask_back_update_needed_; + bool dynamic_stencil_write_mask_front_update_needed_; + bool dynamic_stencil_write_mask_back_update_needed_; + bool dynamic_stencil_reference_front_update_needed_; + bool dynamic_stencil_reference_back_update_needed_; // Cache render pass currently started in the command buffer with the // framebuffer. VkRenderPass current_render_pass_; VkFramebuffer current_framebuffer_; - // Cache graphics pipeline currently bound to the command buffer. - VkPipeline current_graphics_pipeline_; + // Currently bound graphics pipeline, either from the pipeline cache (with + // potentially deferred creation - current_external_graphics_pipeline_ is + // VK_NULL_HANDLE in this case) or a non-Xenos one + // (current_guest_graphics_pipeline_ is VK_NULL_HANDLE in this case). + // TODO(Triang3l): Change to a deferred compilation handle. + VkPipeline current_guest_graphics_pipeline_; + VkPipeline current_external_graphics_pipeline_; - // Pipeline layout of the current graphics pipeline. - const PipelineLayout* current_graphics_pipeline_layout_; + // Pipeline layout of the current guest graphics pipeline. 
+ const PipelineLayout* current_guest_graphics_pipeline_layout_; VkDescriptorSet current_graphics_descriptor_sets_ [SpirvShaderTranslator::kDescriptorSetCount]; // Whether descriptor sets in current_graphics_descriptor_sets_ point to diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 33d94ad4f..2e3c32d8c 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -10,6 +10,7 @@ #include "xenia/gpu/vulkan/vulkan_pipeline_cache.h" #include +#include #include #include @@ -45,11 +46,6 @@ bool VulkanPipelineCache::Initialize() { const ui::vulkan::VulkanProvider& provider = command_processor_.GetVulkanProvider(); - device_pipeline_features_.features = 0; - // TODO(Triang3l): Support the portability subset. 
- device_pipeline_features_.point_polygons = 1; - device_pipeline_features_.triangle_fans = 1; - shader_translator_ = std::make_unique( SpirvShaderTranslator::Features(provider)); @@ -119,21 +115,52 @@ VulkanPipelineCache::GetCurrentVertexShaderModification( SpirvShaderTranslator::Modification VulkanPipelineCache::GetCurrentPixelShaderModification( - const Shader& shader) const { + const Shader& shader, uint32_t normalized_color_mask) const { assert_true(shader.type() == xenos::ShaderType::kPixel); assert_true(shader.is_ucode_analyzed()); const auto& regs = register_file_; + auto sq_program_cntl = regs.Get(); - return SpirvShaderTranslator::Modification( + SpirvShaderTranslator::Modification modification( shader_translator_->GetDefaultPixelShaderModification( shader.GetDynamicAddressableRegisterCount( sq_program_cntl.ps_num_reg))); + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + if (!device_features.independentBlend) { + // Since without independent blending, the write mask is common for all + // attachments, but the render pass may still include the attachments from + // previous draws (to prevent excessive render pass changes potentially + // doing stores and loads), disable writing to render targets with a + // completely empty write mask by removing the output from the shader. + // Only explicitly excluding render targets that the shader actually writes + // to, for better pipeline storage compatibility between devices with and + // without independent blending (so in the usual situation - the shader + // doesn't write to any render targets disabled via the color mask - no + // explicit disabling of shader outputs will be needed, and the disabled + // output mask will be 0). 
+ uint32_t color_targets_remaining = shader.writes_color_targets(); + uint32_t color_target_index; + while (xe::bit_scan_forward(color_targets_remaining, &color_target_index)) { + color_targets_remaining &= ~(uint32_t(1) << color_target_index); + if (!(normalized_color_mask & + (uint32_t(0b1111) << (4 * color_target_index)))) { + modification.pixel.color_outputs_disabled |= uint32_t(1) + << color_target_index; + } + } + } + + return modification; } bool VulkanPipelineCache::ConfigurePipeline( VulkanShader::VulkanTranslation* vertex_shader, VulkanShader::VulkanTranslation* pixel_shader, const PrimitiveProcessor::ProcessingResult& primitive_processing_result, + uint32_t normalized_color_mask, VulkanRenderTargetCache::RenderPassKey render_pass_key, VkPipeline& pipeline_out, const PipelineLayoutProvider*& pipeline_layout_out) { @@ -174,9 +201,9 @@ bool VulkanPipelineCache::ConfigurePipeline( } PipelineDescription description; - if (!GetCurrentStateDescription(vertex_shader, pixel_shader, - primitive_processing_result, render_pass_key, - description)) { + if (!GetCurrentStateDescription( + vertex_shader, pixel_shader, primitive_processing_result, + normalized_color_mask, render_pass_key, description)) { return false; } if (last_pipeline_ && last_pipeline_->first == description) { @@ -231,14 +258,92 @@ bool VulkanPipelineCache::TranslateAnalyzedShader( return translation.GetOrCreateShaderModule() != VK_NULL_HANDLE; } +void VulkanPipelineCache::WritePipelineRenderTargetDescription( + reg::RB_BLENDCONTROL blend_control, uint32_t write_mask, + PipelineRenderTarget& render_target_out) const { + if (write_mask) { + assert_zero(write_mask & ~uint32_t(0b1111)); + // 32 because of 0x1F mask, for safety (all unknown to zero). + static const PipelineBlendFactor kBlendFactorMap[32] = { + /* 0 */ PipelineBlendFactor::kZero, + /* 1 */ PipelineBlendFactor::kOne, + /* 2 */ PipelineBlendFactor::kZero, // ? + /* 3 */ PipelineBlendFactor::kZero, // ? 
+ /* 4 */ PipelineBlendFactor::kSrcColor, + /* 5 */ PipelineBlendFactor::kOneMinusSrcColor, + /* 6 */ PipelineBlendFactor::kSrcAlpha, + /* 7 */ PipelineBlendFactor::kOneMinusSrcAlpha, + /* 8 */ PipelineBlendFactor::kDstColor, + /* 9 */ PipelineBlendFactor::kOneMinusDstColor, + /* 10 */ PipelineBlendFactor::kDstAlpha, + /* 11 */ PipelineBlendFactor::kOneMinusDstAlpha, + /* 12 */ PipelineBlendFactor::kConstantColor, + /* 13 */ PipelineBlendFactor::kOneMinusConstantColor, + /* 14 */ PipelineBlendFactor::kConstantAlpha, + /* 15 */ PipelineBlendFactor::kOneMinusConstantAlpha, + /* 16 */ PipelineBlendFactor::kSrcAlphaSaturate, + }; + render_target_out.src_color_blend_factor = + kBlendFactorMap[uint32_t(blend_control.color_srcblend)]; + render_target_out.dst_color_blend_factor = + kBlendFactorMap[uint32_t(blend_control.color_destblend)]; + render_target_out.color_blend_op = blend_control.color_comb_fcn; + render_target_out.src_alpha_blend_factor = + kBlendFactorMap[uint32_t(blend_control.alpha_srcblend)]; + render_target_out.dst_alpha_blend_factor = + kBlendFactorMap[uint32_t(blend_control.alpha_destblend)]; + render_target_out.alpha_blend_op = blend_control.alpha_comb_fcn; + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDevicePortabilitySubsetFeaturesKHR* + device_portability_subset_features = + provider.device_portability_subset_features(); + if (device_portability_subset_features && + !device_portability_subset_features->constantAlphaColorBlendFactors) { + if (blend_control.color_srcblend == xenos::BlendFactor::kConstantAlpha) { + render_target_out.src_color_blend_factor = + PipelineBlendFactor::kConstantColor; + } else if (blend_control.color_srcblend == + xenos::BlendFactor::kOneMinusConstantAlpha) { + render_target_out.src_color_blend_factor = + PipelineBlendFactor::kOneMinusConstantColor; + } + if (blend_control.color_destblend == xenos::BlendFactor::kConstantAlpha) { + 
render_target_out.dst_color_blend_factor = + PipelineBlendFactor::kConstantColor; + } else if (blend_control.color_destblend == + xenos::BlendFactor::kOneMinusConstantAlpha) { + render_target_out.dst_color_blend_factor = + PipelineBlendFactor::kOneMinusConstantColor; + } + } + } else { + render_target_out.src_color_blend_factor = PipelineBlendFactor::kOne; + render_target_out.dst_color_blend_factor = PipelineBlendFactor::kZero; + render_target_out.color_blend_op = xenos::BlendOp::kAdd; + render_target_out.src_alpha_blend_factor = PipelineBlendFactor::kOne; + render_target_out.dst_alpha_blend_factor = PipelineBlendFactor::kZero; + render_target_out.alpha_blend_op = xenos::BlendOp::kAdd; + } + render_target_out.color_write_mask = write_mask; +} + bool VulkanPipelineCache::GetCurrentStateDescription( const VulkanShader::VulkanTranslation* vertex_shader, const VulkanShader::VulkanTranslation* pixel_shader, const PrimitiveProcessor::ProcessingResult& primitive_processing_result, + uint32_t normalized_color_mask, VulkanRenderTargetCache::RenderPassKey render_pass_key, PipelineDescription& description_out) const { description_out.Reset(); + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + const VkPhysicalDevicePortabilitySubsetFeaturesKHR* + device_portability_subset_features = + provider.device_portability_subset_features(); + const RegisterFile& regs = register_file_; auto pa_su_sc_mode_cntl = regs.Get(); @@ -268,6 +373,9 @@ bool VulkanPipelineCache::GetCurrentStateDescription( primitive_topology = PipelinePrimitiveTopology::kTriangleList; break; case xenos::PrimitiveType::kTriangleFan: + // The check should be performed at primitive processing time. 
+ assert_true(!device_portability_subset_features || + device_portability_subset_features->triangleFans); primitive_topology = PipelinePrimitiveTopology::kTriangleFan; break; case xenos::PrimitiveType::kTriangleStrip: @@ -284,6 +392,9 @@ bool VulkanPipelineCache::GetCurrentStateDescription( description_out.primitive_restart = primitive_processing_result.host_primitive_reset_enabled; + description_out.depth_clamp_enable = + regs.Get().clip_disable; + // TODO(Triang3l): Tessellation. bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs); if (primitive_polygonal) { @@ -313,9 +424,11 @@ bool VulkanPipelineCache::GetCurrentStateDescription( case xenos::PolygonType::kPoints: // When points are not supported, use lines instead, preserving // debug-like purpose. - description_out.polygon_mode = device_pipeline_features_.point_polygons - ? PipelinePolygonMode::kPoint - : PipelinePolygonMode::kLine; + description_out.polygon_mode = + (!device_portability_subset_features || + device_portability_subset_features->pointPolygons) + ? PipelinePolygonMode::kPoint + : PipelinePolygonMode::kLine; break; case xenos::PolygonType::kLines: description_out.polygon_mode = PipelinePolygonMode::kLine; @@ -332,6 +445,196 @@ bool VulkanPipelineCache::GetCurrentStateDescription( description_out.polygon_mode = PipelinePolygonMode::kFill; } + // TODO(Triang3l): Skip depth / stencil and color state for the fragment + // shader interlock RB implementation. 
+ + if (render_pass_key.depth_and_color_used & 1) { + auto rb_depthcontrol = draw_util::GetDepthControlForCurrentEdramMode(regs); + if (rb_depthcontrol.z_enable) { + description_out.depth_write_enable = rb_depthcontrol.z_write_enable; + description_out.depth_compare_op = rb_depthcontrol.zfunc; + } else { + description_out.depth_compare_op = xenos::CompareFunction::kAlways; + } + if (rb_depthcontrol.stencil_enable) { + description_out.stencil_test_enable = 1; + description_out.stencil_front_fail_op = rb_depthcontrol.stencilfail; + description_out.stencil_front_pass_op = rb_depthcontrol.stencilzpass; + description_out.stencil_front_depth_fail_op = + rb_depthcontrol.stencilzfail; + description_out.stencil_front_compare_op = rb_depthcontrol.stencilfunc; + if (primitive_polygonal && rb_depthcontrol.backface_enable) { + description_out.stencil_back_fail_op = rb_depthcontrol.stencilfail_bf; + description_out.stencil_back_pass_op = rb_depthcontrol.stencilzpass_bf; + description_out.stencil_back_depth_fail_op = + rb_depthcontrol.stencilzfail_bf; + description_out.stencil_back_compare_op = + rb_depthcontrol.stencilfunc_bf; + } else { + description_out.stencil_back_fail_op = + description_out.stencil_front_fail_op; + description_out.stencil_back_pass_op = + description_out.stencil_front_pass_op; + description_out.stencil_back_depth_fail_op = + description_out.stencil_front_depth_fail_op; + description_out.stencil_back_compare_op = + description_out.stencil_front_compare_op; + } + } + } + + // Color blending and write masks (filled only for the attachments present in + // the render pass object). 
+ uint32_t render_pass_color_rts = render_pass_key.depth_and_color_used >> 1; + if (device_features.independentBlend) { + uint32_t render_pass_color_rts_remaining = render_pass_color_rts; + uint32_t color_rt_index; + while (xe::bit_scan_forward(render_pass_color_rts_remaining, + &color_rt_index)) { + render_pass_color_rts_remaining &= ~(uint32_t(1) << color_rt_index); + WritePipelineRenderTargetDescription( + regs.Get( + reg::RB_BLENDCONTROL::rt_register_indices[color_rt_index]), + (normalized_color_mask >> (color_rt_index * 4)) & 0b1111, + description_out.render_targets[color_rt_index]); + } + } else { + // Take the blend control for the first render target that the guest wants + // to write to (consider it the most important) and use it for all render + // targets, if any. + // TODO(Triang3l): Implement an option for independent blending via multiple + // draw calls with different pipelines maybe? Though independent blending + // support is pretty wide, with a quite prominent exception of Adreno 4xx + // apparently. + uint32_t render_pass_color_rts_remaining = render_pass_color_rts; + uint32_t render_pass_first_color_rt_index; + if (xe::bit_scan_forward(render_pass_color_rts_remaining, + &render_pass_first_color_rt_index)) { + render_pass_color_rts_remaining &= + ~(uint32_t(1) << render_pass_first_color_rt_index); + PipelineRenderTarget& render_pass_first_color_rt = + description_out.render_targets[render_pass_first_color_rt_index]; + uint32_t common_blend_rt_index; + if (xe::bit_scan_forward(normalized_color_mask, &common_blend_rt_index)) { + common_blend_rt_index >>= 2; + // If a common write mask will be used for multiple render targets, use + // the original RB_COLOR_MASK instead of the normalized color mask as + // the normalized color mask has non-existent components forced to + // written (don't need reading to be preserved), while the number of + // components may vary between render targets. 
The attachments in the + // pass that must not be written to at all will be excluded via a shader + // modification. + WritePipelineRenderTargetDescription( + regs.Get( + reg::RB_BLENDCONTROL::rt_register_indices + [common_blend_rt_index]), + (((normalized_color_mask & + ~(uint32_t(0b1111) << (4 * common_blend_rt_index))) + ? regs[XE_GPU_REG_RB_COLOR_MASK].u32 + : normalized_color_mask) >> + (4 * common_blend_rt_index)) & + 0b1111, + render_pass_first_color_rt); + } else { + // No render targets are written to, though the render pass still may + // contain color attachments - set them to not written and not blending. + render_pass_first_color_rt.src_color_blend_factor = + PipelineBlendFactor::kOne; + render_pass_first_color_rt.dst_color_blend_factor = + PipelineBlendFactor::kZero; + render_pass_first_color_rt.color_blend_op = xenos::BlendOp::kAdd; + render_pass_first_color_rt.src_alpha_blend_factor = + PipelineBlendFactor::kOne; + render_pass_first_color_rt.dst_alpha_blend_factor = + PipelineBlendFactor::kZero; + render_pass_first_color_rt.alpha_blend_op = xenos::BlendOp::kAdd; + } + // Reuse the same blending settings for all render targets in the pass, + // for description consistency. 
+ uint32_t color_rt_index; + while (xe::bit_scan_forward(render_pass_color_rts_remaining, + &color_rt_index)) { + render_pass_color_rts_remaining &= ~(uint32_t(1) << color_rt_index); + description_out.render_targets[color_rt_index] = + render_pass_first_color_rt; + } + } + } + + return true; +} + +bool VulkanPipelineCache::ArePipelineRequirementsMet( + const PipelineDescription& description) const { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + + const VkPhysicalDevicePortabilitySubsetFeaturesKHR* + device_portability_subset_features = + provider.device_portability_subset_features(); + if (device_portability_subset_features) { + if (description.primitive_topology == + PipelinePrimitiveTopology::kTriangleFan && + device_portability_subset_features->triangleFans) { + return false; + } + if (description.polygon_mode == PipelinePolygonMode::kPoint && + device_portability_subset_features->pointPolygons) { + return false; + } + if (!device_portability_subset_features->constantAlphaColorBlendFactors) { + uint32_t color_rts_remaining = + description.render_pass_key.depth_and_color_used >> 1; + uint32_t color_rt_index; + while (xe::bit_scan_forward(color_rts_remaining, &color_rt_index)) { + color_rts_remaining &= ~(uint32_t(1) << color_rt_index); + const PipelineRenderTarget& color_rt = + description.render_targets[color_rt_index]; + if (color_rt.src_color_blend_factor == + PipelineBlendFactor::kConstantAlpha || + color_rt.src_color_blend_factor == + PipelineBlendFactor::kOneMinusConstantAlpha || + color_rt.dst_color_blend_factor == + PipelineBlendFactor::kConstantAlpha || + color_rt.dst_color_blend_factor == + PipelineBlendFactor::kOneMinusConstantAlpha) { + return false; + } + } + } + } + + if (!device_features.independentBlend) { + uint32_t color_rts_remaining = + description.render_pass_key.depth_and_color_used >> 1; + uint32_t 
first_color_rt_index; + if (xe::bit_scan_forward(color_rts_remaining, &first_color_rt_index)) { + color_rts_remaining &= ~(uint32_t(1) << first_color_rt_index); + const PipelineRenderTarget& first_color_rt = + description.render_targets[first_color_rt_index]; + uint32_t color_rt_index; + while (xe::bit_scan_forward(color_rts_remaining, &color_rt_index)) { + color_rts_remaining &= ~(uint32_t(1) << color_rt_index); + const PipelineRenderTarget& color_rt = + description.render_targets[color_rt_index]; + if (color_rt.src_color_blend_factor != + first_color_rt.src_color_blend_factor || + color_rt.dst_color_blend_factor != + first_color_rt.dst_color_blend_factor || + color_rt.color_blend_op != first_color_rt.color_blend_op || + color_rt.src_alpha_blend_factor != + first_color_rt.src_alpha_blend_factor || + color_rt.dst_alpha_blend_factor != + first_color_rt.dst_alpha_blend_factor || + color_rt.alpha_blend_op != first_color_rt.alpha_blend_op || + color_rt.color_write_mask != first_color_rt.color_write_mask) { + return false; + } + } + } + } + return true; } @@ -355,6 +658,17 @@ bool VulkanPipelineCache::EnsurePipelineCreated( } const PipelineDescription& description = creation_arguments.pipeline->first; + if (!ArePipelineRequirementsMet(description)) { + assert_always( + "When creating a new pipeline, the description must not require " + "unsupported features, and when loading the pipeline storage, " + "unsupported supported must be filtered out"); + return false; + } + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); VkPipelineShaderStageCreateInfo shader_stages[2]; uint32_t shader_stage_count = 0; @@ -434,10 +748,6 @@ bool VulkanPipelineCache::EnsurePipelineCreated( input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; break; case PipelinePrimitiveTopology::kTriangleFan: - assert_true(device_pipeline_features_.triangle_fans); - if 
(!device_pipeline_features_.triangle_fans) { - return false; - } input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; break; case PipelinePrimitiveTopology::kLineListWithAdjacency: @@ -474,6 +784,8 @@ bool VulkanPipelineCache::EnsurePipelineCreated( VkPipelineRasterizationStateCreateInfo rasterization_state = {}; rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterization_state.depthClampEnable = + description.depth_clamp_enable ? VK_TRUE : VK_FALSE; switch (description.polygon_mode) { case PipelinePolygonMode::kFill: rasterization_state.polygonMode = VK_POLYGON_MODE_FILL; @@ -482,10 +794,6 @@ bool VulkanPipelineCache::EnsurePipelineCreated( rasterization_state.polygonMode = VK_POLYGON_MODE_LINE; break; case PipelinePolygonMode::kPoint: - assert_true(device_pipeline_features_.point_polygons); - if (!device_pipeline_features_.point_polygons) { - return false; - } rasterization_state.polygonMode = VK_POLYGON_MODE_POINT; break; default: @@ -502,6 +810,17 @@ bool VulkanPipelineCache::EnsurePipelineCreated( rasterization_state.frontFace = description.front_face_clockwise ? VK_FRONT_FACE_CLOCKWISE : VK_FRONT_FACE_COUNTER_CLOCKWISE; + // Depth bias is dynamic (even toggling - pipeline creation is expensive). + // "If no depth attachment is present, r is undefined" in the depth bias + // formula, though Z has no effect on anything if a depth attachment is not + // used (the guest shader can't access Z), enabling only when there's a + // depth / stencil attachment for correctness. + // TODO(Triang3l): Disable the depth bias for the fragment shader interlock RB + // implementation. + rasterization_state.depthBiasEnable = + (description.render_pass_key.depth_and_color_used & 0b1) ? VK_TRUE + : VK_FALSE; + // TODO(Triang3l): Wide lines. 
rasterization_state.lineWidth = 1.0f; VkPipelineMultisampleStateCreateInfo multisample_state = {}; @@ -510,42 +829,156 @@ bool VulkanPipelineCache::EnsurePipelineCreated( multisample_state.rasterizationSamples = VkSampleCountFlagBits( uint32_t(1) << uint32_t(description.render_pass_key.msaa_samples)); - // TODO(Triang3l): Depth / stencil state. VkPipelineDepthStencilStateCreateInfo depth_stencil_state = {}; depth_stencil_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; depth_stencil_state.pNext = nullptr; + if (description.depth_write_enable || + description.depth_compare_op != xenos::CompareFunction::kAlways) { + depth_stencil_state.depthTestEnable = VK_TRUE; + depth_stencil_state.depthWriteEnable = + description.depth_write_enable ? VK_TRUE : VK_FALSE; + depth_stencil_state.depthCompareOp = VkCompareOp( + uint32_t(VK_COMPARE_OP_NEVER) + uint32_t(description.depth_compare_op)); + } + if (description.stencil_test_enable) { + depth_stencil_state.stencilTestEnable = VK_TRUE; + depth_stencil_state.front.failOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_front_fail_op)); + depth_stencil_state.front.passOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_front_pass_op)); + depth_stencil_state.front.depthFailOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_front_depth_fail_op)); + depth_stencil_state.front.compareOp = + VkCompareOp(uint32_t(VK_COMPARE_OP_NEVER) + + uint32_t(description.stencil_front_compare_op)); + depth_stencil_state.back.failOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_back_fail_op)); + depth_stencil_state.back.passOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_back_pass_op)); + depth_stencil_state.back.depthFailOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_back_depth_fail_op)); + depth_stencil_state.back.compareOp = + 
VkCompareOp(uint32_t(VK_COMPARE_OP_NEVER) + + uint32_t(description.stencil_back_compare_op)); + } - // TODO(Triang3l): Color blend state. - // TODO(Triang3l): Handle disabled separate blending. VkPipelineColorBlendAttachmentState color_blend_attachments[xenos::kMaxColorRenderTargets] = {}; - for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { - if (!(description.render_pass_key.depth_and_color_used & (1 << (1 + i)))) { - continue; + uint32_t color_rts_used = + description.render_pass_key.depth_and_color_used >> 1; + { + static const VkBlendFactor kBlendFactorMap[] = { + VK_BLEND_FACTOR_ZERO, + VK_BLEND_FACTOR_ONE, + VK_BLEND_FACTOR_SRC_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, + VK_BLEND_FACTOR_DST_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, + VK_BLEND_FACTOR_SRC_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, + VK_BLEND_FACTOR_CONSTANT_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, + VK_BLEND_FACTOR_CONSTANT_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, + VK_BLEND_FACTOR_SRC_ALPHA_SATURATE, + }; + // 8 entries for safety since 3 bits from the guest are passed directly. 
+ static const VkBlendOp kBlendOpMap[] = {VK_BLEND_OP_ADD, + VK_BLEND_OP_SUBTRACT, + VK_BLEND_OP_MIN, + VK_BLEND_OP_MAX, + VK_BLEND_OP_REVERSE_SUBTRACT, + VK_BLEND_OP_ADD, + VK_BLEND_OP_ADD, + VK_BLEND_OP_ADD}; + uint32_t color_rts_remaining = color_rts_used; + uint32_t color_rt_index; + while (xe::bit_scan_forward(color_rts_remaining, &color_rt_index)) { + color_rts_remaining &= ~(uint32_t(1) << color_rt_index); + VkPipelineColorBlendAttachmentState& color_blend_attachment = + color_blend_attachments[color_rt_index]; + const PipelineRenderTarget& color_rt = + description.render_targets[color_rt_index]; + if (color_rt.src_color_blend_factor != PipelineBlendFactor::kOne || + color_rt.dst_color_blend_factor != PipelineBlendFactor::kZero || + color_rt.color_blend_op != xenos::BlendOp::kAdd || + color_rt.src_alpha_blend_factor != PipelineBlendFactor::kOne || + color_rt.dst_alpha_blend_factor != PipelineBlendFactor::kZero || + color_rt.alpha_blend_op != xenos::BlendOp::kAdd) { + color_blend_attachment.blendEnable = VK_TRUE; + color_blend_attachment.srcColorBlendFactor = + kBlendFactorMap[uint32_t(color_rt.src_color_blend_factor)]; + color_blend_attachment.dstColorBlendFactor = + kBlendFactorMap[uint32_t(color_rt.dst_color_blend_factor)]; + color_blend_attachment.colorBlendOp = + kBlendOpMap[uint32_t(color_rt.color_blend_op)]; + color_blend_attachment.srcAlphaBlendFactor = + kBlendFactorMap[uint32_t(color_rt.src_alpha_blend_factor)]; + color_blend_attachment.dstAlphaBlendFactor = + kBlendFactorMap[uint32_t(color_rt.dst_alpha_blend_factor)]; + color_blend_attachment.alphaBlendOp = + kBlendOpMap[uint32_t(color_rt.alpha_blend_op)]; + } + color_blend_attachment.colorWriteMask = + VkColorComponentFlags(color_rt.color_write_mask); + if (!device_features.independentBlend) { + // For non-independent blend, the pAttachments element for the first + // actually used color will be replicated into all. 
+ break; + } } - color_blend_attachments[i].colorWriteMask = - VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; } VkPipelineColorBlendStateCreateInfo color_blend_state = {}; color_blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - color_blend_state.attachmentCount = - 32 - xe::lzcnt( - uint32_t(description.render_pass_key.depth_and_color_used >> 1)); + color_blend_state.attachmentCount = 32 - xe::lzcnt(color_rts_used); color_blend_state.pAttachments = color_blend_attachments; + if (color_rts_used && !device_features.independentBlend) { + // "If the independent blending feature is not enabled, all elements of + // pAttachments must be identical." + uint32_t first_color_rt_index; + xe::bit_scan_forward(color_rts_used, &first_color_rt_index); + for (uint32_t i = 0; i < color_blend_state.attachmentCount; ++i) { + if (i == first_color_rt_index) { + continue; + } + color_blend_attachments[i] = + color_blend_attachments[first_color_rt_index]; + } + } - static const VkDynamicState dynamic_states[] = { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }; + std::array dynamic_states; VkPipelineDynamicStateCreateInfo dynamic_state; dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; dynamic_state.pNext = nullptr; dynamic_state.flags = 0; - dynamic_state.dynamicStateCount = uint32_t(xe::countof(dynamic_states)); - dynamic_state.pDynamicStates = dynamic_states; + dynamic_state.dynamicStateCount = 0; + dynamic_state.pDynamicStates = dynamic_states.data(); + // Regardless of whether some of this state actually has any effect on the + // pipeline, marking all as dynamic because otherwise, binding any pipeline + // with such state not marked as dynamic will cause the dynamic state to be + // invalidated (again, even if it has no effect). 
+ dynamic_states[dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT; + dynamic_states[dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR; + dynamic_states[dynamic_state.dynamicStateCount++] = + VK_DYNAMIC_STATE_DEPTH_BIAS; + dynamic_states[dynamic_state.dynamicStateCount++] = + VK_DYNAMIC_STATE_BLEND_CONSTANTS; + dynamic_states[dynamic_state.dynamicStateCount++] = + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK; + dynamic_states[dynamic_state.dynamicStateCount++] = + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK; + dynamic_states[dynamic_state.dynamicStateCount++] = + VK_DYNAMIC_STATE_STENCIL_REFERENCE; VkGraphicsPipelineCreateInfo pipeline_create_info; pipeline_create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; @@ -569,8 +1002,6 @@ bool VulkanPipelineCache::EnsurePipelineCreated( pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE; pipeline_create_info.basePipelineIndex = UINT32_MAX; - const ui::vulkan::VulkanProvider& provider = - command_processor_.GetVulkanProvider(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); VkPipeline pipeline; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h index 7ca83a751..c753e4dfa 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ @@ -21,6 +21,7 @@ #include "xenia/base/xxhash.h" #include "xenia/gpu/primitive_processor.h" #include "xenia/gpu/register_file.h" +#include "xenia/gpu/registers.h" #include "xenia/gpu/spirv_shader_translator.h" #include "xenia/gpu/vulkan/vulkan_render_target_cache.h" #include "xenia/gpu/vulkan/vulkan_shader.h" @@ -41,6 +42,9 @@ class VulkanPipelineCache { public: virtual ~PipelineLayoutProvider() {} virtual VkPipelineLayout GetPipelineLayout() const = 0; + + protected: + PipelineLayoutProvider() = default; }; VulkanPipelineCache(VulkanCommandProcessor& command_processor, @@ -65,37 +69,25 @@ class VulkanPipelineCache { const Shader& shader, Shader::HostVertexShaderType host_vertex_shader_type) const; SpirvShaderTranslator::Modification GetCurrentPixelShaderModification( - const Shader& shader) const; + const Shader& shader, uint32_t normalized_color_mask) const; // TODO(Triang3l): Return a deferred creation handle. bool ConfigurePipeline( VulkanShader::VulkanTranslation* vertex_shader, VulkanShader::VulkanTranslation* pixel_shader, const PrimitiveProcessor::ProcessingResult& primitive_processing_result, + uint32_t normalized_color_mask, VulkanRenderTargetCache::RenderPassKey render_pass_key, VkPipeline& pipeline_out, const PipelineLayoutProvider*& pipeline_layout_out); private: - // Can only load pipeline storage if features of the device it was created on - // and the current device match because descriptions may requires features not - // supported on the device. Very radical differences (such as RB emulation - // method) should result in a different storage file being used. - union DevicePipelineFeatures { - struct { - uint32_t point_polygons : 1; - uint32_t triangle_fans : 1; - }; - uint32_t features = 0; - }; - enum class PipelinePrimitiveTopology : uint32_t { kPointList, kLineList, kLineStrip, kTriangleList, kTriangleStrip, - // Requires DevicePipelineFeatures::triangle_fans. 
kTriangleFan, kLineListWithAdjacency, kPatchList, @@ -107,6 +99,35 @@ class VulkanPipelineCache { kPoint, }; + enum class PipelineBlendFactor : uint32_t { + kZero, + kOne, + kSrcColor, + kOneMinusSrcColor, + kDstColor, + kOneMinusDstColor, + kSrcAlpha, + kOneMinusSrcAlpha, + kDstAlpha, + kOneMinusDstAlpha, + kConstantColor, + kOneMinusConstantColor, + kConstantAlpha, + kOneMinusConstantAlpha, + kSrcAlphaSaturate, + }; + + // Update PipelineDescription::kVersion if anything is changed! + XEPACKEDSTRUCT(PipelineRenderTarget, { + PipelineBlendFactor src_color_blend_factor : 4; // 4 + PipelineBlendFactor dst_color_blend_factor : 4; // 8 + xenos::BlendOp color_blend_op : 3; // 11 + PipelineBlendFactor src_alpha_blend_factor : 4; // 15 + PipelineBlendFactor dst_alpha_blend_factor : 4; // 19 + xenos::BlendOp alpha_blend_op : 3; // 22 + uint32_t color_write_mask : 4; // 26 + }); + XEPACKEDSTRUCT(PipelineDescription, { uint64_t vertex_shader_hash; uint64_t vertex_shader_modification; @@ -119,10 +140,27 @@ class VulkanPipelineCache { PipelinePrimitiveTopology primitive_topology : 3; // 3 uint32_t primitive_restart : 1; // 4 // Rasterization. - PipelinePolygonMode polygon_mode : 2; // 6 - uint32_t cull_front : 1; // 7 - uint32_t cull_back : 1; // 8 - uint32_t front_face_clockwise : 1; // 9 + uint32_t depth_clamp_enable : 1; // 5 + PipelinePolygonMode polygon_mode : 2; // 7 + uint32_t cull_front : 1; // 8 + uint32_t cull_back : 1; // 9 + uint32_t front_face_clockwise : 1; // 10 + // Depth / stencil. 
+ uint32_t depth_write_enable : 1; // 11 + xenos::CompareFunction depth_compare_op : 3; // 14 + uint32_t stencil_test_enable : 1; // 15 + xenos::StencilOp stencil_front_fail_op : 3; // 18 + xenos::StencilOp stencil_front_pass_op : 3; // 21 + xenos::StencilOp stencil_front_depth_fail_op : 3; // 24 + xenos::CompareFunction stencil_front_compare_op : 3; // 27 + xenos::StencilOp stencil_back_fail_op : 3; // 30 + + xenos::StencilOp stencil_back_pass_op : 3; // 3 + xenos::StencilOp stencil_back_depth_fail_op : 3; // 6 + xenos::CompareFunction stencil_back_compare_op : 3; // 9 + + // Filled only for the attachments present in the render pass object. + PipelineRenderTarget render_targets[xenos::kMaxColorRenderTargets]; // Including all the padding, for a stable hash. PipelineDescription() { Reset(); } @@ -166,13 +204,20 @@ class VulkanPipelineCache { bool TranslateAnalyzedShader(SpirvShaderTranslator& translator, VulkanShader::VulkanTranslation& translation); + void WritePipelineRenderTargetDescription( + reg::RB_BLENDCONTROL blend_control, uint32_t write_mask, + PipelineRenderTarget& render_target_out) const; bool GetCurrentStateDescription( const VulkanShader::VulkanTranslation* vertex_shader, const VulkanShader::VulkanTranslation* pixel_shader, const PrimitiveProcessor::ProcessingResult& primitive_processing_result, + uint32_t normalized_color_mask, VulkanRenderTargetCache::RenderPassKey render_pass_key, PipelineDescription& description_out) const; + // Whether the pipeline for the given description is supported by the device. + bool ArePipelineRequirementsMet(const PipelineDescription& description) const; + // Can be called from creation threads - all needed data must be fully set up // at the point of the call: shaders must be translated, pipeline layout and // render pass objects must be available. 
@@ -183,8 +228,6 @@ class VulkanPipelineCache { const RegisterFile& register_file_; VulkanRenderTargetCache& render_target_cache_; - DevicePipelineFeatures device_pipeline_features_; - // Temporary storage for AnalyzeUcode calls on the processor thread. StringBuffer ucode_disasm_buffer_; // Reusable shader translator on the command processor thread. diff --git a/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc index 5c1dd4168..7b628037b 100644 --- a/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc @@ -28,11 +28,17 @@ VulkanPrimitiveProcessor::~VulkanPrimitiveProcessor() { Shutdown(true); } bool VulkanPrimitiveProcessor::Initialize() { // TODO(Triang3l): fullDrawIndexUint32 feature check and indirect index fetch. - // TODO(Triang3l): Portability subset triangleFans check when portability - // subset support is added. // TODO(Triang3l): geometryShader check for quads when geometry shaders are // added. 
- if (!InitializeCommon(true, true, false, false)) { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDevicePortabilitySubsetFeaturesKHR* + device_portability_subset_features = + provider.device_portability_subset_features(); + if (!InitializeCommon(true, + !device_portability_subset_features || + device_portability_subset_features->triangleFans, + false, false)) { Shutdown(); return false; } diff --git a/src/xenia/ui/vulkan/functions/device_1_0.inc b/src/xenia/ui/vulkan/functions/device_1_0.inc index f0811fcea..2a979f55f 100644 --- a/src/xenia/ui/vulkan/functions/device_1_0.inc +++ b/src/xenia/ui/vulkan/functions/device_1_0.inc @@ -20,7 +20,12 @@ XE_UI_VULKAN_FUNCTION(vkCmdDrawIndexed) XE_UI_VULKAN_FUNCTION(vkCmdEndRenderPass) XE_UI_VULKAN_FUNCTION(vkCmdPipelineBarrier) XE_UI_VULKAN_FUNCTION(vkCmdPushConstants) +XE_UI_VULKAN_FUNCTION(vkCmdSetBlendConstants) +XE_UI_VULKAN_FUNCTION(vkCmdSetDepthBias) XE_UI_VULKAN_FUNCTION(vkCmdSetScissor) +XE_UI_VULKAN_FUNCTION(vkCmdSetStencilCompareMask) +XE_UI_VULKAN_FUNCTION(vkCmdSetStencilReference) +XE_UI_VULKAN_FUNCTION(vkCmdSetStencilWriteMask) XE_UI_VULKAN_FUNCTION(vkCmdSetViewport) XE_UI_VULKAN_FUNCTION(vkCreateBuffer) XE_UI_VULKAN_FUNCTION(vkCreateCommandPool) From b41fb851c62d90152738520b15fd88b2993c640f Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 15 Feb 2022 23:05:04 +0300 Subject: [PATCH 074/123] [Vulkan] Unsupported pipeline features assertion typo fix --- src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 2e3c32d8c..2674396d1 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -662,7 +662,7 @@ bool VulkanPipelineCache::EnsurePipelineCreated( assert_always( "When creating a new pipeline, the description must not require " "unsupported 
features, and when loading the pipeline storage, " - "unsupported supported must be filtered out"); + "pipelines with unsupported features must be filtered out"); return false; } From acc4fd6846d930dc56b7f763f561df577c829a7f Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 21 Mar 2022 22:53:19 +0300 Subject: [PATCH 075/123] [Vulkan] Rectangle list geometry shader --- .../primitive_rectangle_list_gs.h | 391 ++++++++++++++++++ .../shaders/primitive_rectangle_list.gs.glsl | 103 +++++ src/xenia/gpu/shaders/xenos_gs.glsli | 25 ++ src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 71 +++- src/xenia/gpu/vulkan/vulkan_pipeline_cache.h | 39 +- 5 files changed, 609 insertions(+), 20 deletions(-) create mode 100644 src/xenia/gpu/shaders/bytecode/vulkan_spirv/primitive_rectangle_list_gs.h create mode 100644 src/xenia/gpu/shaders/primitive_rectangle_list.gs.glsl create mode 100644 src/xenia/gpu/shaders/xenos_gs.glsli diff --git a/src/xenia/gpu/shaders/bytecode/vulkan_spirv/primitive_rectangle_list_gs.h b/src/xenia/gpu/shaders/bytecode/vulkan_spirv/primitive_rectangle_list_gs.h new file mode 100644 index 000000000..86590f170 --- /dev/null +++ b/src/xenia/gpu/shaders/bytecode/vulkan_spirv/primitive_rectangle_list_gs.h @@ -0,0 +1,391 @@ +// Generated with `xb buildshaders`. 
+#if 0 +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 24886 +; Schema: 0 + OpCapability Geometry + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Geometry %5663 "main" %5305 %3631 %3144 %4930 + OpExecutionMode %5663 Triangles + OpExecutionMode %5663 Invocations 1 + OpExecutionMode %5663 OutputTriangleStrip + OpExecutionMode %5663 OutputVertices 6 + OpMemberDecorate %_struct_1017 0 BuiltIn Position + OpDecorate %_struct_1017 Block + OpDecorate %3631 Location 0 + OpDecorate %3144 Location 0 + OpMemberDecorate %_struct_1018 0 BuiltIn Position + OpDecorate %_struct_1018 Block + OpDecorate %7509 NoContraction + OpDecorate %15269 NoContraction + OpDecorate %24885 NoContraction + OpDecorate %14166 NoContraction + OpDecorate %7062 NoContraction + %void = OpTypeVoid + %1282 = OpTypeFunction %void + %bool = OpTypeBool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_struct_1017 = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_3 = OpConstant %uint 3 +%_arr__struct_1017_uint_3 = OpTypeArray %_struct_1017 %uint_3 +%_ptr_Input__arr__struct_1017_uint_3 = OpTypePointer Input %_arr__struct_1017_uint_3 + %5305 = OpVariable %_ptr_Input__arr__struct_1017_uint_3 Input + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %v4bool = OpTypeVector %bool 4 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %uint_0 = OpConstant %uint 0 + %uint_16 = OpConstant %uint 16 +%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16 +%_ptr_Output__arr_v4float_uint_16 = OpTypePointer Output %_arr_v4float_uint_16 + %3631 = OpVariable %_ptr_Output__arr_v4float_uint_16 Output +%_arr__arr_v4float_uint_16_uint_3 = OpTypeArray %_arr_v4float_uint_16 %uint_3 +%_ptr_Input__arr__arr_v4float_uint_16_uint_3 = OpTypePointer Input %_arr__arr_v4float_uint_16_uint_3 + %3144 = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_3 Input 
+%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16 +%_struct_1018 = OpTypeStruct %v4float +%_ptr_Output__struct_1018 = OpTypePointer Output %_struct_1018 + %4930 = OpVariable %_ptr_Output__struct_1018 Output +%_ptr_Output_v4float = OpTypePointer Output %v4float + %v3float = OpTypeVector %float 3 + %float_n1 = OpConstant %float -1 + %float_1 = OpConstant %float 1 + %266 = OpConstantComposite %v3float %float_n1 %float_1 %float_1 + %2582 = OpConstantComposite %v3float %float_1 %float_n1 %float_1 + %267 = OpConstantComposite %v3float %float_1 %float_1 %float_n1 + %v3bool = OpTypeVector %bool 3 + %5663 = OpFunction %void None %1282 + %15110 = OpLabel + OpSelectionMerge %23648 None + OpSwitch %uint_0 %11880 + %11880 = OpLabel + %23974 = OpAccessChain %_ptr_Input_v4float %5305 %int_0 %int_0 + %20722 = OpLoad %v4float %23974 + %16842 = OpIsNan %v4bool %20722 + %9783 = OpAny %bool %16842 + %11671 = OpLogicalNot %bool %9783 + OpSelectionMerge %7750 None + OpBranchConditional %11671 %12129 %7750 + %12129 = OpLabel + %19939 = OpAccessChain %_ptr_Input_v4float %5305 %int_1 %int_0 + %20723 = OpLoad %v4float %19939 + %18381 = OpIsNan %v4bool %20723 + %14860 = OpAny %bool %18381 + OpBranch %7750 + %7750 = OpLabel + %24534 = OpPhi %bool %9783 %11880 %14860 %12129 + %22068 = OpLogicalNot %bool %24534 + OpSelectionMerge %9251 None + OpBranchConditional %22068 %12130 %9251 + %12130 = OpLabel + %19940 = OpAccessChain %_ptr_Input_v4float %5305 %int_2 %int_0 + %20724 = OpLoad %v4float %19940 + %18382 = OpIsNan %v4bool %20724 + %14861 = OpAny %bool %18382 + OpBranch %9251 + %9251 = OpLabel + %10924 = OpPhi %bool %24534 %7750 %14861 %12130 + OpSelectionMerge %7205 None + OpBranchConditional %10924 %21992 %7205 + %21992 = OpLabel + OpBranch %23648 + %7205 = OpLabel + OpBranch %6529 + %6529 = OpLabel + %23131 = OpPhi %uint %uint_0 %7205 %11651 %14551 + %13910 = OpULessThan %bool %23131 %uint_3 + OpLoopMerge %8693 %14551 None + OpBranchConditional %13910 %14551 
%8693 + %14551 = OpLabel + %18153 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3144 %23131 + %16222 = OpLoad %_arr_v4float_uint_16 %18153 + OpStore %3631 %16222 + %16679 = OpAccessChain %_ptr_Input_v4float %5305 %23131 %int_0 + %7391 = OpLoad %v4float %16679 + %22888 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 + OpStore %22888 %7391 + OpEmitVertex + %11651 = OpIAdd %uint %23131 %int_1 + OpBranch %6529 + %8693 = OpLabel + OpEndPrimitive + %12070 = OpAccessChain %_ptr_Input_v4float %5305 %int_1 %int_0 + %6301 = OpLoad %v4float %12070 + %18018 = OpVectorShuffle %v3float %6301 %6301 0 1 2 + %12374 = OpVectorShuffle %v3float %20722 %20722 0 1 2 + %18845 = OpFSub %v3float %18018 %12374 + %18938 = OpAccessChain %_ptr_Input_v4float %5305 %int_2 %int_0 + %13501 = OpLoad %v4float %18938 + %9022 = OpVectorShuffle %v3float %13501 %13501 0 1 2 + %7477 = OpFSub %v3float %9022 %12374 + %11062 = OpFSub %v3float %9022 %18018 + %14931 = OpDot %float %18845 %18845 + %23734 = OpDot %float %7477 %7477 + %22344 = OpDot %float %11062 %11062 + %24721 = OpFOrdGreaterThan %bool %22344 %14931 + OpSelectionMerge %15688 None + OpBranchConditional %24721 %13839 %15688 + %13839 = OpLabel + %21187 = OpFOrdGreaterThan %bool %22344 %23734 + OpBranch %15688 + %15688 = OpLabel + %10925 = OpPhi %bool %24721 %8693 %21187 %13839 + OpSelectionMerge %11701 None + OpBranchConditional %10925 %12131 %13261 + %12131 = OpLabel + %18154 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3144 %int_2 + %16223 = OpLoad %_arr_v4float_uint_16 %18154 + OpStore %3631 %16223 + %19413 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 + OpStore %19413 %13501 + OpEmitVertex + %22812 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3144 %int_1 + %11341 = OpLoad %_arr_v4float_uint_16 %22812 + OpStore %3631 %11341 + OpStore %19413 %6301 + OpEmitVertex + OpBranch %11701 + %13261 = OpLabel + %23993 = OpFOrdGreaterThan %bool %23734 %14931 + OpSelectionMerge %15689 None + OpBranchConditional %23993 %13840 %15689 + 
%13840 = OpLabel + %21188 = OpFOrdGreaterThan %bool %23734 %22344 + OpBranch %15689 + %15689 = OpLabel + %10926 = OpPhi %bool %23993 %13261 %21188 %13840 + OpSelectionMerge %11046 None + OpBranchConditional %10926 %12132 %11589 + %12132 = OpLabel + %18155 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3144 %int_0 + %16224 = OpLoad %_arr_v4float_uint_16 %18155 + OpStore %3631 %16224 + %19414 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 + OpStore %19414 %20722 + OpEmitVertex + %22813 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3144 %int_2 + %11342 = OpLoad %_arr_v4float_uint_16 %22813 + OpStore %3631 %11342 + OpStore %19414 %13501 + OpEmitVertex + OpBranch %11046 + %11589 = OpLabel + %20575 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3144 %int_1 + %16225 = OpLoad %_arr_v4float_uint_16 %20575 + OpStore %3631 %16225 + %19415 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 + OpStore %19415 %6301 + OpEmitVertex + %22814 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3144 %int_0 + %11343 = OpLoad %_arr_v4float_uint_16 %22814 + OpStore %3631 %11343 + OpStore %19415 %20722 + OpEmitVertex + OpBranch %11046 + %11046 = OpLabel + %16046 = OpCompositeConstruct %v3bool %10926 %10926 %10926 + %20034 = OpSelect %v3float %16046 %2582 %267 + OpBranch %11701 + %11701 = OpLabel + %10540 = OpPhi %v3float %266 %12131 %20034 %11046 + OpBranch %19952 + %19952 = OpLabel + %23132 = OpPhi %uint %uint_0 %11701 %21301 %11859 + %13911 = OpULessThan %bool %23132 %uint_16 + OpLoopMerge %14959 %11859 None + OpBranchConditional %13911 %11859 %14959 + %11859 = OpLabel + %19851 = OpCompositeExtract %float %10540 0 + %12487 = OpAccessChain %_ptr_Input_v4float %3144 %int_0 %23132 + %12683 = OpLoad %v4float %12487 + %8719 = OpVectorTimesScalar %v4float %12683 %19851 + %15671 = OpCompositeExtract %float %10540 1 + %17096 = OpAccessChain %_ptr_Input_v4float %3144 %int_1 %23132 + %13595 = OpLoad %v4float %17096 + %19790 = OpVectorTimesScalar %v4float %13595 %15671 + %20206 = OpFAdd 
%v4float %8719 %19790 + %10579 = OpCompositeExtract %float %10540 2 + %16297 = OpAccessChain %_ptr_Input_v4float %3144 %int_2 %23132 + %13596 = OpLoad %v4float %16297 + %19486 = OpVectorTimesScalar %v4float %13596 %10579 + %22917 = OpFAdd %v4float %20206 %19486 + %16419 = OpAccessChain %_ptr_Output_v4float %3631 %23132 + OpStore %16419 %22917 + %21301 = OpIAdd %uint %23132 %int_1 + OpBranch %19952 + %14959 = OpLabel + %9332 = OpCompositeExtract %float %10540 0 + %7509 = OpVectorTimesScalar %v4float %20722 %9332 + %6858 = OpCompositeExtract %float %10540 1 + %15269 = OpVectorTimesScalar %v4float %6301 %6858 + %24885 = OpFAdd %v4float %7509 %15269 + %17621 = OpCompositeExtract %float %10540 2 + %14166 = OpVectorTimesScalar %v4float %13501 %17621 + %7062 = OpFAdd %v4float %24885 %14166 + %18129 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 + OpStore %18129 %7062 + OpEmitVertex + OpEndPrimitive + OpBranch %23648 + %23648 = OpLabel + OpReturn + OpFunctionEnd +#endif + +const uint32_t primitive_rectangle_list_gs[] = { + 0x07230203, 0x00010000, 0x0008000A, 0x00006136, 0x00000000, 0x00020011, + 0x00000002, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E, + 0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x0009000F, 0x00000003, + 0x0000161F, 0x6E69616D, 0x00000000, 0x000014B9, 0x00000E2F, 0x00000C48, + 0x00001342, 0x00030010, 0x0000161F, 0x00000016, 0x00040010, 0x0000161F, + 0x00000000, 0x00000001, 0x00030010, 0x0000161F, 0x0000001D, 0x00040010, + 0x0000161F, 0x0000001A, 0x00000006, 0x00050048, 0x000003F9, 0x00000000, + 0x0000000B, 0x00000000, 0x00030047, 0x000003F9, 0x00000002, 0x00040047, + 0x00000E2F, 0x0000001E, 0x00000000, 0x00040047, 0x00000C48, 0x0000001E, + 0x00000000, 0x00050048, 0x000003FA, 0x00000000, 0x0000000B, 0x00000000, + 0x00030047, 0x000003FA, 0x00000002, 0x00030047, 0x00001D55, 0x0000002A, + 0x00030047, 0x00003BA5, 0x0000002A, 0x00030047, 0x00006135, 0x0000002A, + 0x00030047, 0x00003756, 0x0000002A, 0x00030047, 0x00001B96, 0x0000002A, + 
0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008, 0x00020014, + 0x00000009, 0x00030016, 0x0000000D, 0x00000020, 0x00040017, 0x0000001D, + 0x0000000D, 0x00000004, 0x0003001E, 0x000003F9, 0x0000001D, 0x00040015, + 0x0000000B, 0x00000020, 0x00000000, 0x0004002B, 0x0000000B, 0x00000A13, + 0x00000003, 0x0004001C, 0x00000A0F, 0x000003F9, 0x00000A13, 0x00040020, + 0x000000C9, 0x00000001, 0x00000A0F, 0x0004003B, 0x000000C9, 0x000014B9, + 0x00000001, 0x00040015, 0x0000000C, 0x00000020, 0x00000001, 0x0004002B, + 0x0000000C, 0x00000A0B, 0x00000000, 0x00040020, 0x0000029A, 0x00000001, + 0x0000001D, 0x00040017, 0x00000011, 0x00000009, 0x00000004, 0x0004002B, + 0x0000000C, 0x00000A0E, 0x00000001, 0x0004002B, 0x0000000C, 0x00000A11, + 0x00000002, 0x0004002B, 0x0000000B, 0x00000A0A, 0x00000000, 0x0004002B, + 0x0000000B, 0x00000A3A, 0x00000010, 0x0004001C, 0x0000066B, 0x0000001D, + 0x00000A3A, 0x00040020, 0x000008E8, 0x00000003, 0x0000066B, 0x0004003B, + 0x000008E8, 0x00000E2F, 0x00000003, 0x0004001C, 0x000001AC, 0x0000066B, + 0x00000A13, 0x00040020, 0x00000429, 0x00000001, 0x000001AC, 0x0004003B, + 0x00000429, 0x00000C48, 0x00000001, 0x00040020, 0x000008E9, 0x00000001, + 0x0000066B, 0x0003001E, 0x000003FA, 0x0000001D, 0x00040020, 0x00000676, + 0x00000003, 0x000003FA, 0x0004003B, 0x00000676, 0x00001342, 0x00000003, + 0x00040020, 0x0000029B, 0x00000003, 0x0000001D, 0x00040017, 0x00000018, + 0x0000000D, 0x00000003, 0x0004002B, 0x0000000D, 0x00000341, 0xBF800000, + 0x0004002B, 0x0000000D, 0x0000008A, 0x3F800000, 0x0006002C, 0x00000018, + 0x0000010A, 0x00000341, 0x0000008A, 0x0000008A, 0x0006002C, 0x00000018, + 0x00000A16, 0x0000008A, 0x00000341, 0x0000008A, 0x0006002C, 0x00000018, + 0x0000010B, 0x0000008A, 0x0000008A, 0x00000341, 0x00040017, 0x00000010, + 0x00000009, 0x00000003, 0x00050036, 0x00000008, 0x0000161F, 0x00000000, + 0x00000502, 0x000200F8, 0x00003B06, 0x000300F7, 0x00005C60, 0x00000000, + 0x000300FB, 0x00000A0A, 0x00002E68, 0x000200F8, 0x00002E68, 0x00060041, + 
0x0000029A, 0x00005DA6, 0x000014B9, 0x00000A0B, 0x00000A0B, 0x0004003D, + 0x0000001D, 0x000050F2, 0x00005DA6, 0x0004009C, 0x00000011, 0x000041CA, + 0x000050F2, 0x0004009A, 0x00000009, 0x00002637, 0x000041CA, 0x000400A8, + 0x00000009, 0x00002D97, 0x00002637, 0x000300F7, 0x00001E46, 0x00000000, + 0x000400FA, 0x00002D97, 0x00002F61, 0x00001E46, 0x000200F8, 0x00002F61, + 0x00060041, 0x0000029A, 0x00004DE3, 0x000014B9, 0x00000A0E, 0x00000A0B, + 0x0004003D, 0x0000001D, 0x000050F3, 0x00004DE3, 0x0004009C, 0x00000011, + 0x000047CD, 0x000050F3, 0x0004009A, 0x00000009, 0x00003A0C, 0x000047CD, + 0x000200F9, 0x00001E46, 0x000200F8, 0x00001E46, 0x000700F5, 0x00000009, + 0x00005FD6, 0x00002637, 0x00002E68, 0x00003A0C, 0x00002F61, 0x000400A8, + 0x00000009, 0x00005634, 0x00005FD6, 0x000300F7, 0x00002423, 0x00000000, + 0x000400FA, 0x00005634, 0x00002F62, 0x00002423, 0x000200F8, 0x00002F62, + 0x00060041, 0x0000029A, 0x00004DE4, 0x000014B9, 0x00000A11, 0x00000A0B, + 0x0004003D, 0x0000001D, 0x000050F4, 0x00004DE4, 0x0004009C, 0x00000011, + 0x000047CE, 0x000050F4, 0x0004009A, 0x00000009, 0x00003A0D, 0x000047CE, + 0x000200F9, 0x00002423, 0x000200F8, 0x00002423, 0x000700F5, 0x00000009, + 0x00002AAC, 0x00005FD6, 0x00001E46, 0x00003A0D, 0x00002F62, 0x000300F7, + 0x00001C25, 0x00000000, 0x000400FA, 0x00002AAC, 0x000055E8, 0x00001C25, + 0x000200F8, 0x000055E8, 0x000200F9, 0x00005C60, 0x000200F8, 0x00001C25, + 0x000200F9, 0x00001981, 0x000200F8, 0x00001981, 0x000700F5, 0x0000000B, + 0x00005A5B, 0x00000A0A, 0x00001C25, 0x00002D83, 0x000038D7, 0x000500B0, + 0x00000009, 0x00003656, 0x00005A5B, 0x00000A13, 0x000400F6, 0x000021F5, + 0x000038D7, 0x00000000, 0x000400FA, 0x00003656, 0x000038D7, 0x000021F5, + 0x000200F8, 0x000038D7, 0x00050041, 0x000008E9, 0x000046E9, 0x00000C48, + 0x00005A5B, 0x0004003D, 0x0000066B, 0x00003F5E, 0x000046E9, 0x0003003E, + 0x00000E2F, 0x00003F5E, 0x00060041, 0x0000029A, 0x00004127, 0x000014B9, + 0x00005A5B, 0x00000A0B, 0x0004003D, 0x0000001D, 0x00001CDF, 0x00004127, + 
0x00050041, 0x0000029B, 0x00005968, 0x00001342, 0x00000A0B, 0x0003003E, + 0x00005968, 0x00001CDF, 0x000100DA, 0x00050080, 0x0000000B, 0x00002D83, + 0x00005A5B, 0x00000A0E, 0x000200F9, 0x00001981, 0x000200F8, 0x000021F5, + 0x000100DB, 0x00060041, 0x0000029A, 0x00002F26, 0x000014B9, 0x00000A0E, + 0x00000A0B, 0x0004003D, 0x0000001D, 0x0000189D, 0x00002F26, 0x0008004F, + 0x00000018, 0x00004662, 0x0000189D, 0x0000189D, 0x00000000, 0x00000001, + 0x00000002, 0x0008004F, 0x00000018, 0x00003056, 0x000050F2, 0x000050F2, + 0x00000000, 0x00000001, 0x00000002, 0x00050083, 0x00000018, 0x0000499D, + 0x00004662, 0x00003056, 0x00060041, 0x0000029A, 0x000049FA, 0x000014B9, + 0x00000A11, 0x00000A0B, 0x0004003D, 0x0000001D, 0x000034BD, 0x000049FA, + 0x0008004F, 0x00000018, 0x0000233E, 0x000034BD, 0x000034BD, 0x00000000, + 0x00000001, 0x00000002, 0x00050083, 0x00000018, 0x00001D35, 0x0000233E, + 0x00003056, 0x00050083, 0x00000018, 0x00002B36, 0x0000233E, 0x00004662, + 0x00050094, 0x0000000D, 0x00003A53, 0x0000499D, 0x0000499D, 0x00050094, + 0x0000000D, 0x00005CB6, 0x00001D35, 0x00001D35, 0x00050094, 0x0000000D, + 0x00005748, 0x00002B36, 0x00002B36, 0x000500BA, 0x00000009, 0x00006091, + 0x00005748, 0x00003A53, 0x000300F7, 0x00003D48, 0x00000000, 0x000400FA, + 0x00006091, 0x0000360F, 0x00003D48, 0x000200F8, 0x0000360F, 0x000500BA, + 0x00000009, 0x000052C3, 0x00005748, 0x00005CB6, 0x000200F9, 0x00003D48, + 0x000200F8, 0x00003D48, 0x000700F5, 0x00000009, 0x00002AAD, 0x00006091, + 0x000021F5, 0x000052C3, 0x0000360F, 0x000300F7, 0x00002DB5, 0x00000000, + 0x000400FA, 0x00002AAD, 0x00002F63, 0x000033CD, 0x000200F8, 0x00002F63, + 0x00050041, 0x000008E9, 0x000046EA, 0x00000C48, 0x00000A11, 0x0004003D, + 0x0000066B, 0x00003F5F, 0x000046EA, 0x0003003E, 0x00000E2F, 0x00003F5F, + 0x00050041, 0x0000029B, 0x00004BD5, 0x00001342, 0x00000A0B, 0x0003003E, + 0x00004BD5, 0x000034BD, 0x000100DA, 0x00050041, 0x000008E9, 0x0000591C, + 0x00000C48, 0x00000A0E, 0x0004003D, 0x0000066B, 0x00002C4D, 0x0000591C, + 
0x0003003E, 0x00000E2F, 0x00002C4D, 0x0003003E, 0x00004BD5, 0x0000189D, + 0x000100DA, 0x000200F9, 0x00002DB5, 0x000200F8, 0x000033CD, 0x000500BA, + 0x00000009, 0x00005DB9, 0x00005CB6, 0x00003A53, 0x000300F7, 0x00003D49, + 0x00000000, 0x000400FA, 0x00005DB9, 0x00003610, 0x00003D49, 0x000200F8, + 0x00003610, 0x000500BA, 0x00000009, 0x000052C4, 0x00005CB6, 0x00005748, + 0x000200F9, 0x00003D49, 0x000200F8, 0x00003D49, 0x000700F5, 0x00000009, + 0x00002AAE, 0x00005DB9, 0x000033CD, 0x000052C4, 0x00003610, 0x000300F7, + 0x00002B26, 0x00000000, 0x000400FA, 0x00002AAE, 0x00002F64, 0x00002D45, + 0x000200F8, 0x00002F64, 0x00050041, 0x000008E9, 0x000046EB, 0x00000C48, + 0x00000A0B, 0x0004003D, 0x0000066B, 0x00003F60, 0x000046EB, 0x0003003E, + 0x00000E2F, 0x00003F60, 0x00050041, 0x0000029B, 0x00004BD6, 0x00001342, + 0x00000A0B, 0x0003003E, 0x00004BD6, 0x000050F2, 0x000100DA, 0x00050041, + 0x000008E9, 0x0000591D, 0x00000C48, 0x00000A11, 0x0004003D, 0x0000066B, + 0x00002C4E, 0x0000591D, 0x0003003E, 0x00000E2F, 0x00002C4E, 0x0003003E, + 0x00004BD6, 0x000034BD, 0x000100DA, 0x000200F9, 0x00002B26, 0x000200F8, + 0x00002D45, 0x00050041, 0x000008E9, 0x0000505F, 0x00000C48, 0x00000A0E, + 0x0004003D, 0x0000066B, 0x00003F61, 0x0000505F, 0x0003003E, 0x00000E2F, + 0x00003F61, 0x00050041, 0x0000029B, 0x00004BD7, 0x00001342, 0x00000A0B, + 0x0003003E, 0x00004BD7, 0x0000189D, 0x000100DA, 0x00050041, 0x000008E9, + 0x0000591E, 0x00000C48, 0x00000A0B, 0x0004003D, 0x0000066B, 0x00002C4F, + 0x0000591E, 0x0003003E, 0x00000E2F, 0x00002C4F, 0x0003003E, 0x00004BD7, + 0x000050F2, 0x000100DA, 0x000200F9, 0x00002B26, 0x000200F8, 0x00002B26, + 0x00060050, 0x00000010, 0x00003EAE, 0x00002AAE, 0x00002AAE, 0x00002AAE, + 0x000600A9, 0x00000018, 0x00004E42, 0x00003EAE, 0x00000A16, 0x0000010B, + 0x000200F9, 0x00002DB5, 0x000200F8, 0x00002DB5, 0x000700F5, 0x00000018, + 0x0000292C, 0x0000010A, 0x00002F63, 0x00004E42, 0x00002B26, 0x000200F9, + 0x00004DF0, 0x000200F8, 0x00004DF0, 0x000700F5, 0x0000000B, 0x00005A5C, + 
0x00000A0A, 0x00002DB5, 0x00005335, 0x00002E53, 0x000500B0, 0x00000009, + 0x00003657, 0x00005A5C, 0x00000A3A, 0x000400F6, 0x00003A6F, 0x00002E53, + 0x00000000, 0x000400FA, 0x00003657, 0x00002E53, 0x00003A6F, 0x000200F8, + 0x00002E53, 0x00050051, 0x0000000D, 0x00004D8B, 0x0000292C, 0x00000000, + 0x00060041, 0x0000029A, 0x000030C7, 0x00000C48, 0x00000A0B, 0x00005A5C, + 0x0004003D, 0x0000001D, 0x0000318B, 0x000030C7, 0x0005008E, 0x0000001D, + 0x0000220F, 0x0000318B, 0x00004D8B, 0x00050051, 0x0000000D, 0x00003D37, + 0x0000292C, 0x00000001, 0x00060041, 0x0000029A, 0x000042C8, 0x00000C48, + 0x00000A0E, 0x00005A5C, 0x0004003D, 0x0000001D, 0x0000351B, 0x000042C8, + 0x0005008E, 0x0000001D, 0x00004D4E, 0x0000351B, 0x00003D37, 0x00050081, + 0x0000001D, 0x00004EEE, 0x0000220F, 0x00004D4E, 0x00050051, 0x0000000D, + 0x00002953, 0x0000292C, 0x00000002, 0x00060041, 0x0000029A, 0x00003FA9, + 0x00000C48, 0x00000A11, 0x00005A5C, 0x0004003D, 0x0000001D, 0x0000351C, + 0x00003FA9, 0x0005008E, 0x0000001D, 0x00004C1E, 0x0000351C, 0x00002953, + 0x00050081, 0x0000001D, 0x00005985, 0x00004EEE, 0x00004C1E, 0x00050041, + 0x0000029B, 0x00004023, 0x00000E2F, 0x00005A5C, 0x0003003E, 0x00004023, + 0x00005985, 0x00050080, 0x0000000B, 0x00005335, 0x00005A5C, 0x00000A0E, + 0x000200F9, 0x00004DF0, 0x000200F8, 0x00003A6F, 0x00050051, 0x0000000D, + 0x00002474, 0x0000292C, 0x00000000, 0x0005008E, 0x0000001D, 0x00001D55, + 0x000050F2, 0x00002474, 0x00050051, 0x0000000D, 0x00001ACA, 0x0000292C, + 0x00000001, 0x0005008E, 0x0000001D, 0x00003BA5, 0x0000189D, 0x00001ACA, + 0x00050081, 0x0000001D, 0x00006135, 0x00001D55, 0x00003BA5, 0x00050051, + 0x0000000D, 0x000044D5, 0x0000292C, 0x00000002, 0x0005008E, 0x0000001D, + 0x00003756, 0x000034BD, 0x000044D5, 0x00050081, 0x0000001D, 0x00001B96, + 0x00006135, 0x00003756, 0x00050041, 0x0000029B, 0x000046D1, 0x00001342, + 0x00000A0B, 0x0003003E, 0x000046D1, 0x00001B96, 0x000100DA, 0x000100DB, + 0x000200F9, 0x00005C60, 0x000200F8, 0x00005C60, 0x000100FD, 0x00010038, +}; 
diff --git a/src/xenia/gpu/shaders/primitive_rectangle_list.gs.glsl b/src/xenia/gpu/shaders/primitive_rectangle_list.gs.glsl new file mode 100644 index 000000000..1dd7f7edb --- /dev/null +++ b/src/xenia/gpu/shaders/primitive_rectangle_list.gs.glsl @@ -0,0 +1,103 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#version 460 +#extension GL_GOOGLE_include_directive : require +#include "xenos_gs.glsli" + +layout(triangles) in; +layout(triangle_strip, max_vertices=6) out; + +void main() { + if (any(isnan(gl_in[0].gl_Position)) || any(isnan(gl_in[1].gl_Position)) || + any(isnan(gl_in[2].gl_Position))) { + return; + } + + uint i; + + for (i = 0; i < 3u; ++i) { + xe_out_interpolators = xe_in_interpolators[i]; + gl_Position = gl_in[i].gl_Position; + EmitVertex(); + } + EndPrimitive(); + + // Find the diagonal (the edge that is longer than both the other two) and + // mirror the other vertex across it. + vec3 edge_01 = gl_in[1].gl_Position.xyz - gl_in[0].gl_Position.xyz; + vec3 edge_02 = gl_in[2].gl_Position.xyz - gl_in[0].gl_Position.xyz; + vec3 edge_12 = gl_in[2].gl_Position.xyz - gl_in[1].gl_Position.xyz; + vec3 edge_squares = vec3( + dot(edge_01, edge_01), dot(edge_02, edge_02), dot(edge_12, edge_12)); + vec3 v3_signs; + if (edge_squares.z > edge_squares.x && edge_squares.z > edge_squares.y) { + // 12 is the diagonal. Most games use this form. 
+ // + // 0 ------ 1 0: -1,-1 + // | - | 1: 1,-1 + // | // | 2: -1, 1 + // | - | 3: [ 1, 1 ] + // 2 ----- [3] + // + // 0 ------ 2 0: -1,-1 + // | - | 1: -1, 1 + // | // | 2: 1,-1 + // | - | 3: [ 1, 1 ] + // 1 ------[3] + xe_out_interpolators = xe_in_interpolators[2]; + gl_Position = gl_in[2].gl_Position; + EmitVertex(); + xe_out_interpolators = xe_in_interpolators[1]; + gl_Position = gl_in[1].gl_Position; + EmitVertex(); + v3_signs = vec3(-1.0, 1.0, 1.0); + } else if (edge_squares.y > edge_squares.x && + edge_squares.y > edge_squares.z) { + // 02 is the diagonal. + // + // 0 ------ 1 0: -1,-1 + // | - | 1: 1,-1 + // | \\ | 2: 1, 1 + // | - | 3: [-1, 1 ] + // [3] ----- 2 + xe_out_interpolators = xe_in_interpolators[0]; + gl_Position = gl_in[0].gl_Position; + EmitVertex(); + xe_out_interpolators = xe_in_interpolators[2]; + gl_Position = gl_in[2].gl_Position; + EmitVertex(); + v3_signs = vec3(1.0, -1.0, 1.0); + } else { + // 01 is the diagonal. Not seen in any game so far. + // + // 0 ------ 2 0: -1,-1 + // | - | 1: 1, 1 + // | \\ | 2: 1,-1 + // | - | 3: [-1, 1 ] + // [3] ----- 1 + xe_out_interpolators = xe_in_interpolators[1]; + gl_Position = gl_in[1].gl_Position; + EmitVertex(); + xe_out_interpolators = xe_in_interpolators[0]; + gl_Position = gl_in[0].gl_Position; + EmitVertex(); + v3_signs = vec3(1.0, 1.0, -1.0); + } + for (i = 0; i < 16u; ++i) { + xe_out_interpolators[i] = v3_signs.x * xe_in_interpolators[0][i] + + v3_signs.y * xe_in_interpolators[1][i] + + v3_signs.z * xe_in_interpolators[2][i]; + } + gl_Position = v3_signs.x * gl_in[0].gl_Position + + v3_signs.y * gl_in[1].gl_Position + + v3_signs.z * gl_in[2].gl_Position; + EmitVertex(); + EndPrimitive(); +} diff --git a/src/xenia/gpu/shaders/xenos_gs.glsli b/src/xenia/gpu/shaders/xenos_gs.glsli new file mode 100644 index 000000000..f27b5cae3 --- /dev/null +++ b/src/xenia/gpu/shaders/xenos_gs.glsli @@ -0,0 +1,25 @@ +/** + ****************************************************************************** + * Xenia 
: Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_D3D12_SHADERS_XENOS_GS_GLSLI_ +#define XENIA_GPU_D3D12_SHADERS_XENOS_GS_GLSLI_ + +layout(location=0) in vec4 xe_in_interpolators[][16]; + +in gl_PerVertex { + vec4 gl_Position; +} gl_in[]; + +layout(location=0) out vec4 xe_out_interpolators[16]; + +out gl_PerVertex { + precise vec4 gl_Position; +}; + +#endif // XENIA_GPU_D3D12_SHADERS_XENOS_GS_GLSLI_ diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 2674396d1..8f581f0fa 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -27,11 +27,17 @@ #include "xenia/gpu/vulkan/vulkan_command_processor.h" #include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/xenos.h" +#include "xenia/ui/vulkan/vulkan_util.h" namespace xe { namespace gpu { namespace vulkan { +// Generated with `xb buildshaders`. 
+namespace shaders { +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/primitive_rectangle_list_gs.h" +} // namespace shaders + VulkanPipelineCache::VulkanPipelineCache( VulkanCommandProcessor& command_processor, const RegisterFile& register_file, @@ -45,6 +51,20 @@ VulkanPipelineCache::~VulkanPipelineCache() { Shutdown(); } bool VulkanPipelineCache::Initialize() { const ui::vulkan::VulkanProvider& provider = command_processor_.GetVulkanProvider(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + + if (device_features.geometryShader) { + gs_rectangle_list_ = ui::vulkan::util::CreateShaderModule( + provider, shaders::primitive_rectangle_list_gs, + sizeof(shaders::primitive_rectangle_list_gs)); + if (gs_rectangle_list_ == VK_NULL_HANDLE) { + XELOGE( + "VulkanPipelineCache: Failed to create the rectangle list geometry " + "shader"); + Shutdown(); + return false; + } + } shader_translator_ = std::make_unique( SpirvShaderTranslator::Features(provider)); @@ -53,9 +73,17 @@ bool VulkanPipelineCache::Initialize() { } void VulkanPipelineCache::Shutdown() { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + ClearCache(); shader_translator_.reset(); + + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyShaderModule, device, + gs_rectangle_list_); } void VulkanPipelineCache::ClearCache() { @@ -357,6 +385,9 @@ bool VulkanPipelineCache::GetCurrentStateDescription( } description_out.render_pass_key = render_pass_key; + // TODO(Triang3l): Implement primitive types currently using geometry shaders + // without them. 
+ PipelineGeometryShader geometry_shader = PipelineGeometryShader::kNone; PipelinePrimitiveTopology primitive_topology; switch (primitive_processing_result.host_primitive_type) { case xenos::PrimitiveType::kPointList: @@ -369,7 +400,6 @@ bool VulkanPipelineCache::GetCurrentStateDescription( primitive_topology = PipelinePrimitiveTopology::kLineStrip; break; case xenos::PrimitiveType::kTriangleList: - case xenos::PrimitiveType::kRectangleList: primitive_topology = PipelinePrimitiveTopology::kTriangleList; break; case xenos::PrimitiveType::kTriangleFan: @@ -381,6 +411,10 @@ bool VulkanPipelineCache::GetCurrentStateDescription( case xenos::PrimitiveType::kTriangleStrip: primitive_topology = PipelinePrimitiveTopology::kTriangleStrip; break; + case xenos::PrimitiveType::kRectangleList: + geometry_shader = PipelineGeometryShader::kRectangleList; + primitive_topology = PipelinePrimitiveTopology::kTriangleList; + break; case xenos::PrimitiveType::kQuadList: primitive_topology = PipelinePrimitiveTopology::kLineListWithAdjacency; break; @@ -388,6 +422,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription( // TODO(Triang3l): All primitive types and tessellation. 
return false; } + description_out.geometry_shader = geometry_shader; description_out.primitive_topology = primitive_topology; description_out.primitive_restart = primitive_processing_result.host_primitive_reset_enabled; @@ -605,6 +640,11 @@ bool VulkanPipelineCache::ArePipelineRequirementsMet( } } + if (!device_features.geometryShader && + description.geometry_shader != PipelineGeometryShader::kNone) { + return false; + } + if (!device_features.independentBlend) { uint32_t color_rts_remaining = description.render_pass_key.depth_and_color_used >> 1; @@ -670,14 +710,14 @@ bool VulkanPipelineCache::EnsurePipelineCreated( command_processor_.GetVulkanProvider(); const VkPhysicalDeviceFeatures& device_features = provider.device_features(); - VkPipelineShaderStageCreateInfo shader_stages[2]; + std::array shader_stages; uint32_t shader_stage_count = 0; + // Vertex or tessellation evaluation shader. assert_true(creation_arguments.vertex_shader->is_translated()); if (!creation_arguments.vertex_shader->is_valid()) { return false; } - assert_true(shader_stage_count < xe::countof(shader_stages)); VkPipelineShaderStageCreateInfo& shader_stage_vertex = shader_stages[shader_stage_count++]; shader_stage_vertex.sType = @@ -690,12 +730,33 @@ bool VulkanPipelineCache::EnsurePipelineCreated( assert_true(shader_stage_vertex.module != VK_NULL_HANDLE); shader_stage_vertex.pName = "main"; shader_stage_vertex.pSpecializationInfo = nullptr; + // Geometry shader. 
+ VkShaderModule geometry_shader = VK_NULL_HANDLE; + switch (description.geometry_shader) { + case PipelineGeometryShader::kRectangleList: + geometry_shader = gs_rectangle_list_; + break; + default: + break; + } + if (geometry_shader != VK_NULL_HANDLE) { + VkPipelineShaderStageCreateInfo& shader_stage_geometry = + shader_stages[shader_stage_count++]; + shader_stage_geometry.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stage_geometry.pNext = nullptr; + shader_stage_geometry.flags = 0; + shader_stage_geometry.stage = VK_SHADER_STAGE_GEOMETRY_BIT; + shader_stage_geometry.module = geometry_shader; + shader_stage_geometry.pName = "main"; + shader_stage_geometry.pSpecializationInfo = nullptr; + } + // Pixel shader. if (creation_arguments.pixel_shader) { assert_true(creation_arguments.pixel_shader->is_translated()); if (!creation_arguments.pixel_shader->is_valid()) { return false; } - assert_true(shader_stage_count < xe::countof(shader_stages)); VkPipelineShaderStageCreateInfo& shader_stage_fragment = shader_stages[shader_stage_count++]; shader_stage_fragment.sType = @@ -985,7 +1046,7 @@ bool VulkanPipelineCache::EnsurePipelineCreated( pipeline_create_info.pNext = nullptr; pipeline_create_info.flags = 0; pipeline_create_info.stageCount = shader_stage_count; - pipeline_create_info.pStages = shader_stages; + pipeline_create_info.pStages = shader_stages.data(); pipeline_create_info.pVertexInputState = &vertex_input_state; pipeline_create_info.pInputAssemblyState = &input_assembly_state; pipeline_create_info.pTessellationState = nullptr; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h index c753e4dfa..af23d1ba5 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h @@ -82,6 +82,11 @@ class VulkanPipelineCache { const PipelineLayoutProvider*& pipeline_layout_out); private: + enum class PipelineGeometryShader : uint32_t { + kNone, + 
kRectangleList, + }; + enum class PipelinePrimitiveTopology : uint32_t { kPointList, kLineList, @@ -136,24 +141,26 @@ class VulkanPipelineCache { uint64_t pixel_shader_modification; VulkanRenderTargetCache::RenderPassKey render_pass_key; + // Shader stages. + PipelineGeometryShader geometry_shader : 2; // 2 // Input assembly. - PipelinePrimitiveTopology primitive_topology : 3; // 3 - uint32_t primitive_restart : 1; // 4 + PipelinePrimitiveTopology primitive_topology : 3; // 5 + uint32_t primitive_restart : 1; // 6 // Rasterization. - uint32_t depth_clamp_enable : 1; // 5 - PipelinePolygonMode polygon_mode : 2; // 7 - uint32_t cull_front : 1; // 8 - uint32_t cull_back : 1; // 9 - uint32_t front_face_clockwise : 1; // 10 + uint32_t depth_clamp_enable : 1; // 7 + PipelinePolygonMode polygon_mode : 2; // 9 + uint32_t cull_front : 1; // 10 + uint32_t cull_back : 1; // 11 + uint32_t front_face_clockwise : 1; // 12 // Depth / stencil. - uint32_t depth_write_enable : 1; // 11 - xenos::CompareFunction depth_compare_op : 3; // 14 - uint32_t stencil_test_enable : 1; // 15 - xenos::StencilOp stencil_front_fail_op : 3; // 18 - xenos::StencilOp stencil_front_pass_op : 3; // 21 - xenos::StencilOp stencil_front_depth_fail_op : 3; // 24 - xenos::CompareFunction stencil_front_compare_op : 3; // 27 - xenos::StencilOp stencil_back_fail_op : 3; // 30 + uint32_t depth_write_enable : 1; // 13 + xenos::CompareFunction depth_compare_op : 3; // 15 + uint32_t stencil_test_enable : 1; // 17 + xenos::StencilOp stencil_front_fail_op : 3; // 20 + xenos::StencilOp stencil_front_pass_op : 3; // 23 + xenos::StencilOp stencil_front_depth_fail_op : 3; // 26 + xenos::CompareFunction stencil_front_compare_op : 3; // 29 + xenos::StencilOp stencil_back_fail_op : 3; // 32 xenos::StencilOp stencil_back_pass_op : 3; // 3 xenos::StencilOp stencil_back_depth_fail_op : 3; // 6 @@ -228,6 +235,8 @@ class VulkanPipelineCache { const RegisterFile& register_file_; VulkanRenderTargetCache& render_target_cache_; + 
VkShaderModule gs_rectangle_list_ = VK_NULL_HANDLE; + // Temporary storage for AnalyzeUcode calls on the processor thread. StringBuffer ucode_disasm_buffer_; // Reusable shader translator on the command processor thread. From 1259c9f7a2310e1215fe616e38800fba64d25bff Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 21 Mar 2022 23:02:51 +0300 Subject: [PATCH 076/123] [Vulkan] Pipeline barrier merging --- .../gpu/vulkan/vulkan_command_processor.cc | 346 ++++++++++++++---- .../gpu/vulkan/vulkan_command_processor.h | 41 ++- .../gpu/vulkan/vulkan_primitive_processor.cc | 25 +- .../gpu/vulkan/vulkan_render_target_cache.cc | 97 ++--- .../gpu/vulkan/vulkan_render_target_cache.h | 38 +- src/xenia/gpu/vulkan/vulkan_shared_memory.cc | 56 ++- .../ui/vulkan/vulkan_immediate_drawer.cc | 5 +- src/xenia/ui/vulkan/vulkan_presenter.cc | 2 +- src/xenia/ui/vulkan/vulkan_util.h | 5 +- 9 files changed, 400 insertions(+), 215 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index afdb32b03..69d0c70a3 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -772,41 +772,26 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, new_swap_framebuffer.last_submission = 0; } - // End the current render pass before inserting barriers and starting a - // new one. - EndRenderPass(); if (vulkan_context.image_ever_written_previously()) { // Insert a barrier after the last presenter's usage of the guest - // output image. 
- VkImageMemoryBarrier guest_output_image_acquire_barrier; - guest_output_image_acquire_barrier.sType = - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - guest_output_image_acquire_barrier.pNext = nullptr; - guest_output_image_acquire_barrier.srcAccessMask = - ui::vulkan::VulkanPresenter::kGuestOutputInternalAccessMask; - guest_output_image_acquire_barrier.dstAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - // Will be overwriting all the contents. - guest_output_image_acquire_barrier.oldLayout = - VK_IMAGE_LAYOUT_UNDEFINED; - // The render pass will do the layout transition, but newLayout must - // not be UNDEFINED. - guest_output_image_acquire_barrier.newLayout = - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - guest_output_image_acquire_barrier.srcQueueFamilyIndex = - VK_QUEUE_FAMILY_IGNORED; - guest_output_image_acquire_barrier.dstQueueFamilyIndex = - VK_QUEUE_FAMILY_IGNORED; - guest_output_image_acquire_barrier.image = vulkan_context.image(); - ui::vulkan::util::InitializeSubresourceRange( - guest_output_image_acquire_barrier.subresourceRange); - deferred_command_buffer_.CmdVkPipelineBarrier( + // output image. Will be overwriting all the contents, so oldLayout + // layout is UNDEFINED. The render pass will do the layout transition, + // but newLayout must not be UNDEFINED. + PushImageMemoryBarrier( + vulkan_context.image(), + ui::vulkan::util::InitializeSubresourceRange(), ui::vulkan::VulkanPresenter::kGuestOutputInternalStageMask, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, nullptr, 0, - nullptr, 1, &guest_output_image_acquire_barrier); + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + ui::vulkan::VulkanPresenter::kGuestOutputInternalAccessMask, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); } + // End the current render pass before inserting barriers and starting a + // new one, and insert the barrier. 
+ SubmitBarriers(true); + SwapFramebuffer& swap_framebuffer = swap_framebuffers_[swap_framebuffer_index]; swap_framebuffer.last_submission = GetCurrentSubmission(); @@ -848,33 +833,20 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, deferred_command_buffer_.CmdVkEndRenderPass(); - VkImageMemoryBarrier guest_output_image_release_barrier; - guest_output_image_release_barrier.sType = - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - guest_output_image_release_barrier.pNext = nullptr; - guest_output_image_release_barrier.srcAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - guest_output_image_release_barrier.dstAccessMask = - ui::vulkan::VulkanPresenter::kGuestOutputInternalAccessMask; - guest_output_image_release_barrier.oldLayout = - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - guest_output_image_release_barrier.newLayout = - ui::vulkan::VulkanPresenter::kGuestOutputInternalLayout; - guest_output_image_release_barrier.srcQueueFamilyIndex = - VK_QUEUE_FAMILY_IGNORED; - guest_output_image_release_barrier.dstQueueFamilyIndex = - VK_QUEUE_FAMILY_IGNORED; - guest_output_image_release_barrier.image = vulkan_context.image(); - ui::vulkan::util::InitializeSubresourceRange( - guest_output_image_release_barrier.subresourceRange); - deferred_command_buffer_.CmdVkPipelineBarrier( + // Insert the release barrier. 
+ PushImageMemoryBarrier( + vulkan_context.image(), + ui::vulkan::util::InitializeSubresourceRange(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - ui::vulkan::VulkanPresenter::kGuestOutputInternalStageMask, 0, 0, - nullptr, 0, nullptr, 1, &guest_output_image_release_barrier); + ui::vulkan::VulkanPresenter::kGuestOutputInternalStageMask, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + ui::vulkan::VulkanPresenter::kGuestOutputInternalAccessMask, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + ui::vulkan::VulkanPresenter::kGuestOutputInternalLayout); // Need to submit all the commands before giving the image back to the // presenter so it can submit its own commands for displaying it to the - // queue. + // queue, and also need to submit the release barrier. EndSubmission(true); return true; }); @@ -884,6 +856,215 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, EndSubmission(true); } +void VulkanCommandProcessor::PushBufferMemoryBarrier( + VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, + VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, + uint32_t src_queue_family_index, uint32_t dst_queue_family_index, + bool skip_if_equal) { + if (skip_if_equal && src_stage_mask == dst_stage_mask && + src_access_mask == dst_access_mask && + src_queue_family_index == dst_queue_family_index) { + return; + } + + // Separate different barriers for overlapping buffer ranges into different + // pipeline barrier commands. 
+ for (const VkBufferMemoryBarrier& other_buffer_memory_barrier : + pending_barriers_buffer_memory_barriers_) { + if (other_buffer_memory_barrier.buffer != buffer || + (size != VK_WHOLE_SIZE && + offset + size <= other_buffer_memory_barrier.offset) || + (other_buffer_memory_barrier.size != VK_WHOLE_SIZE && + other_buffer_memory_barrier.offset + + other_buffer_memory_barrier.size <= + offset)) { + continue; + } + if (other_buffer_memory_barrier.offset == offset && + other_buffer_memory_barrier.size == size && + other_buffer_memory_barrier.srcAccessMask == src_access_mask && + other_buffer_memory_barrier.dstAccessMask == dst_access_mask && + other_buffer_memory_barrier.srcQueueFamilyIndex == + src_queue_family_index && + other_buffer_memory_barrier.dstQueueFamilyIndex == + dst_queue_family_index) { + // The barrier is already present. + current_pending_barrier_.src_stage_mask |= src_stage_mask; + current_pending_barrier_.dst_stage_mask |= dst_stage_mask; + return; + } + SplitPendingBarrier(); + break; + } + + current_pending_barrier_.src_stage_mask |= src_stage_mask; + current_pending_barrier_.dst_stage_mask |= dst_stage_mask; + VkBufferMemoryBarrier& buffer_memory_barrier = + pending_barriers_buffer_memory_barriers_.emplace_back(); + buffer_memory_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + buffer_memory_barrier.pNext = nullptr; + buffer_memory_barrier.srcAccessMask = src_access_mask; + buffer_memory_barrier.dstAccessMask = dst_access_mask; + buffer_memory_barrier.srcQueueFamilyIndex = src_queue_family_index; + buffer_memory_barrier.dstQueueFamilyIndex = dst_queue_family_index; + buffer_memory_barrier.buffer = buffer; + buffer_memory_barrier.offset = offset; + buffer_memory_barrier.size = size; +} + +void VulkanCommandProcessor::PushImageMemoryBarrier( + VkImage image, const VkImageSubresourceRange& subresource_range, + VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkAccessFlags src_access_mask, VkAccessFlags 
dst_access_mask, + VkImageLayout old_layout, VkImageLayout new_layout, + uint32_t src_queue_family_index, uint32_t dst_queue_family_index, + bool skip_if_equal) { + if (skip_if_equal && src_stage_mask == dst_stage_mask && + src_access_mask == dst_access_mask && old_layout == new_layout && + src_queue_family_index == dst_queue_family_index) { + return; + } + + // Separate different barriers for overlapping image subresource ranges into + // different pipeline barrier commands. + for (const VkImageMemoryBarrier& other_image_memory_barrier : + pending_barriers_image_memory_barriers_) { + if (other_image_memory_barrier.image != image || + !(other_image_memory_barrier.subresourceRange.aspectMask & + subresource_range.aspectMask) || + (subresource_range.levelCount != VK_REMAINING_MIP_LEVELS && + subresource_range.baseMipLevel + subresource_range.levelCount <= + other_image_memory_barrier.subresourceRange.baseMipLevel) || + (other_image_memory_barrier.subresourceRange.levelCount != + VK_REMAINING_MIP_LEVELS && + other_image_memory_barrier.subresourceRange.baseMipLevel + + other_image_memory_barrier.subresourceRange.levelCount <= + subresource_range.baseMipLevel) || + (subresource_range.layerCount != VK_REMAINING_ARRAY_LAYERS && + subresource_range.baseArrayLayer + subresource_range.layerCount <= + other_image_memory_barrier.subresourceRange.baseArrayLayer) || + (other_image_memory_barrier.subresourceRange.layerCount != + VK_REMAINING_ARRAY_LAYERS && + other_image_memory_barrier.subresourceRange.baseArrayLayer + + other_image_memory_barrier.subresourceRange.layerCount <= + subresource_range.baseArrayLayer)) { + continue; + } + if (other_image_memory_barrier.subresourceRange.aspectMask == + subresource_range.aspectMask && + other_image_memory_barrier.subresourceRange.baseMipLevel == + subresource_range.baseMipLevel && + other_image_memory_barrier.subresourceRange.levelCount == + subresource_range.levelCount && + other_image_memory_barrier.subresourceRange.baseArrayLayer == 
+ subresource_range.baseArrayLayer && + other_image_memory_barrier.subresourceRange.layerCount == + subresource_range.layerCount && + other_image_memory_barrier.srcAccessMask == src_access_mask && + other_image_memory_barrier.dstAccessMask == dst_access_mask && + other_image_memory_barrier.oldLayout == old_layout && + other_image_memory_barrier.newLayout == new_layout && + other_image_memory_barrier.srcQueueFamilyIndex == + src_queue_family_index && + other_image_memory_barrier.dstQueueFamilyIndex == + dst_queue_family_index) { + // The barrier is already present. + current_pending_barrier_.src_stage_mask |= src_stage_mask; + current_pending_barrier_.dst_stage_mask |= dst_stage_mask; + return; + } + SplitPendingBarrier(); + break; + } + + current_pending_barrier_.src_stage_mask |= src_stage_mask; + current_pending_barrier_.dst_stage_mask |= dst_stage_mask; + VkImageMemoryBarrier& image_memory_barrier = + pending_barriers_image_memory_barriers_.emplace_back(); + image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_memory_barrier.pNext = nullptr; + image_memory_barrier.srcAccessMask = src_access_mask; + image_memory_barrier.dstAccessMask = dst_access_mask; + image_memory_barrier.oldLayout = old_layout; + image_memory_barrier.newLayout = new_layout; + image_memory_barrier.srcQueueFamilyIndex = src_queue_family_index; + image_memory_barrier.dstQueueFamilyIndex = dst_queue_family_index; + image_memory_barrier.image = image; + image_memory_barrier.subresourceRange = subresource_range; +} + +bool VulkanCommandProcessor::SubmitBarriers(bool force_end_render_pass) { + assert_true(submission_open_); + SplitPendingBarrier(); + if (pending_barriers_.empty()) { + if (force_end_render_pass) { + EndRenderPass(); + } + return false; + } + EndRenderPass(); + for (auto it = pending_barriers_.cbegin(); it != pending_barriers_.cend(); + ++it) { + auto it_next = std::next(it); + bool is_last = it_next == pending_barriers_.cend(); + // .data() + offset, not 
&[offset], for buffer and image barriers, because + // if there are no buffer or image memory barriers in the last pipeline + // barriers, the offsets may be equal to the sizes of the vectors. + deferred_command_buffer_.CmdVkPipelineBarrier( + it->src_stage_mask ? it->src_stage_mask + : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + it->dst_stage_mask ? it->dst_stage_mask + : VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + 0, 0, nullptr, + uint32_t((is_last ? pending_barriers_buffer_memory_barriers_.size() + : it_next->buffer_memory_barriers_offset) - + it->buffer_memory_barriers_offset), + pending_barriers_buffer_memory_barriers_.data() + + it->buffer_memory_barriers_offset, + uint32_t((is_last ? pending_barriers_image_memory_barriers_.size() + : it_next->image_memory_barriers_offset) - + it->image_memory_barriers_offset), + pending_barriers_image_memory_barriers_.data() + + it->image_memory_barriers_offset); + } + pending_barriers_.clear(); + pending_barriers_buffer_memory_barriers_.clear(); + pending_barriers_image_memory_barriers_.clear(); + current_pending_barrier_.buffer_memory_barriers_offset = 0; + current_pending_barrier_.image_memory_barriers_offset = 0; + return true; +} + +void VulkanCommandProcessor::SubmitBarriersAndEnterRenderTargetCacheRenderPass( + VkRenderPass render_pass, + const VulkanRenderTargetCache::Framebuffer* framebuffer) { + SubmitBarriers(false); + if (current_render_pass_ == render_pass && + current_framebuffer_ == framebuffer) { + return; + } + if (current_render_pass_ != VK_NULL_HANDLE) { + deferred_command_buffer_.CmdVkEndRenderPass(); + } + current_render_pass_ = render_pass; + current_framebuffer_ = framebuffer; + VkRenderPassBeginInfo render_pass_begin_info; + render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + render_pass_begin_info.pNext = nullptr; + render_pass_begin_info.renderPass = render_pass; + render_pass_begin_info.framebuffer = framebuffer->framebuffer; + render_pass_begin_info.renderArea.offset.x = 0; + 
render_pass_begin_info.renderArea.offset.y = 0; + // TODO(Triang3l): Actual dirty width / height in the deferred command + // buffer. + render_pass_begin_info.renderArea.extent = framebuffer->host_extent; + render_pass_begin_info.clearValueCount = 0; + render_pass_begin_info.pClearValues = nullptr; + deferred_command_buffer_.CmdVkBeginRenderPass(&render_pass_begin_info, + VK_SUBPASS_CONTENTS_INLINE); +} + void VulkanCommandProcessor::EndRenderPass() { assert_true(submission_open_); if (current_render_pass_ == VK_NULL_HANDLE) { @@ -891,7 +1072,7 @@ void VulkanCommandProcessor::EndRenderPass() { } deferred_command_buffer_.CmdVkEndRenderPass(); current_render_pass_ = VK_NULL_HANDLE; - current_framebuffer_ = VK_NULL_HANDLE; + current_framebuffer_ = nullptr; } const VulkanPipelineCache::PipelineLayoutProvider* @@ -1324,33 +1505,12 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // TODO(Triang3l): Memory export. shared_memory_->Use(VulkanSharedMemory::Usage::kRead); - // After all commands that may dispatch, copy or insert barriers, enter the - // render pass before drawing. 
- VkRenderPass render_pass = render_target_cache_->last_update_render_pass(); - const VulkanRenderTargetCache::Framebuffer* framebuffer = - render_target_cache_->last_update_framebuffer(); - if (current_render_pass_ != render_pass || - current_framebuffer_ != framebuffer->framebuffer) { - if (current_render_pass_ != VK_NULL_HANDLE) { - deferred_command_buffer_.CmdVkEndRenderPass(); - } - current_render_pass_ = render_pass; - current_framebuffer_ = framebuffer->framebuffer; - VkRenderPassBeginInfo render_pass_begin_info; - render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - render_pass_begin_info.pNext = nullptr; - render_pass_begin_info.renderPass = render_pass; - render_pass_begin_info.framebuffer = framebuffer->framebuffer; - render_pass_begin_info.renderArea.offset.x = 0; - render_pass_begin_info.renderArea.offset.y = 0; - // TODO(Triang3l): Actual dirty width / height in the deferred command - // buffer. - render_pass_begin_info.renderArea.extent = framebuffer->host_extent; - render_pass_begin_info.clearValueCount = 0; - render_pass_begin_info.pClearValues = nullptr; - deferred_command_buffer_.CmdVkBeginRenderPass(&render_pass_begin_info, - VK_SUBPASS_CONTENTS_INLINE); - } + // After all commands that may dispatch, copy or insert barriers, submit the + // barriers (may end the render pass), and (re)enter the render pass before + // drawing. + SubmitBarriersAndEnterRenderTargetCacheRenderPass( + render_target_cache_->last_update_render_pass(), + render_target_cache_->last_update_framebuffer()); // Draw. 
if (primitive_processing_result.index_buffer_type == @@ -1589,7 +1749,7 @@ bool VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { dynamic_stencil_reference_front_update_needed_ = true; dynamic_stencil_reference_back_update_needed_ = true; current_render_pass_ = VK_NULL_HANDLE; - current_framebuffer_ = VK_NULL_HANDLE; + current_framebuffer_ = nullptr; current_guest_graphics_pipeline_ = VK_NULL_HANDLE; current_external_graphics_pipeline_ = VK_NULL_HANDLE; current_guest_graphics_pipeline_layout_ = nullptr; @@ -1759,6 +1919,8 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { sparse_memory_binds_.clear(); } + SubmitBarriers(true); + assert_false(command_buffers_writable_.empty()); CommandBuffer command_buffer = command_buffers_writable_.back(); if (dfn.vkResetCommandPool(device, command_buffer.pool, 0) != VK_SUCCESS) { @@ -1884,6 +2046,28 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { return true; } +void VulkanCommandProcessor::SplitPendingBarrier() { + size_t pending_buffer_memory_barrier_count = + pending_barriers_buffer_memory_barriers_.size(); + size_t pending_image_memory_barrier_count = + pending_barriers_image_memory_barriers_.size(); + if (!current_pending_barrier_.src_stage_mask && + !current_pending_barrier_.dst_stage_mask && + current_pending_barrier_.buffer_memory_barriers_offset >= + pending_buffer_memory_barrier_count && + current_pending_barrier_.image_memory_barriers_offset >= + pending_image_memory_barrier_count) { + return; + } + pending_barriers_.emplace_back(current_pending_barrier_); + current_pending_barrier_.src_stage_mask = 0; + current_pending_barrier_.dst_stage_mask = 0; + current_pending_barrier_.buffer_memory_barriers_offset = + pending_buffer_memory_barrier_count; + current_pending_barrier_.image_memory_barriers_offset = + pending_image_memory_barrier_count; +} + VkShaderStageFlags VulkanCommandProcessor::GetGuestVertexShaderStageFlags() const { VkShaderStageFlags stages = 
VK_SHADER_STAGE_VERTEX_BIT; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index a01f14feb..551a3fcae 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -81,6 +81,31 @@ class VulkanCommandProcessor : public CommandProcessor { uint64_t GetCurrentFrame() const { return frame_current_; } uint64_t GetCompletedFrame() const { return frame_completed_; } + // Submission must be open to insert barriers. + void PushBufferMemoryBarrier( + VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, + VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, + uint32_t src_queue_family_index = VK_QUEUE_FAMILY_IGNORED, + uint32_t dst_queue_family_index = VK_QUEUE_FAMILY_IGNORED, + bool skip_if_equal = true); + void PushImageMemoryBarrier( + VkImage image, const VkImageSubresourceRange& subresource_range, + VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, + VkImageLayout old_layout, VkImageLayout new_layout, + uint32_t src_queue_family_index = VK_QUEUE_FAMILY_IGNORED, + uint32_t dst_queue_family_index = VK_QUEUE_FAMILY_IGNORED, + bool skip_if_equal = true); + // Returns whether any barriers have been submitted - if true is returned, the + // render pass will also be closed. + bool SubmitBarriers(bool force_end_render_pass); + + // If not started yet, begins a render pass from the render target cache. + // Submission must be open. + void SubmitBarriersAndEnterRenderTargetCacheRenderPass( + VkRenderPass render_pass, + const VulkanRenderTargetCache::Framebuffer* framebuffer); // Must be called before doing anything outside the render pass scope, // including adding pipeline barriers that are not a part of the render pass // scope. Submission must be open. 
@@ -205,6 +230,8 @@ class VulkanCommandProcessor : public CommandProcessor { return !submission_open_ && submissions_in_flight_fences_.empty(); } + void SplitPendingBarrier(); + VkShaderStageFlags GetGuestVertexShaderStageFlags() const; void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info, @@ -314,6 +341,18 @@ class VulkanCommandProcessor : public CommandProcessor { swap_framebuffers_; std::deque> swap_framebuffers_outdated_; + // Pending pipeline barriers. + std::vector pending_barriers_buffer_memory_barriers_; + std::vector pending_barriers_image_memory_barriers_; + struct PendingBarrier { + VkPipelineStageFlags src_stage_mask = 0; + VkPipelineStageFlags dst_stage_mask = 0; + size_t buffer_memory_barriers_offset = 0; + size_t image_memory_barriers_offset = 0; + }; + std::vector pending_barriers_; + PendingBarrier current_pending_barrier_; + // The current dynamic state of the graphics pipeline bind point. Note that // binding any pipeline to the bind point with static state (even if it's // unused, like depth bias being disabled, but the values themselves still not @@ -348,7 +387,7 @@ class VulkanCommandProcessor : public CommandProcessor { // Cache render pass currently started in the command buffer with the // framebuffer. VkRenderPass current_render_pass_; - VkFramebuffer current_framebuffer_; + const VulkanRenderTargetCache::Framebuffer* current_framebuffer_; // Currently bound graphics pipeline, either from the pipeline cache (with // potentially deferred creation - current_external_graphics_pipeline_ is diff --git a/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc index 7b628037b..0d33aaf64 100644 --- a/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc @@ -95,6 +95,8 @@ void VulkanPrimitiveProcessor::BeginSubmission() { // been used yet, and builtin_index_buffer_upload_ is written before // submitting commands reading it. 
+ command_processor_.EndRenderPass(); + DeferredCommandBuffer& command_buffer = command_processor_.deferred_command_buffer(); @@ -104,25 +106,10 @@ void VulkanPrimitiveProcessor::BeginSubmission() { copy_region->dstOffset = 0; copy_region->size = builtin_index_buffer_size_; - VkBufferMemoryBarrier builtin_index_buffer_memory_barrier; - builtin_index_buffer_memory_barrier.sType = - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - builtin_index_buffer_memory_barrier.pNext = nullptr; - builtin_index_buffer_memory_barrier.srcAccessMask = - VK_ACCESS_TRANSFER_WRITE_BIT; - builtin_index_buffer_memory_barrier.dstAccessMask = - VK_ACCESS_INDEX_READ_BIT; - builtin_index_buffer_memory_barrier.srcQueueFamilyIndex = - VK_QUEUE_FAMILY_IGNORED; - builtin_index_buffer_memory_barrier.dstQueueFamilyIndex = - VK_QUEUE_FAMILY_IGNORED; - builtin_index_buffer_memory_barrier.buffer = builtin_index_buffer_; - builtin_index_buffer_memory_barrier.offset = 0; - builtin_index_buffer_memory_barrier.size = VK_WHOLE_SIZE; - command_processor_.EndRenderPass(); - command_buffer.CmdVkPipelineBarrier( - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, - 0, nullptr, 1, &builtin_index_buffer_memory_barrier, 0, nullptr); + command_processor_.PushBufferMemoryBarrier( + builtin_index_buffer_, 0, VK_WHOLE_SIZE, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_INDEX_READ_BIT); builtin_index_buffer_upload_submission_ = command_processor_.GetCurrentSubmission(); diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index 4a810263b..24eb8e14b 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * 
****************************************************************************** - * Copyright 2021 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -183,73 +183,26 @@ bool VulkanRenderTargetCache::Update(bool is_rasterization_done, last_update_framebuffer_ = framebuffer; // Transition the used render targets. - VkPipelineStageFlags barrier_src_stage_mask = 0; - VkPipelineStageFlags barrier_dst_stage_mask = 0; - VkImageMemoryBarrier barrier_image_memory[1 + xenos::kMaxColorRenderTargets]; - uint32_t barrier_image_memory_count = 0; for (uint32_t i = 0; i < 1 + xenos::kMaxColorRenderTargets; ++i) { RenderTarget* rt = depth_and_color_render_targets[i]; if (!rt) { continue; } auto& vulkan_rt = *static_cast(rt); - VkPipelineStageFlags rt_src_stage_mask = vulkan_rt.current_stage_mask(); - VkAccessFlags rt_src_access_mask = vulkan_rt.current_access_mask(); - VkImageLayout rt_old_layout = vulkan_rt.current_layout(); VkPipelineStageFlags rt_dst_stage_mask; VkAccessFlags rt_dst_access_mask; VkImageLayout rt_new_layout; - if (i) { - rt_dst_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - rt_dst_access_mask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - rt_new_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - } else { - rt_dst_stage_mask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - rt_dst_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - rt_new_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - } - bool rt_image_memory_barrier_needed = - rt_src_access_mask != rt_dst_access_mask || - rt_old_layout != rt_new_layout; - if (rt_image_memory_barrier_needed || - rt_src_stage_mask != rt_dst_stage_mask) { - 
barrier_src_stage_mask |= rt_src_stage_mask; - barrier_dst_stage_mask |= rt_dst_stage_mask; - if (rt_image_memory_barrier_needed) { - VkImageMemoryBarrier& rt_image_memory_barrier = - barrier_image_memory[barrier_image_memory_count++]; - rt_image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - rt_image_memory_barrier.pNext = nullptr; - rt_image_memory_barrier.srcAccessMask = rt_src_access_mask; - rt_image_memory_barrier.dstAccessMask = rt_dst_access_mask; - rt_image_memory_barrier.oldLayout = rt_old_layout; - rt_image_memory_barrier.newLayout = rt_new_layout; - rt_image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - rt_image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - rt_image_memory_barrier.image = vulkan_rt.image(); + VulkanRenderTarget::GetDrawUsage(i == 0, &rt_dst_stage_mask, + &rt_dst_access_mask, &rt_new_layout); + command_processor_.PushImageMemoryBarrier( + vulkan_rt.image(), ui::vulkan::util::InitializeSubresourceRange( - rt_image_memory_barrier.subresourceRange, i ? 
VK_IMAGE_ASPECT_COLOR_BIT - : (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)); - } - vulkan_rt.SetUsage(rt_dst_stage_mask, rt_dst_access_mask, rt_new_layout); - } - } - if (barrier_src_stage_mask || barrier_dst_stage_mask || - barrier_image_memory_count) { - if (!barrier_src_stage_mask) { - barrier_src_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - } - if (!barrier_dst_stage_mask) { - barrier_dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - } - command_processor_.EndRenderPass(); - command_processor_.deferred_command_buffer().CmdVkPipelineBarrier( - barrier_src_stage_mask, barrier_dst_stage_mask, 0, 0, nullptr, 0, - nullptr, barrier_image_memory_count, barrier_image_memory); + : (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)), + vulkan_rt.current_stage_mask(), rt_dst_stage_mask, + vulkan_rt.current_access_mask(), rt_dst_access_mask, + vulkan_rt.current_layout(), rt_new_layout); + vulkan_rt.SetUsage(rt_dst_stage_mask, rt_dst_access_mask, rt_new_layout); } return true; @@ -288,8 +241,8 @@ VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) { attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; - attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + attachment.initialLayout = VulkanRenderTarget::kDepthDrawLayout; + attachment.finalLayout = VulkanRenderTarget::kDepthDrawLayout; } VkAttachmentReference color_attachments[xenos::kMaxColorRenderTargets]; xenos::ColorRenderTargetFormat color_formats[] = { @@ -300,7 +253,7 @@ VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) { }; for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { VkAttachmentReference& color_attachment = color_attachments[i]; - color_attachment.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + 
color_attachment.layout = VulkanRenderTarget::kColorDrawLayout; uint32_t attachment_bit = uint32_t(1) << (1 + i); if (!(key.depth_and_color_used & attachment_bit)) { color_attachment.attachment = VK_ATTACHMENT_UNUSED; @@ -317,15 +270,14 @@ VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) { attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - attachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - attachment.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + attachment.initialLayout = VulkanRenderTarget::kColorDrawLayout; + attachment.finalLayout = VulkanRenderTarget::kColorDrawLayout; } VkAttachmentReference depth_stencil_attachment; depth_stencil_attachment.attachment = (key.depth_and_color_used & 0b1) ? 0 : VK_ATTACHMENT_UNUSED; - depth_stencil_attachment.layout = - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + depth_stencil_attachment.layout = VulkanRenderTarget::kDepthDrawLayout; VkSubpassDescription subpass; subpass.flags = 0; @@ -344,15 +296,12 @@ VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) { VkPipelineStageFlags dependency_stage_mask = 0; VkAccessFlags dependency_access_mask = 0; if (key.depth_and_color_used & 0b1) { - dependency_stage_mask |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - dependency_access_mask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + dependency_stage_mask |= VulkanRenderTarget::kDepthDrawStageMask; + dependency_access_mask |= VulkanRenderTarget::kDepthDrawAccessMask; } if (key.depth_and_color_used >> 1) { - dependency_stage_mask |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - dependency_access_mask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + dependency_stage_mask |= 
VulkanRenderTarget::kColorDrawStageMask; + dependency_access_mask |= VulkanRenderTarget::kColorDrawAccessMask; } VkSubpassDependency subpass_dependencies[2]; subpass_dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL; @@ -577,9 +526,9 @@ RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget( view_create_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; view_create_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; view_create_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; - ui::vulkan::util::InitializeSubresourceRange( - view_create_info.subresourceRange, - key.is_depth ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT); + view_create_info.subresourceRange = + ui::vulkan::util::InitializeSubresourceRange( + key.is_depth ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT); VkImageView view_depth_color; if (dfn.vkCreateImageView(device, &view_create_info, nullptr, &view_depth_color) != VK_SUCCESS) { diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h index e2c3d6d91..97bb690af 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2021 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -113,6 +113,22 @@ class VulkanRenderTargetCache final : public RenderTargetCache { // Can only be destroyed when framebuffers referencing it are destroyed! 
class VulkanRenderTarget final : public RenderTarget { public: + static constexpr VkPipelineStageFlags kColorDrawStageMask = + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + static constexpr VkAccessFlags kColorDrawAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + static constexpr VkImageLayout kColorDrawLayout = + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + static constexpr VkPipelineStageFlags kDepthDrawStageMask = + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + static constexpr VkAccessFlags kDepthDrawAccessMask = + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + static constexpr VkImageLayout kDepthDrawLayout = + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + // Takes ownership of the Vulkan objects passed to the constructor. VulkanRenderTarget(RenderTargetKey key, const ui::vulkan::VulkanProvider& provider, @@ -137,6 +153,26 @@ class VulkanRenderTargetCache final : public RenderTargetCache { VkImageView view_depth_color() const { return view_depth_color_; } VkImageView view_depth_stencil() const { return view_depth_stencil_; } + static void GetDrawUsage(bool is_depth, + VkPipelineStageFlags* stage_mask_out, + VkAccessFlags* access_mask_out, + VkImageLayout* layout_out) { + if (stage_mask_out) { + *stage_mask_out = is_depth ? kDepthDrawStageMask : kColorDrawStageMask; + } + if (access_mask_out) { + *access_mask_out = + is_depth ? kDepthDrawAccessMask : kColorDrawAccessMask; + } + if (layout_out) { + *layout_out = is_depth ? 
kDepthDrawLayout : kColorDrawLayout; + } + } + void GetDrawUsage(VkPipelineStageFlags* stage_mask_out, + VkAccessFlags* access_mask_out, + VkImageLayout* layout_out) const { + GetDrawUsage(key().is_depth, stage_mask_out, access_mask_out, layout_out); + } VkPipelineStageFlags current_stage_mask() const { return current_stage_mask_; } diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc index 9e446c71c..0d95189da 100644 --- a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc @@ -224,31 +224,28 @@ void VulkanSharedMemory::Use(Usage usage, std::min(written_range.second, kBufferSize - written_range.first); assert_true(usage != Usage::kRead || !written_range.second); if (last_usage_ != usage || last_written_range_.second) { - VkPipelineStageFlags stage_mask_src, stage_mask_dst; - VkBufferMemoryBarrier buffer_memory_barrier; - GetBarrier(last_usage_, stage_mask_src, - buffer_memory_barrier.srcAccessMask); - GetBarrier(usage, stage_mask_dst, buffer_memory_barrier.dstAccessMask); - buffer_memory_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - buffer_memory_barrier.pNext = nullptr; - buffer_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - buffer_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - buffer_memory_barrier.buffer = buffer_; + VkPipelineStageFlags src_stage_mask, dst_stage_mask; + VkAccessFlags src_access_mask, dst_access_mask; + GetBarrier(last_usage_, src_stage_mask, src_access_mask); + GetBarrier(usage, dst_stage_mask, dst_access_mask); + VkDeviceSize offset, size; if (last_usage_ == usage) { - // Committing the previous write. 
- buffer_memory_barrier.offset = VkDeviceSize(last_written_range_.first); - buffer_memory_barrier.size = VkDeviceSize(last_written_range_.second); + // Committing the previous write, while not changing the access mask + // (passing false as whether to skip the barrier if no masks are changed + // for this reason). + offset = VkDeviceSize(last_written_range_.first); + size = VkDeviceSize(last_written_range_.second); } else { // Changing the stage and access mask - all preceding writes must be // available not only to the source stage, but to the destination as well. - buffer_memory_barrier.offset = 0; - buffer_memory_barrier.size = VK_WHOLE_SIZE; + offset = 0; + size = VK_WHOLE_SIZE; last_usage_ = usage; } - command_processor_.EndRenderPass(); - command_processor_.deferred_command_buffer().CmdVkPipelineBarrier( - stage_mask_src, stage_mask_dst, 0, 0, nullptr, 1, - &buffer_memory_barrier, 0, nullptr); + command_processor_.PushBufferMemoryBarrier( + buffer_, offset, size, src_stage_mask, dst_stage_mask, src_access_mask, + dst_access_mask, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + false); } last_written_range_ = written_range; } @@ -276,8 +273,8 @@ bool VulkanSharedMemory::InitializeTraceSubmitDownloads() { return false; } - command_processor_.EndRenderPass(); Use(Usage::kRead); + command_processor_.SubmitBarriers(true); DeferredCommandBuffer& command_buffer = command_processor_.deferred_command_buffer(); @@ -295,19 +292,10 @@ bool VulkanSharedMemory::InitializeTraceSubmitDownloads() { download_buffer_offset += download_range.second; } - VkBufferMemoryBarrier download_buffer_barrier; - download_buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - download_buffer_barrier.pNext = nullptr; - download_buffer_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - download_buffer_barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT; - download_buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - download_buffer_barrier.dstQueueFamilyIndex = 
VK_QUEUE_FAMILY_IGNORED; - download_buffer_barrier.buffer = trace_download_buffer_; - download_buffer_barrier.offset = 0; - download_buffer_barrier.size = VK_WHOLE_SIZE; - command_buffer.CmdVkPipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_HOST_BIT, 0, 0, nullptr, - 1, &download_buffer_barrier, 0, nullptr); + command_processor_.PushBufferMemoryBarrier( + trace_download_buffer_, 0, VK_WHOLE_SIZE, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_HOST_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_HOST_READ_BIT); return true; } @@ -389,7 +377,6 @@ bool VulkanSharedMemory::UploadRanges( if (upload_page_ranges.empty()) { return true; } - command_processor_.EndRenderPass(); // upload_page_ranges are sorted, use them to determine the range for the // ordering barrier. Use(Usage::kTransferDestination, @@ -398,6 +385,7 @@ bool VulkanSharedMemory::UploadRanges( (upload_page_ranges.back().first + upload_page_ranges.back().second - upload_page_ranges.front().first) << page_size_log2())); + command_processor_.SubmitBarriers(true); DeferredCommandBuffer& command_buffer = command_processor_.deferred_command_buffer(); uint64_t submission_current = command_processor_.GetCurrentSubmission(); diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index b8fecdaa1..a90f530d9 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -426,7 +426,8 @@ void VulkanImmediateDrawer::End() { image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - util::InitializeSubresourceRange(image_memory_barrier.subresourceRange); + image_memory_barrier.subresourceRange = + util::InitializeSubresourceRange(); for (const PendingTextureUpload& pending_texture_upload : texture_uploads_pending_) { 
image_memory_barriers.emplace_back(image_memory_barrier).image = @@ -913,7 +914,7 @@ bool VulkanImmediateDrawer::CreateTextureResource( image_view_create_info.components.g = swizzle; image_view_create_info.components.b = swizzle; image_view_create_info.components.a = swizzle; - util::InitializeSubresourceRange(image_view_create_info.subresourceRange); + image_view_create_info.subresourceRange = util::InitializeSubresourceRange(); VkImageView image_view; if (dfn.vkCreateImageView(device, &image_view_create_info, nullptr, &image_view) != VK_SUCCESS) { diff --git a/src/xenia/ui/vulkan/vulkan_presenter.cc b/src/xenia/ui/vulkan/vulkan_presenter.cc index bf129b103..f1353bf3e 100644 --- a/src/xenia/ui/vulkan/vulkan_presenter.cc +++ b/src/xenia/ui/vulkan/vulkan_presenter.cc @@ -313,7 +313,7 @@ bool VulkanPresenter::CaptureGuestOutput(RawImage& image_out) { image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.image = guest_output_image->image(); - util::InitializeSubresourceRange(image_memory_barrier.subresourceRange); + image_memory_barrier.subresourceRange = util::InitializeSubresourceRange(); dfn.vkCmdPipelineBarrier(command_buffer, kGuestOutputInternalStageMask, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); diff --git a/src/xenia/ui/vulkan/vulkan_util.h b/src/xenia/ui/vulkan/vulkan_util.h index be5388934..fda575305 100644 --- a/src/xenia/ui/vulkan/vulkan_util.h +++ b/src/xenia/ui/vulkan/vulkan_util.h @@ -116,17 +116,18 @@ inline VkExtent2D GetMax2DFramebufferExtent(const VulkanProvider& provider) { return max_extent; } -inline void InitializeSubresourceRange( - VkImageSubresourceRange& range, +inline VkImageSubresourceRange InitializeSubresourceRange( VkImageAspectFlags aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT, uint32_t base_mip_level = 0, uint32_t level_count = VK_REMAINING_MIP_LEVELS, uint32_t base_array_layer = 0, uint32_t 
layer_count = VK_REMAINING_ARRAY_LAYERS) { + VkImageSubresourceRange range; range.aspectMask = aspect_mask; range.baseMipLevel = base_mip_level; range.levelCount = level_count; range.baseArrayLayer = base_array_layer; range.layerCount = layer_count; + return range; } // Creates a buffer backed by a dedicated allocation. The allocation size will From 0acb97d3839771a259063a1e9d387bbfee1d20c0 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 3 Apr 2022 16:40:29 +0300 Subject: [PATCH 077/123] [Vulkan] EDRAM range ownership transfers, resolve clears, 2x-as-4x MSAA Transfers are functional on a D3D12-like level, but need additional work so fallbacks are used when multisampled integer sampled images are not supported, and to eliminate transfers between render targets within Vulkan format compatibility classes by using different views directly. --- src/xenia/gpu/render_target_cache.h | 12 +- src/xenia/gpu/spirv_shader_translator.cc | 14 +- src/xenia/gpu/spirv_shader_translator.h | 36 + src/xenia/gpu/spirv_shader_translator_rb.cc | 425 ++ .../gpu/vulkan/deferred_command_buffer.cc | 45 + .../gpu/vulkan/deferred_command_buffer.h | 111 + src/xenia/gpu/vulkan/premake5.lua | 1 + .../gpu/vulkan/vulkan_command_processor.cc | 129 +- .../gpu/vulkan/vulkan_command_processor.h | 16 +- src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 20 +- .../gpu/vulkan/vulkan_render_target_cache.cc | 4242 ++++++++++++++++- .../gpu/vulkan/vulkan_render_target_cache.h | 482 +- src/xenia/gpu/vulkan/vulkan_shared_memory.cc | 17 +- src/xenia/gpu/vulkan/vulkan_shared_memory.h | 13 +- src/xenia/gpu/xenos.h | 1 + src/xenia/ui/vulkan/functions/device_1_0.inc | 2 + .../single_layout_descriptor_set_pool.cc | 120 + .../single_layout_descriptor_set_pool.h | 63 + src/xenia/ui/vulkan/vulkan_provider.cc | 4 + src/xenia/ui/vulkan/vulkan_provider.h | 1 + src/xenia/ui/vulkan/vulkan_util.cc | 47 + src/xenia/ui/vulkan/vulkan_util.h | 11 + 22 files changed, 5668 insertions(+), 144 deletions(-) create mode 100644 
src/xenia/gpu/spirv_shader_translator_rb.cc create mode 100644 src/xenia/ui/vulkan/single_layout_descriptor_set_pool.cc create mode 100644 src/xenia/ui/vulkan/single_layout_descriptor_set_pool.h diff --git a/src/xenia/gpu/render_target_cache.h b/src/xenia/gpu/render_target_cache.h index f0e59fb5f..2bac528bd 100644 --- a/src/xenia/gpu/render_target_cache.h +++ b/src/xenia/gpu/render_target_cache.h @@ -302,6 +302,10 @@ class RenderTargetCache { } return xenos::IsColorRenderTargetFormat64bpp(GetColorFormat()); } + const char* GetFormatName() const { + return is_depth ? xenos::GetDepthRenderTargetFormatName(GetDepthFormat()) + : xenos::GetColorRenderTargetFormatName(GetColorFormat()); + } uint32_t GetPitchTiles() const { return pitch_tiles_at_32bpp << uint32_t(Is64bpp()); @@ -317,11 +321,9 @@ class RenderTargetCache { } std::string GetDebugName() const { - return fmt::format( - "RT @ {}t, <{}t>, {}xMSAA, {}", base_tiles, GetPitchTiles(), - uint32_t(1) << uint32_t(msaa_samples), - is_depth ? xenos::GetDepthRenderTargetFormatName(GetDepthFormat()) - : xenos::GetColorRenderTargetFormatName(GetColorFormat())); + return fmt::format("RT @ {}t, <{}t>, {}xMSAA, {}", base_tiles, + GetPitchTiles(), uint32_t(1) << uint32_t(msaa_samples), + GetFormatName()); } }; diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index ce940da49..bcd140445 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -113,11 +113,9 @@ uint32_t SpirvShaderTranslator::GetModificationRegisterCount() const { } void SpirvShaderTranslator::StartTranslation() { - // Tool ID 26 "Xenia Emulator Microcode Translator". - // https://github.com/KhronosGroup/SPIRV-Headers/blob/c43a43c7cc3af55910b9bec2a71e3e8a622443cf/include/spirv/spir-v.xml#L79 // TODO(Triang3l): Logger. 
- builder_ = std::make_unique(features_.spirv_version, - (26 << 16) | 1, nullptr); + builder_ = std::make_unique( + features_.spirv_version, (kSpirvMagicToolId << 16) | 1, nullptr); builder_->addCapability(IsSpirvTessEvalShader() ? spv::CapabilityTessellation : spv::CapabilityShader); @@ -1535,20 +1533,20 @@ spv::Id SpirvShaderTranslator::GetUnmodifiedOperandComponents( static_cast(original_operand.GetComponent(scalar_index)) - static_cast(SwizzleSource::kX)); } - id_vector_temp_util_.clear(); - id_vector_temp_util_.reserve(component_count); + uint_vector_temp_util_.clear(); + uint_vector_temp_util_.reserve(component_count); uint32_t components_remaining = components; uint32_t component_index; while (xe::bit_scan_forward(components_remaining, &component_index)) { components_remaining &= ~(uint32_t(1) << component_index); - id_vector_temp_util_.push_back( + uint_vector_temp_util_.push_back( static_cast( original_operand.GetComponent(component_index)) - static_cast(SwizzleSource::kX)); } return builder_->createRvalueSwizzle(spv::NoPrecision, type_float_vectors_[component_count - 1], - operand_storage, id_vector_temp_util_); + operand_storage, uint_vector_temp_util_); } void SpirvShaderTranslator::GetOperandScalarXY( diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 932bd608f..beb478bb6 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -138,6 +138,10 @@ class SpirvShaderTranslator : public ShaderTranslator { kDescriptorSetCount, }; + // "Xenia Emulator Microcode Translator". 
+ // https://github.com/KhronosGroup/SPIRV-Headers/blob/c43a43c7cc3af55910b9bec2a71e3e8a622443cf/include/spirv/spir-v.xml#L79 + static constexpr uint32_t kSpirvMagicToolId = 26; + struct Features { explicit Features(const ui::vulkan::VulkanProvider& provider); explicit Features(bool all = false); @@ -172,6 +176,38 @@ class SpirvShaderTranslator : public ShaderTranslator { features_.max_storage_buffer_range); } + // Common functions useful not only for the translator, but also for EDRAM + // emulation via conventional render targets. + + // Converts the color value externally clamped to [0, 31.875] to 7e3 floating + // point, with zeros in bits 10:31, rounding to the nearest even. + static spv::Id PreClampedFloat32To7e3(spv::Builder& builder, + spv::Id f32_scalar, + spv::Id ext_inst_glsl_std_450); + // Same as PreClampedFloat32To7e3, but clamps the input to [0, 31.875]. + static spv::Id UnclampedFloat32To7e3(spv::Builder& builder, + spv::Id f32_scalar, + spv::Id ext_inst_glsl_std_450); + // Converts the 7e3 number in bits [f10_shift, f10_shift + 10) to a 32-bit + // float. + static spv::Id Float7e3To32(spv::Builder& builder, spv::Id f10_uint_scalar, + uint32_t f10_shift, bool result_as_uint, + spv::Id ext_inst_glsl_std_450); + // Converts the depth value externally clamped to the representable [0, 2) + // range to 20e4 floating point, with zeros in bits 24:31, rounding to the + // nearest even. If remap_from_0_to_0_5 is true, it's assumed that 0...1 is + // pre-remapped to 0...0.5 in the input. + static spv::Id PreClampedDepthTo20e4(spv::Builder& builder, + spv::Id f32_scalar, + bool remap_from_0_to_0_5, + spv::Id ext_inst_glsl_std_450); + // Converts the 20e4 number in bits [f24_shift, f24_shift + 10) to a 32-bit + // float. 
+ static spv::Id Depth20e4To32(spv::Builder& builder, spv::Id f24_uint_scalar, + uint32_t f24_shift, bool remap_to_0_to_0_5, + bool result_as_uint, + spv::Id ext_inst_glsl_std_450); + protected: void Reset() override; diff --git a/src/xenia/gpu/spirv_shader_translator_rb.cc b/src/xenia/gpu/spirv_shader_translator_rb.cc new file mode 100644 index 000000000..4cb260bdd --- /dev/null +++ b/src/xenia/gpu/spirv_shader_translator_rb.cc @@ -0,0 +1,425 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/spirv_shader_translator.h" + +#include +#include + +#include "third_party/glslang/SPIRV/GLSL.std.450.h" +#include "xenia/base/assert.h" + +namespace xe { +namespace gpu { + +spv::Id SpirvShaderTranslator::PreClampedFloat32To7e3( + spv::Builder& builder, spv::Id f32_scalar, spv::Id ext_inst_glsl_std_450) { + // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp + // Assuming the value is already clamped to [0, 31.875]. + + spv::Id type_uint = builder.makeUintType(32); + + // Need the source as uint for bit operations. + { + spv::Id source_type = builder.getTypeId(f32_scalar); + assert_true(builder.isScalarType(source_type)); + if (!builder.isUintType(source_type)) { + f32_scalar = builder.createUnaryOp(spv::OpBitcast, type_uint, f32_scalar); + } + } + + // The denormal 7e3 case. 
+ // denormal_biased_f32 = (f32 & 0x7FFFFF) | 0x800000 + spv::Id denormal_biased_f32; + { + spv::Instruction* denormal_insert_instruction = new spv::Instruction( + builder.getUniqueId(), type_uint, spv::OpBitFieldInsert); + denormal_insert_instruction->addIdOperand(f32_scalar); + denormal_insert_instruction->addIdOperand(builder.makeUintConstant(1)); + denormal_insert_instruction->addIdOperand(builder.makeUintConstant(23)); + denormal_insert_instruction->addIdOperand(builder.makeUintConstant(9)); + builder.getBuildPoint()->addInstruction( + std::unique_ptr(denormal_insert_instruction)); + denormal_biased_f32 = denormal_insert_instruction->getResultId(); + } + // denormal_biased_f32_shift_amount = min(125 - (f32 >> 23), 24) + // Not allowing the shift to overflow as that's undefined in SPIR-V. + spv::Id denormal_biased_f32_shift_amount; + { + spv::Instruction* denormal_shift_amount_instruction = + new spv::Instruction(builder.getUniqueId(), type_uint, spv::OpExtInst); + denormal_shift_amount_instruction->addIdOperand(ext_inst_glsl_std_450); + denormal_shift_amount_instruction->addImmediateOperand(GLSLstd450UMin); + denormal_shift_amount_instruction->addIdOperand(builder.createBinOp( + spv::OpISub, type_uint, builder.makeUintConstant(125), + builder.createBinOp(spv::OpShiftRightLogical, type_uint, f32_scalar, + builder.makeUintConstant(23)))); + denormal_shift_amount_instruction->addIdOperand( + builder.makeUintConstant(24)); + builder.getBuildPoint()->addInstruction( + std::unique_ptr(denormal_shift_amount_instruction)); + denormal_biased_f32_shift_amount = + denormal_shift_amount_instruction->getResultId(); + } + // denormal_biased_f32 = + // ((f32 & 0x7FFFFF) | 0x800000) >> min(125 - (f32 >> 23), 24) + denormal_biased_f32 = builder.createBinOp(spv::OpShiftRightLogical, type_uint, + denormal_biased_f32, + denormal_biased_f32_shift_amount); + + // The normal 7e3 case. + // Bias the exponent. 
+ // normal_biased_f32 = f32 - (124 << 23) + spv::Id normal_biased_f32 = + builder.createBinOp(spv::OpISub, type_uint, f32_scalar, + builder.makeUintConstant(UINT32_C(124) << 23)); + + // Select the needed conversion depending on whether the number is too small + // to be represented as normalized 7e3. + spv::Id biased_f32 = builder.createTriOp( + spv::OpSelect, type_uint, + builder.createBinOp(spv::OpULessThan, builder.makeBoolType(), f32_scalar, + builder.makeUintConstant(0x3E800000)), + denormal_biased_f32, normal_biased_f32); + + // Build the 7e3 number rounding to the nearest even. + // ((biased_f32 + 0x7FFF + ((biased_f32 >> 16) & 1)) >> 16) & 0x3FF + return builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, + builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp(spv::OpIAdd, type_uint, biased_f32, + builder.makeUintConstant(0x7FFF)), + builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32, + builder.makeUintConstant(16), + builder.makeUintConstant(1))), + builder.makeUintConstant(16), builder.makeUintConstant(10)); +} + +spv::Id SpirvShaderTranslator::UnclampedFloat32To7e3( + spv::Builder& builder, spv::Id f32_scalar, spv::Id ext_inst_glsl_std_450) { + spv::Id type_float = builder.makeFloatType(32); + + // Need the source as float for clamping. 
+ { + spv::Id source_type = builder.getTypeId(f32_scalar); + assert_true(builder.isScalarType(source_type)); + if (!builder.isFloatType(source_type)) { + f32_scalar = + builder.createUnaryOp(spv::OpBitcast, type_float, f32_scalar); + } + } + + { + spv::Instruction* clamp_instruction = + new spv::Instruction(builder.getUniqueId(), type_float, spv::OpExtInst); + clamp_instruction->addIdOperand(ext_inst_glsl_std_450); + clamp_instruction->addImmediateOperand(GLSLstd450NClamp); + clamp_instruction->addIdOperand(f32_scalar); + clamp_instruction->addIdOperand(builder.makeFloatConstant(0.0f)); + clamp_instruction->addIdOperand(builder.makeFloatConstant(31.875f)); + builder.getBuildPoint()->addInstruction( + std::unique_ptr(clamp_instruction)); + f32_scalar = clamp_instruction->getResultId(); + } + + return PreClampedFloat32To7e3(builder, f32_scalar, ext_inst_glsl_std_450); +} + +spv::Id SpirvShaderTranslator::Float7e3To32(spv::Builder& builder, + spv::Id f10_uint_scalar, + uint32_t f10_shift, + bool result_as_uint, + spv::Id ext_inst_glsl_std_450) { + // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp + + assert_true(builder.isUintType(builder.getTypeId(f10_uint_scalar))); + assert_true(f10_shift <= (32 - 10)); + + spv::Id type_bool = builder.makeBoolType(); + spv::Id type_int = builder.makeIntType(32); + spv::Id type_uint = builder.makeUintType(32); + + spv::Id f10_unbiased_exponent = builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, f10_uint_scalar, + builder.makeUintConstant(f10_shift + 7), builder.makeUintConstant(3)); + spv::Id f10_mantissa = builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, f10_uint_scalar, + builder.makeUintConstant(f10_shift), builder.makeUintConstant(7)); + + // The denormal nonzero 7e3 case. 
+ // denormal_mantissa_msb = findMSB(f10_mantissa) + spv::Id denormal_mantissa_msb; + { + spv::Instruction* denormal_mantissa_msb_instruction = + new spv::Instruction(builder.getUniqueId(), type_int, spv::OpExtInst); + denormal_mantissa_msb_instruction->addIdOperand(ext_inst_glsl_std_450); + denormal_mantissa_msb_instruction->addImmediateOperand(GLSLstd450FindUMsb); + denormal_mantissa_msb_instruction->addIdOperand(f10_mantissa); + builder.getBuildPoint()->addInstruction( + std::unique_ptr(denormal_mantissa_msb_instruction)); + denormal_mantissa_msb = denormal_mantissa_msb_instruction->getResultId(); + } + denormal_mantissa_msb = + builder.createUnaryOp(spv::OpBitcast, type_uint, denormal_mantissa_msb); + // denormal_f32_unbiased_exponent = 1 - (7 - findMSB(f10_mantissa)) + // Or: + // denormal_f32_unbiased_exponent = findMSB(f10_mantissa) - 6 + spv::Id denormal_f32_unbiased_exponent = + builder.createBinOp(spv::OpISub, type_uint, denormal_mantissa_msb, + builder.makeUintConstant(6)); + // Normalize the mantissa. + // denormal_f32_mantissa = f10_mantissa << (7 - findMSB(f10_mantissa)) + spv::Id denormal_f32_mantissa = builder.createBinOp( + spv::OpShiftLeftLogical, type_uint, f10_mantissa, + builder.createBinOp(spv::OpISub, type_uint, builder.makeUintConstant(7), + denormal_mantissa_msb)); + // If the 7e3 number is zero, make sure the float32 number is zero too. + spv::Id f10_mantissa_is_nonzero = builder.createBinOp( + spv::OpINotEqual, type_bool, f10_mantissa, builder.makeUintConstant(0)); + // Set the unbiased exponent to -124 for zero - 124 will be added later, + // resulting in zero float32. 
+ denormal_f32_unbiased_exponent = builder.createTriOp( + spv::OpSelect, type_uint, f10_mantissa_is_nonzero, + denormal_f32_unbiased_exponent, builder.makeUintConstant(uint32_t(-124))); + denormal_f32_mantissa = + builder.createTriOp(spv::OpSelect, type_uint, f10_mantissa_is_nonzero, + denormal_f32_mantissa, builder.makeUintConstant(0)); + + // Select the needed conversion depending on whether the number is normal. + spv::Id f10_is_normal = + builder.createBinOp(spv::OpINotEqual, type_bool, f10_unbiased_exponent, + builder.makeUintConstant(0)); + spv::Id f32_unbiased_exponent = builder.createTriOp( + spv::OpSelect, type_uint, f10_is_normal, f10_unbiased_exponent, + denormal_f32_unbiased_exponent); + spv::Id f32_mantissa = + builder.createTriOp(spv::OpSelect, type_uint, f10_is_normal, f10_mantissa, + denormal_f32_mantissa); + + // Bias the exponent and build the float32 number. + spv::Id f32_shifted; + { + spv::Instruction* f32_insert_instruction = new spv::Instruction( + builder.getUniqueId(), type_uint, spv::OpBitFieldInsert); + f32_insert_instruction->addIdOperand(f32_mantissa); + f32_insert_instruction->addIdOperand( + builder.createBinOp(spv::OpIAdd, type_uint, f32_unbiased_exponent, + builder.makeUintConstant(124))); + f32_insert_instruction->addIdOperand(builder.makeUintConstant(7)); + f32_insert_instruction->addIdOperand(builder.makeUintConstant(8)); + builder.getBuildPoint()->addInstruction( + std::unique_ptr<spv::Instruction>(f32_insert_instruction)); + f32_shifted = f32_insert_instruction->getResultId(); + } + spv::Id f32 = + builder.createBinOp(spv::OpShiftLeftLogical, type_uint, f32_shifted, + builder.makeUintConstant(23 - 7)); + + if (!result_as_uint) { + f32 = builder.createUnaryOp(spv::OpBitcast, builder.makeFloatType(32), f32); + } + + return f32; +} + +spv::Id SpirvShaderTranslator::PreClampedDepthTo20e4( + spv::Builder& builder, spv::Id f32_scalar, bool remap_from_0_to_0_5, + spv::Id ext_inst_glsl_std_450) { + // CFloat24 from d3dref9.dll + + //
https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp + // Assuming the value is already clamped to [0, 2) (in all places, the depth + // is written with saturation). + + uint32_t remap_bias = uint32_t(remap_from_0_to_0_5); + + spv::Id type_uint = builder.makeUintType(32); + + // Need the source as uint for bit operations. + { + spv::Id source_type = builder.getTypeId(f32_scalar); + assert_true(builder.isScalarType(source_type)); + if (!builder.isUintType(source_type)) { + f32_scalar = builder.createUnaryOp(spv::OpBitcast, type_uint, f32_scalar); + } + } + + // The denormal 20e4 case. + // denormal_biased_f32 = (f32 & 0x7FFFFF) | 0x800000 + spv::Id denormal_biased_f32; + { + spv::Instruction* denormal_insert_instruction = new spv::Instruction( + builder.getUniqueId(), type_uint, spv::OpBitFieldInsert); + denormal_insert_instruction->addIdOperand(f32_scalar); + denormal_insert_instruction->addIdOperand(builder.makeUintConstant(1)); + denormal_insert_instruction->addIdOperand(builder.makeUintConstant(23)); + denormal_insert_instruction->addIdOperand(builder.makeUintConstant(9)); + builder.getBuildPoint()->addInstruction( + std::unique_ptr(denormal_insert_instruction)); + denormal_biased_f32 = denormal_insert_instruction->getResultId(); + } + // denormal_biased_f32_shift_amount = min(113 - (f32 >> 23), 24) + // Not allowing the shift to overflow as that's undefined in SPIR-V. 
+ spv::Id denormal_biased_f32_shift_amount; + { + spv::Instruction* denormal_shift_amount_instruction = + new spv::Instruction(builder.getUniqueId(), type_uint, spv::OpExtInst); + denormal_shift_amount_instruction->addIdOperand(ext_inst_glsl_std_450); + denormal_shift_amount_instruction->addImmediateOperand(GLSLstd450UMin); + denormal_shift_amount_instruction->addIdOperand(builder.createBinOp( + spv::OpISub, type_uint, builder.makeUintConstant(113 - remap_bias), + builder.createBinOp(spv::OpShiftRightLogical, type_uint, f32_scalar, + builder.makeUintConstant(23)))); + denormal_shift_amount_instruction->addIdOperand( + builder.makeUintConstant(24)); + builder.getBuildPoint()->addInstruction( + std::unique_ptr(denormal_shift_amount_instruction)); + denormal_biased_f32_shift_amount = + denormal_shift_amount_instruction->getResultId(); + } + // denormal_biased_f32 = + // ((f32 & 0x7FFFFF) | 0x800000) >> min(113 - (f32 >> 23), 24) + denormal_biased_f32 = builder.createBinOp(spv::OpShiftRightLogical, type_uint, + denormal_biased_f32, + denormal_biased_f32_shift_amount); + + // The normal 20e4 case. + // Bias the exponent. + // normal_biased_f32 = f32 - (112 << 23) + spv::Id normal_biased_f32 = builder.createBinOp( + spv::OpISub, type_uint, f32_scalar, + builder.makeUintConstant((UINT32_C(112) + remap_bias) << 23)); + + // Select the needed conversion depending on whether the number is too small + // to be represented as normalized 20e4. + spv::Id biased_f32 = builder.createTriOp( + spv::OpSelect, type_uint, + builder.createBinOp( + spv::OpULessThan, builder.makeBoolType(), f32_scalar, + builder.makeUintConstant(0x38800000 - (remap_bias << 23))), + denormal_biased_f32, normal_biased_f32); + + // Build the 20e4 number rounding to the nearest even. 
+ // ((biased_f32 + 3 + ((biased_f32 >> 3) & 1)) >> 3) & 0xFFFFFF + return builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, + builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp(spv::OpIAdd, type_uint, biased_f32, + builder.makeUintConstant(3)), + builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32, + builder.makeUintConstant(3), + builder.makeUintConstant(1))), + builder.makeUintConstant(3), builder.makeUintConstant(24)); +} + +spv::Id SpirvShaderTranslator::Depth20e4To32(spv::Builder& builder, + spv::Id f24_uint_scalar, + uint32_t f24_shift, + bool remap_to_0_to_0_5, + bool result_as_uint, + spv::Id ext_inst_glsl_std_450) { + // CFloat24 from d3dref9.dll + + // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp + + assert_true(builder.isUintType(builder.getTypeId(f24_uint_scalar))); + assert_true(f24_shift <= (32 - 24)); + + uint32_t remap_bias = uint32_t(remap_to_0_to_0_5); + + spv::Id type_bool = builder.makeBoolType(); + spv::Id type_int = builder.makeIntType(32); + spv::Id type_uint = builder.makeUintType(32); + + spv::Id f24_unbiased_exponent = builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, f24_uint_scalar, + builder.makeUintConstant(f24_shift + 20), builder.makeUintConstant(4)); + spv::Id f24_mantissa = builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, f24_uint_scalar, + builder.makeUintConstant(f24_shift), builder.makeUintConstant(20)); + + // The denormal nonzero 20e4 case. 
+ // denormal_mantissa_msb = findMSB(f24_mantissa) + spv::Id denormal_mantissa_msb; + { + spv::Instruction* denormal_mantissa_msb_instruction = + new spv::Instruction(builder.getUniqueId(), type_int, spv::OpExtInst); + denormal_mantissa_msb_instruction->addIdOperand(ext_inst_glsl_std_450); + denormal_mantissa_msb_instruction->addImmediateOperand(GLSLstd450FindUMsb); + denormal_mantissa_msb_instruction->addIdOperand(f24_mantissa); + builder.getBuildPoint()->addInstruction( + std::unique_ptr(denormal_mantissa_msb_instruction)); + denormal_mantissa_msb = denormal_mantissa_msb_instruction->getResultId(); + } + denormal_mantissa_msb = + builder.createUnaryOp(spv::OpBitcast, type_uint, denormal_mantissa_msb); + // denormal_f32_unbiased_exponent = 1 - (20 - findMSB(f24_mantissa)) + // Or: + // denormal_f32_unbiased_exponent = findMSB(f24_mantissa) - 19 + spv::Id denormal_f32_unbiased_exponent = + builder.createBinOp(spv::OpISub, type_uint, denormal_mantissa_msb, + builder.makeUintConstant(19)); + // Normalize the mantissa. + // denormal_f32_mantissa = f24_mantissa << (20 - findMSB(f24_mantissa)) + spv::Id denormal_f32_mantissa = builder.createBinOp( + spv::OpShiftLeftLogical, type_uint, f24_mantissa, + builder.createBinOp(spv::OpISub, type_uint, builder.makeUintConstant(20), + denormal_mantissa_msb)); + // If the 20e4 number is zero, make sure the float32 number is zero too. + spv::Id f24_mantissa_is_nonzero = builder.createBinOp( + spv::OpINotEqual, type_bool, f24_mantissa, builder.makeUintConstant(0)); + // Set the unbiased exponent to -112 for zero - 112 will be added later, + // resulting in zero float32. 
+ denormal_f32_unbiased_exponent = builder.createTriOp( + spv::OpSelect, type_uint, f24_mantissa_is_nonzero, + denormal_f32_unbiased_exponent, + builder.makeUintConstant(uint32_t(-int32_t(112 - remap_bias)))); + denormal_f32_mantissa = + builder.createTriOp(spv::OpSelect, type_uint, f24_mantissa_is_nonzero, + denormal_f32_mantissa, builder.makeUintConstant(0)); + + // Select the needed conversion depending on whether the number is normal. + spv::Id f24_is_normal = + builder.createBinOp(spv::OpINotEqual, type_bool, f24_unbiased_exponent, + builder.makeUintConstant(0)); + spv::Id f32_unbiased_exponent = builder.createTriOp( + spv::OpSelect, type_uint, f24_is_normal, f24_unbiased_exponent, + denormal_f32_unbiased_exponent); + spv::Id f32_mantissa = + builder.createTriOp(spv::OpSelect, type_uint, f24_is_normal, f24_mantissa, + denormal_f32_mantissa); + + // Bias the exponent and build the float32 number. + spv::Id f32_shifted; + { + spv::Instruction* f32_insert_instruction = new spv::Instruction( + builder.getUniqueId(), type_uint, spv::OpBitFieldInsert); + f32_insert_instruction->addIdOperand(f32_mantissa); + f32_insert_instruction->addIdOperand( + builder.createBinOp(spv::OpIAdd, type_uint, f32_unbiased_exponent, + builder.makeUintConstant(112 - remap_bias))); + f32_insert_instruction->addIdOperand(builder.makeUintConstant(20)); + f32_insert_instruction->addIdOperand(builder.makeUintConstant(8)); + builder.getBuildPoint()->addInstruction( + std::unique_ptr<spv::Instruction>(f32_insert_instruction)); + f32_shifted = f32_insert_instruction->getResultId(); + } + spv::Id f32 = + builder.createBinOp(spv::OpShiftLeftLogical, type_uint, f32_shifted, + builder.makeUintConstant(23 - 20)); + + if (!result_as_uint) { + f32 = builder.createUnaryOp(spv::OpBitcast, builder.makeFloatType(32), f32); + } + + return f32; +} + +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.cc b/src/xenia/gpu/vulkan/deferred_command_buffer.cc index
470d8adde..98d42865d 100644 --- a/src/xenia/gpu/vulkan/deferred_command_buffer.cc +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.cc @@ -103,6 +103,37 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { args.pipeline); } break; + case Command::kVkBindVertexBuffers: { + auto& args = *reinterpret_cast(stream); + size_t offset_bytes = + xe::align(sizeof(ArgsVkBindVertexBuffers), alignof(VkBuffer)); + const VkBuffer* buffers = reinterpret_cast( + reinterpret_cast(stream) + offset_bytes); + offset_bytes = + xe::align(offset_bytes + sizeof(VkBuffer) * args.binding_count, + alignof(VkDeviceSize)); + const VkDeviceSize* offsets = reinterpret_cast( + reinterpret_cast(stream) + offset_bytes); + dfn.vkCmdBindVertexBuffers(command_buffer, args.first_binding, + args.binding_count, buffers, offsets); + } break; + + case Command::kVkClearAttachments: { + auto& args = *reinterpret_cast(stream); + size_t offset_bytes = xe::align(sizeof(ArgsVkClearAttachments), + alignof(VkClearAttachment)); + const VkClearAttachment* attachments = + reinterpret_cast( + reinterpret_cast(stream) + offset_bytes); + offset_bytes = xe::align( + offset_bytes + sizeof(VkClearAttachment) * args.attachment_count, + alignof(VkClearRect)); + const VkClearRect* rects = reinterpret_cast( + reinterpret_cast(stream) + offset_bytes); + dfn.vkCmdClearAttachments(command_buffer, args.attachment_count, + attachments, args.rect_count, rects); + } break; + case Command::kVkCopyBuffer: { auto& args = *reinterpret_cast(stream); dfn.vkCmdCopyBuffer( @@ -112,6 +143,12 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy)))); } break; + case Command::kVkDispatch: { + auto& args = *reinterpret_cast(stream); + dfn.vkCmdDispatch(command_buffer, args.group_count_x, + args.group_count_y, args.group_count_z); + } break; + case Command::kVkDraw: { auto& args = *reinterpret_cast(stream); dfn.vkCmdDraw(command_buffer, 
args.vertex_count, args.instance_count, @@ -168,6 +205,14 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { args.image_memory_barrier_count, image_memory_barriers); } break; + case Command::kVkPushConstants: { + auto& args = *reinterpret_cast(stream); + dfn.vkCmdPushConstants(command_buffer, args.layout, args.stage_flags, + args.offset, args.size, + reinterpret_cast(stream) + + sizeof(ArgsVkPushConstants)); + } break; + case Command::kVkSetBlendConstants: { auto& args = *reinterpret_cast(stream); dfn.vkCmdSetBlendConstants(command_buffer, args.blend_constants); diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.h b/src/xenia/gpu/vulkan/deferred_command_buffer.h index ac4c88f85..e3605f1e6 100644 --- a/src/xenia/gpu/vulkan/deferred_command_buffer.h +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.h @@ -108,6 +108,61 @@ class DeferredCommandBuffer { args.pipeline = pipeline; } + void CmdVkBindVertexBuffers(uint32_t first_binding, uint32_t binding_count, + const VkBuffer* buffers, + const VkDeviceSize* offsets) { + size_t arguments_size = + xe::align(sizeof(ArgsVkBindVertexBuffers), alignof(VkBuffer)); + size_t buffers_offset = arguments_size; + arguments_size = + xe::align(arguments_size + sizeof(VkBuffer) * binding_count, + alignof(VkDeviceSize)); + size_t offsets_offset = arguments_size; + arguments_size += sizeof(VkDeviceSize) * binding_count; + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkBindVertexBuffers, arguments_size)); + auto& args = *reinterpret_cast(args_ptr); + args.first_binding = first_binding; + args.binding_count = binding_count; + std::memcpy(args_ptr + buffers_offset, buffers, + sizeof(VkBuffer) * binding_count); + std::memcpy(args_ptr + offsets_offset, offsets, + sizeof(VkDeviceSize) * binding_count); + } + + void CmdClearAttachmentsEmplace(uint32_t attachment_count, + VkClearAttachment*& attachments_out, + uint32_t rect_count, + VkClearRect*& rects_out) { + size_t arguments_size = + 
xe::align(sizeof(ArgsVkClearAttachments), alignof(VkClearAttachment)); + size_t attachments_offset = arguments_size; + arguments_size = + xe::align(arguments_size + sizeof(VkClearAttachment) * attachment_count, + alignof(VkClearRect)); + size_t rects_offset = arguments_size; + arguments_size += sizeof(VkClearRect) * rect_count; + uint8_t* args_ptr = reinterpret_cast( + WriteCommand(Command::kVkClearAttachments, arguments_size)); + auto& args = *reinterpret_cast(args_ptr); + args.attachment_count = attachment_count; + args.rect_count = rect_count; + attachments_out = + reinterpret_cast(args_ptr + attachments_offset); + rects_out = reinterpret_cast(args_ptr + rects_offset); + } + void CmdVkClearAttachments(uint32_t attachment_count, + const VkClearAttachment* attachments, + uint32_t rect_count, const VkClearRect* rects) { + VkClearAttachment* attachments_arg; + VkClearRect* rects_arg; + CmdClearAttachmentsEmplace(attachment_count, attachments_arg, rect_count, + rects_arg); + std::memcpy(attachments_arg, attachments, + sizeof(VkClearAttachment) * attachment_count); + std::memcpy(rects_arg, rects, sizeof(VkClearRect) * rect_count); + } + VkBufferCopy* CmdCopyBufferEmplace(VkBuffer src_buffer, VkBuffer dst_buffer, uint32_t region_count) { const size_t header_size = @@ -127,6 +182,15 @@ class DeferredCommandBuffer { regions, sizeof(VkBufferCopy) * region_count); } + void CmdVkDispatch(uint32_t group_count_x, uint32_t group_count_y, + uint32_t group_count_z) { + auto& args = *reinterpret_cast( + WriteCommand(Command::kVkDispatch, sizeof(ArgsVkDispatch))); + args.group_count_x = group_count_x; + args.group_count_y = group_count_y; + args.group_count_z = group_count_z; + } + void CmdVkDraw(uint32_t vertex_count, uint32_t instance_count, uint32_t first_vertex, uint32_t first_instance) { auto& args = *reinterpret_cast( @@ -162,6 +226,19 @@ class DeferredCommandBuffer { uint32_t image_memory_barrier_count, const VkImageMemoryBarrier* image_memory_barriers); + void 
CmdVkPushConstants(VkPipelineLayout layout, + VkShaderStageFlags stage_flags, uint32_t offset, + uint32_t size, const void* values) { + uint8_t* args_ptr = reinterpret_cast(WriteCommand( + Command::kVkPushConstants, sizeof(ArgsVkPushConstants) + size)); + auto& args = *reinterpret_cast(args_ptr); + args.layout = layout; + args.stage_flags = stage_flags; + args.offset = offset; + args.size = size; + std::memcpy(args_ptr + sizeof(ArgsVkPushConstants), values, size); + } + void CmdVkSetBlendConstants(const float* blend_constants) { auto& args = *reinterpret_cast(WriteCommand( Command::kVkSetBlendConstants, sizeof(ArgsVkSetBlendConstants))); @@ -237,11 +314,15 @@ class DeferredCommandBuffer { kVkBindDescriptorSets, kVkBindIndexBuffer, kVkBindPipeline, + kVkBindVertexBuffers, + kVkClearAttachments, kVkCopyBuffer, + kVkDispatch, kVkDraw, kVkDrawIndexed, kVkEndRenderPass, kVkPipelineBarrier, + kVkPushConstants, kVkSetBlendConstants, kVkSetDepthBias, kVkSetScissor, @@ -289,6 +370,22 @@ class DeferredCommandBuffer { VkPipeline pipeline; }; + struct ArgsVkBindVertexBuffers { + uint32_t first_binding; + uint32_t binding_count; + // Followed by aligned VkBuffer[], VkDeviceSize[]. + static_assert(alignof(VkBuffer) <= alignof(uintmax_t)); + static_assert(alignof(VkDeviceSize) <= alignof(uintmax_t)); + }; + + struct ArgsVkClearAttachments { + uint32_t attachment_count; + uint32_t rect_count; + // Followed by aligned VkClearAttachment[], VkClearRect[]. 
+ static_assert(alignof(VkClearAttachment) <= alignof(uintmax_t)); + static_assert(alignof(VkClearRect) <= alignof(uintmax_t)); + }; + struct ArgsVkCopyBuffer { VkBuffer src_buffer; VkBuffer dst_buffer; @@ -297,6 +394,12 @@ class DeferredCommandBuffer { static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t)); }; + struct ArgsVkDispatch { + uint32_t group_count_x; + uint32_t group_count_y; + uint32_t group_count_z; + }; + struct ArgsVkDraw { uint32_t vertex_count; uint32_t instance_count; @@ -326,6 +429,14 @@ class DeferredCommandBuffer { static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t)); }; + struct ArgsVkPushConstants { + VkPipelineLayout layout; + VkShaderStageFlags stage_flags; + uint32_t offset; + uint32_t size; + // Followed by `size` bytes of values. + }; + struct ArgsVkSetBlendConstants { float blend_constants[4]; }; diff --git a/src/xenia/gpu/vulkan/premake5.lua b/src/xenia/gpu/vulkan/premake5.lua index 44205f326..ffc359504 100644 --- a/src/xenia/gpu/vulkan/premake5.lua +++ b/src/xenia/gpu/vulkan/premake5.lua @@ -8,6 +8,7 @@ project("xenia-gpu-vulkan") language("C++") links({ "fmt", + "glslang-spirv", "xenia-base", "xenia-gpu", "xenia-ui", diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 69d0c70a3..4f534c9dd 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -476,7 +476,7 @@ bool VulkanCommandProcessor::SetupContext() { swap_pipeline_create_info.renderPass = swap_render_pass_; swap_pipeline_create_info.subpass = 0; swap_pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE; - swap_pipeline_create_info.basePipelineIndex = UINT32_MAX; + swap_pipeline_create_info.basePipelineIndex = -1; VkResult swap_pipeline_create_result = dfn.vkCreateGraphicsPipelines( device, VK_NULL_HANDLE, 1, &swap_pipeline_create_info, nullptr, &swap_pipeline_); @@ -810,8 +810,6 @@ void 
VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, deferred_command_buffer_.CmdVkBeginRenderPass( &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE); - dynamic_viewport_update_needed_ = true; - dynamic_scissor_update_needed_ = true; VkViewport viewport; viewport.x = 0.0f; viewport.y = 0.0f; @@ -819,13 +817,13 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, viewport.height = float(scaled_height); viewport.minDepth = 0.0f; viewport.maxDepth = 1.0f; - deferred_command_buffer_.CmdVkSetViewport(0, 1, &viewport); - VkRect2D scissor_rect; - scissor_rect.offset.x = 0; - scissor_rect.offset.y = 0; - scissor_rect.extent.width = scaled_width; - scissor_rect.extent.height = scaled_height; - deferred_command_buffer_.CmdVkSetScissor(0, 1, &scissor_rect); + SetViewport(viewport); + VkRect2D scissor; + scissor.offset.x = 0; + scissor.offset.y = 0; + scissor.extent.width = scaled_width; + scissor.extent.height = scaled_height; + SetScissor(scissor); BindExternalGraphicsPipeline(swap_pipeline_); @@ -856,7 +854,7 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, EndSubmission(true); } -void VulkanCommandProcessor::PushBufferMemoryBarrier( +bool VulkanCommandProcessor::PushBufferMemoryBarrier( VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, @@ -865,7 +863,7 @@ void VulkanCommandProcessor::PushBufferMemoryBarrier( if (skip_if_equal && src_stage_mask == dst_stage_mask && src_access_mask == dst_access_mask && src_queue_family_index == dst_queue_family_index) { - return; + return false; } // Separate different barriers for overlapping buffer ranges into different @@ -889,10 +887,10 @@ void VulkanCommandProcessor::PushBufferMemoryBarrier( src_queue_family_index && other_buffer_memory_barrier.dstQueueFamilyIndex == dst_queue_family_index) { - // The barrier is already present. 
+ // The barrier is already pending. current_pending_barrier_.src_stage_mask |= src_stage_mask; current_pending_barrier_.dst_stage_mask |= dst_stage_mask; - return; + return true; } SplitPendingBarrier(); break; @@ -911,9 +909,10 @@ void VulkanCommandProcessor::PushBufferMemoryBarrier( buffer_memory_barrier.buffer = buffer; buffer_memory_barrier.offset = offset; buffer_memory_barrier.size = size; + return true; } -void VulkanCommandProcessor::PushImageMemoryBarrier( +bool VulkanCommandProcessor::PushImageMemoryBarrier( VkImage image, const VkImageSubresourceRange& subresource_range, VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, @@ -923,7 +922,7 @@ void VulkanCommandProcessor::PushImageMemoryBarrier( if (skip_if_equal && src_stage_mask == dst_stage_mask && src_access_mask == dst_access_mask && old_layout == new_layout && src_queue_family_index == dst_queue_family_index) { - return; + return false; } // Separate different barriers for overlapping image subresource ranges into @@ -969,10 +968,10 @@ void VulkanCommandProcessor::PushImageMemoryBarrier( src_queue_family_index && other_image_memory_barrier.dstQueueFamilyIndex == dst_queue_family_index) { - // The barrier is already present. + // The barrier is already pending. 
current_pending_barrier_.src_stage_mask |= src_stage_mask; current_pending_barrier_.dst_stage_mask |= dst_stage_mask; - return; + return true; } SplitPendingBarrier(); break; @@ -992,6 +991,7 @@ void VulkanCommandProcessor::PushImageMemoryBarrier( image_memory_barrier.dstQueueFamilyIndex = dst_queue_family_index; image_memory_barrier.image = image; image_memory_barrier.subresourceRange = subresource_range; + return true; } bool VulkanCommandProcessor::SubmitBarriers(bool force_end_render_pass) { @@ -1257,6 +1257,53 @@ void VulkanCommandProcessor::BindExternalGraphicsPipeline( current_guest_graphics_pipeline_layout_ = VK_NULL_HANDLE; } +void VulkanCommandProcessor::BindExternalComputePipeline(VkPipeline pipeline) { + if (current_external_compute_pipeline_ == pipeline) { + return; + } + deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline); + current_external_compute_pipeline_ = pipeline; +} + +void VulkanCommandProcessor::SetViewport(const VkViewport& viewport) { + if (!dynamic_viewport_update_needed_) { + dynamic_viewport_update_needed_ |= dynamic_viewport_.x != viewport.x; + dynamic_viewport_update_needed_ |= dynamic_viewport_.y != viewport.y; + dynamic_viewport_update_needed_ |= + dynamic_viewport_.width != viewport.width; + dynamic_viewport_update_needed_ |= + dynamic_viewport_.height != viewport.height; + dynamic_viewport_update_needed_ |= + dynamic_viewport_.minDepth != viewport.minDepth; + dynamic_viewport_update_needed_ |= + dynamic_viewport_.maxDepth != viewport.maxDepth; + } + if (dynamic_viewport_update_needed_) { + dynamic_viewport_ = viewport; + deferred_command_buffer_.CmdVkSetViewport(0, 1, &dynamic_viewport_); + dynamic_viewport_update_needed_ = false; + } +} + +void VulkanCommandProcessor::SetScissor(const VkRect2D& scissor) { + if (!dynamic_scissor_update_needed_) { + dynamic_scissor_update_needed_ |= + dynamic_scissor_.offset.x != scissor.offset.x; + dynamic_scissor_update_needed_ |= + dynamic_scissor_.offset.y != 
scissor.offset.y; + dynamic_scissor_update_needed_ |= + dynamic_scissor_.extent.width != scissor.extent.width; + dynamic_scissor_update_needed_ |= + dynamic_scissor_.extent.height != scissor.extent.height; + } + if (dynamic_scissor_update_needed_) { + dynamic_scissor_ = scissor; + deferred_command_buffer_.CmdVkSetScissor(0, 1, &dynamic_scissor_); + dynamic_scissor_update_needed_ = false; + } +} + Shader* VulkanCommandProcessor::LoadShader(xenos::ShaderType shader_type, uint32_t guest_address, const uint32_t* host_address, @@ -1417,8 +1464,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, } const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); - const VkPhysicalDeviceProperties& device_properties = - provider.device_properties(); + const VkPhysicalDeviceLimits& device_limits = + provider.device_properties().limits; // Get dynamic rasterizer state. draw_util::ViewportInfo viewport_info; @@ -1438,10 +1485,10 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // life. Or even disregard the viewport bounds range in the fragment shader // interlocks case completely - apply the viewport and the scissor offset // directly to pixel address and to things like ps_param_gen. - draw_util::GetHostViewportInfo( - regs, 1, 1, false, device_properties.limits.maxViewportDimensions[0], - device_properties.limits.maxViewportDimensions[1], true, false, false, - false, viewport_info); + draw_util::GetHostViewportInfo(regs, 1, 1, false, + device_limits.maxViewportDimensions[0], + device_limits.maxViewportDimensions[1], true, + false, false, false, viewport_info); // Update dynamic graphics pipeline state. UpdateDynamicState(viewport_info, primitive_polygonal); @@ -1675,6 +1722,8 @@ void VulkanCommandProcessor::CheckSubmissionFenceAndDeviceLoss( primitive_processor_->CompletedSubmissionUpdated(); + render_target_cache_->CompletedSubmissionUpdated(); + // Destroy outdated swap objects. 
while (!swap_framebuffers_outdated_.empty()) { const auto& framebuffer_pair = swap_framebuffers_outdated_.front(); @@ -1752,6 +1801,7 @@ bool VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { current_framebuffer_ = nullptr; current_guest_graphics_pipeline_ = VK_NULL_HANDLE; current_external_graphics_pipeline_ = VK_NULL_HANDLE; + current_external_compute_pipeline_ = VK_NULL_HANDLE; current_guest_graphics_pipeline_layout_ = nullptr; current_graphics_descriptor_sets_bound_up_to_date_ = 0; @@ -1861,6 +1911,8 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { if (submission_open_) { EndRenderPass(); + render_target_cache_->EndSubmission(); + primitive_processor_->EndSubmission(); shared_memory_->EndSubmission(); @@ -2112,20 +2164,7 @@ void VulkanCommandProcessor::UpdateDynamicState( } viewport.minDepth = viewport_info.z_min; viewport.maxDepth = viewport_info.z_max; - dynamic_viewport_update_needed_ |= dynamic_viewport_.x != viewport.x; - dynamic_viewport_update_needed_ |= dynamic_viewport_.y != viewport.y; - dynamic_viewport_update_needed_ |= dynamic_viewport_.width != viewport.width; - dynamic_viewport_update_needed_ |= - dynamic_viewport_.height != viewport.height; - dynamic_viewport_update_needed_ |= - dynamic_viewport_.minDepth != viewport.minDepth; - dynamic_viewport_update_needed_ |= - dynamic_viewport_.maxDepth != viewport.maxDepth; - if (dynamic_viewport_update_needed_) { - dynamic_viewport_ = viewport; - deferred_command_buffer_.CmdVkSetViewport(0, 1, &dynamic_viewport_); - dynamic_viewport_update_needed_ = false; - } + SetViewport(viewport); // Scissor. 
draw_util::Scissor scissor; @@ -2135,19 +2174,7 @@ void VulkanCommandProcessor::UpdateDynamicState( scissor_rect.offset.y = int32_t(scissor.offset[1]); scissor_rect.extent.width = scissor.extent[0]; scissor_rect.extent.height = scissor.extent[1]; - dynamic_scissor_update_needed_ |= - dynamic_scissor_.offset.x != scissor_rect.offset.x; - dynamic_scissor_update_needed_ |= - dynamic_scissor_.offset.y != scissor_rect.offset.y; - dynamic_scissor_update_needed_ |= - dynamic_scissor_.extent.width != scissor_rect.extent.width; - dynamic_scissor_update_needed_ |= - dynamic_scissor_.extent.height != scissor_rect.extent.height; - if (dynamic_scissor_update_needed_) { - dynamic_scissor_ = scissor_rect; - deferred_command_buffer_.CmdVkSetScissor(0, 1, &dynamic_scissor_); - dynamic_scissor_update_needed_ = false; - } + SetScissor(scissor_rect); // Depth bias. // TODO(Triang3l): Disable the depth bias for the fragment shader interlock RB diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 551a3fcae..54c25d22f 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -81,15 +81,16 @@ class VulkanCommandProcessor : public CommandProcessor { uint64_t GetCurrentFrame() const { return frame_current_; } uint64_t GetCompletedFrame() const { return frame_completed_; } - // Submission must be open to insert barriers. 
- void PushBufferMemoryBarrier( + // Submission must be open to insert barriers. Returning true if the barrier + // has actually been inserted and not dropped. + bool PushBufferMemoryBarrier( VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, uint32_t src_queue_family_index = VK_QUEUE_FAMILY_IGNORED, uint32_t dst_queue_family_index = VK_QUEUE_FAMILY_IGNORED, bool skip_if_equal = true); - void PushImageMemoryBarrier( + bool PushImageMemoryBarrier( VkImage image, const VkImageSubresourceRange& subresource_range, VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, @@ -125,6 +126,9 @@ class VulkanCommandProcessor : public CommandProcessor { bool keep_dynamic_depth_bias = false, bool keep_dynamic_blend_constants = false, bool keep_dynamic_stencil_mask_ref = false); + void BindExternalComputePipeline(VkPipeline pipeline); + void SetViewport(const VkViewport& viewport); + void SetScissor(const VkRect2D& scissor); protected: bool SetupContext() override; @@ -211,6 +215,9 @@ class VulkanCommandProcessor : public CommandProcessor { // open non-frame submission, BeginSubmission(true) will promote it to a // frame. EndSubmission(true) will close the frame no matter whether the // submission has already been closed. + // Unlike on Direct3D 12, submission boundaries do not imply any memory + // barriers aside from an incoming host write (but not outgoing host read) + // dependency. // Rechecks submission number and reclaims per-submission resources. Pass 0 as // the submission to await to simply check status, or pass @@ -396,6 +403,7 @@ class VulkanCommandProcessor : public CommandProcessor { // TODO(Triang3l): Change to a deferred compilation handle. 
VkPipeline current_guest_graphics_pipeline_; VkPipeline current_external_graphics_pipeline_; + VkPipeline current_external_compute_pipeline_; // Pipeline layout of the current guest graphics pipeline. const PipelineLayout* current_guest_graphics_pipeline_layout_; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 8f581f0fa..450a346b0 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -884,11 +884,25 @@ bool VulkanPipelineCache::EnsurePipelineCreated( // TODO(Triang3l): Wide lines. rasterization_state.lineWidth = 1.0f; + VkSampleMask sample_mask = UINT32_MAX; VkPipelineMultisampleStateCreateInfo multisample_state = {}; multisample_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - multisample_state.rasterizationSamples = VkSampleCountFlagBits( - uint32_t(1) << uint32_t(description.render_pass_key.msaa_samples)); + if (description.render_pass_key.msaa_samples == xenos::MsaaSamples::k2X && + !render_target_cache_.IsMsaa2xSupported( + description.render_pass_key.depth_and_color_used != 0)) { + // Using sample 0 as 0 and 3 as 1 for 2x instead (not exactly the same + // sample locations, but still top-left and bottom-right - however, this can + // be adjusted with custom sample locations). + multisample_state.rasterizationSamples = VK_SAMPLE_COUNT_4_BIT; + sample_mask = 0b1001; + // TODO(Triang3l): Research sample mask behavior without attachments (in + // Direct3D, it's completely ignored in this case). 
+ multisample_state.pSampleMask = &sample_mask; + } else { + multisample_state.rasterizationSamples = VkSampleCountFlagBits( + uint32_t(1) << uint32_t(description.render_pass_key.msaa_samples)); + } VkPipelineDepthStencilStateCreateInfo depth_stencil_state = {}; depth_stencil_state.sType = @@ -1061,7 +1075,7 @@ bool VulkanPipelineCache::EnsurePipelineCreated( pipeline_create_info.renderPass = creation_arguments.render_pass; pipeline_create_info.subpass = 0; pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE; - pipeline_create_info.basePipelineIndex = UINT32_MAX; + pipeline_create_info.basePipelineIndex = -1; const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index 24eb8e14b..b029f64dd 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -10,23 +10,109 @@ #include "xenia/gpu/vulkan/vulkan_render_target_cache.h" #include +#include #include #include #include +#include #include #include +#include +#include "third_party/glslang/SPIRV/GLSL.std.450.h" +#include "third_party/glslang/SPIRV/SpvBuilder.h" #include "xenia/base/assert.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" +#include "xenia/gpu/draw_util.h" #include "xenia/gpu/registers.h" +#include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/vulkan/deferred_command_buffer.h" #include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include "xenia/gpu/xenos.h" #include "xenia/ui/vulkan/vulkan_util.h" namespace xe { namespace gpu { namespace vulkan { +// Generated with `xb buildshaders`. 
+namespace shaders { +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_1xmsaa_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_2xmsaa_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_4xmsaa_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/passthrough_position_xy_vs.h" +} // namespace shaders + +const VulkanRenderTargetCache::TransferPipelineLayoutInfo + VulkanRenderTargetCache::kTransferPipelineLayoutInfos[size_t( + TransferPipelineLayoutIndex::kCount)] = { + // kColor + {kTransferUsedDescriptorSetColorTextureBit, + kTransferUsedPushConstantDwordAddressBit}, + // kDepth + {kTransferUsedDescriptorSetDepthStencilTexturesBit, + kTransferUsedPushConstantDwordAddressBit}, + // kColorToStencilBit + {kTransferUsedDescriptorSetColorTextureBit, + kTransferUsedPushConstantDwordAddressBit | + kTransferUsedPushConstantDwordStencilMaskBit}, + // kDepthToStencilBit + {kTransferUsedDescriptorSetDepthStencilTexturesBit, + kTransferUsedPushConstantDwordAddressBit | + kTransferUsedPushConstantDwordStencilMaskBit}, + // kColorAndHostDepthTexture + {kTransferUsedDescriptorSetHostDepthStencilTexturesBit | + kTransferUsedDescriptorSetColorTextureBit, + kTransferUsedPushConstantDwordHostDepthAddressBit | + kTransferUsedPushConstantDwordAddressBit}, + // kColorAndHostDepthBuffer + {kTransferUsedDescriptorSetHostDepthBufferBit | + kTransferUsedDescriptorSetColorTextureBit, + kTransferUsedPushConstantDwordHostDepthAddressBit | + kTransferUsedPushConstantDwordAddressBit}, + // kDepthAndHostDepthTexture + {kTransferUsedDescriptorSetHostDepthStencilTexturesBit | + kTransferUsedDescriptorSetDepthStencilTexturesBit, + kTransferUsedPushConstantDwordHostDepthAddressBit | + kTransferUsedPushConstantDwordAddressBit}, + // kDepthAndHostDepthBuffer + {kTransferUsedDescriptorSetHostDepthBufferBit | + kTransferUsedDescriptorSetDepthStencilTexturesBit, + kTransferUsedPushConstantDwordHostDepthAddressBit | + 
kTransferUsedPushConstantDwordAddressBit}, +}; + +const VulkanRenderTargetCache::TransferModeInfo + VulkanRenderTargetCache::kTransferModes[size_t(TransferMode::kCount)] = { + // kColorToDepth + {TransferOutput::kDepth, TransferPipelineLayoutIndex::kColor}, + // kColorToColor + {TransferOutput::kColor, TransferPipelineLayoutIndex::kColor}, + // kDepthToDepth + {TransferOutput::kDepth, TransferPipelineLayoutIndex::kDepth}, + // kDepthToColor + {TransferOutput::kColor, TransferPipelineLayoutIndex::kDepth}, + // kColorToStencilBit + {TransferOutput::kStencilBit, + TransferPipelineLayoutIndex::kColorToStencilBit}, + // kDepthToStencilBit + {TransferOutput::kStencilBit, + TransferPipelineLayoutIndex::kDepthToStencilBit}, + // kColorAndHostDepthToDepth + {TransferOutput::kDepth, + TransferPipelineLayoutIndex::kColorAndHostDepthTexture}, + // kDepthAndHostDepthToDepth + {TransferOutput::kDepth, + TransferPipelineLayoutIndex::kDepthAndHostDepthTexture}, + // kColorAndHostDepthCopyToDepth + {TransferOutput::kDepth, + TransferPipelineLayoutIndex::kColorAndHostDepthBuffer}, + // kDepthAndHostDepthCopyToDepth + {TransferOutput::kDepth, + TransferPipelineLayoutIndex::kDepthAndHostDepthBuffer}, +}; + VulkanRenderTargetCache::VulkanRenderTargetCache( VulkanCommandProcessor& command_processor, const RegisterFile& register_file) @@ -35,6 +121,342 @@ VulkanRenderTargetCache::VulkanRenderTargetCache( VulkanRenderTargetCache::~VulkanRenderTargetCache() { Shutdown(true); } bool VulkanRenderTargetCache::Initialize() { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + // Descriptor set layouts. 
+ VkDescriptorSetLayoutBinding descriptor_set_layout_bindings[2]; + descriptor_set_layout_bindings[0].binding = 0; + descriptor_set_layout_bindings[0].descriptorType = + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + descriptor_set_layout_bindings[0].descriptorCount = 1; + descriptor_set_layout_bindings[0].stageFlags = + VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT; + descriptor_set_layout_bindings[0].pImmutableSamplers = nullptr; + VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info; + descriptor_set_layout_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + descriptor_set_layout_create_info.pNext = nullptr; + descriptor_set_layout_create_info.flags = 0; + descriptor_set_layout_create_info.bindingCount = 1; + descriptor_set_layout_create_info.pBindings = descriptor_set_layout_bindings; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layout_storage_buffer_) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the descriptor set layout " + "with one storage buffer"); + Shutdown(); + return false; + } + descriptor_set_layout_bindings[0].descriptorType = + VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layout_sampled_image_) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the descriptor set layout " + "with one sampled image"); + Shutdown(); + return false; + } + descriptor_set_layout_bindings[1].binding = 1; + descriptor_set_layout_bindings[1].descriptorType = + VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + descriptor_set_layout_bindings[1].descriptorCount = 1; + descriptor_set_layout_bindings[1].stageFlags = + descriptor_set_layout_bindings[0].stageFlags; + descriptor_set_layout_bindings[1].pImmutableSamplers = nullptr; + descriptor_set_layout_create_info.bindingCount = 2; + if (dfn.vkCreateDescriptorSetLayout( + device, 
&descriptor_set_layout_create_info, nullptr, + &descriptor_set_layout_sampled_image_x2_) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the descriptor set layout " + "with two sampled images"); + Shutdown(); + return false; + } + + // Descriptor set pools. + // The pool sizes were chosen without a specific reason. + VkDescriptorPoolSize descriptor_set_layout_size; + descriptor_set_layout_size.type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + descriptor_set_layout_size.descriptorCount = 1; + descriptor_set_pool_sampled_image_ = + std::make_unique( + provider, 256, 1, &descriptor_set_layout_size, + descriptor_set_layout_sampled_image_); + descriptor_set_layout_size.descriptorCount = 2; + descriptor_set_pool_sampled_image_x2_ = + std::make_unique( + provider, 256, 1, &descriptor_set_layout_size, + descriptor_set_layout_sampled_image_x2_); + + // EDRAM contents reinterpretation buffer. + // 90 MB with 9x resolution scaling - within the minimum + // maxStorageBufferRange. + if (!ui::vulkan::util::CreateDedicatedAllocationBuffer( + provider, + VkDeviceSize(xenos::kEdramSizeBytes * resolution_scale_x_ * + resolution_scale_y_), + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + ui::vulkan::util::MemoryPurpose::kDeviceLocal, edram_buffer_, + edram_buffer_memory_)) { + XELOGE("VulkanRenderTargetCache: Failed to create the EDRAM buffer"); + Shutdown(); + return false; + } + if (GetPath() == Path::kPixelShaderInterlock) { + // The first operation will likely be drawing. + edram_buffer_usage_ = EdramBufferUsage::kFragmentReadWrite; + } else { + // The first operation will likely be depth self-comparison. 
+ edram_buffer_usage_ = EdramBufferUsage::kFragmentRead; + } + edram_buffer_modification_status_ = + EdramBufferModificationStatus::kUnmodified; + VkDescriptorPoolSize edram_storage_buffer_descriptor_pool_size; + edram_storage_buffer_descriptor_pool_size.type = + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + edram_storage_buffer_descriptor_pool_size.descriptorCount = 1; + VkDescriptorPoolCreateInfo edram_storage_buffer_descriptor_pool_create_info; + edram_storage_buffer_descriptor_pool_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + edram_storage_buffer_descriptor_pool_create_info.pNext = nullptr; + edram_storage_buffer_descriptor_pool_create_info.flags = 0; + edram_storage_buffer_descriptor_pool_create_info.maxSets = 1; + edram_storage_buffer_descriptor_pool_create_info.poolSizeCount = 1; + edram_storage_buffer_descriptor_pool_create_info.pPoolSizes = + &edram_storage_buffer_descriptor_pool_size; + if (dfn.vkCreateDescriptorPool( + device, &edram_storage_buffer_descriptor_pool_create_info, nullptr, + &edram_storage_buffer_descriptor_pool_) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the EDRAM buffer storage " + "buffer descriptor pool"); + Shutdown(); + return false; + } + VkDescriptorSetAllocateInfo edram_storage_buffer_descriptor_set_allocate_info; + edram_storage_buffer_descriptor_set_allocate_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + edram_storage_buffer_descriptor_set_allocate_info.pNext = nullptr; + edram_storage_buffer_descriptor_set_allocate_info.descriptorPool = + edram_storage_buffer_descriptor_pool_; + edram_storage_buffer_descriptor_set_allocate_info.descriptorSetCount = 1; + edram_storage_buffer_descriptor_set_allocate_info.pSetLayouts = + &descriptor_set_layout_storage_buffer_; + if (dfn.vkAllocateDescriptorSets( + device, &edram_storage_buffer_descriptor_set_allocate_info, + &edram_storage_buffer_descriptor_set_) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTargetCache: Failed to allocate 
the EDRAM buffer storage " + "buffer descriptor set"); + Shutdown(); + return false; + } + VkDescriptorBufferInfo edram_storage_buffer_descriptor_buffer_info; + edram_storage_buffer_descriptor_buffer_info.buffer = edram_buffer_; + edram_storage_buffer_descriptor_buffer_info.offset = 0; + edram_storage_buffer_descriptor_buffer_info.range = VK_WHOLE_SIZE; + VkWriteDescriptorSet edram_storage_buffer_descriptor_write; + edram_storage_buffer_descriptor_write.sType = + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + edram_storage_buffer_descriptor_write.pNext = nullptr; + edram_storage_buffer_descriptor_write.dstSet = + edram_storage_buffer_descriptor_set_; + edram_storage_buffer_descriptor_write.dstBinding = 0; + edram_storage_buffer_descriptor_write.dstArrayElement = 0; + edram_storage_buffer_descriptor_write.descriptorCount = 1; + edram_storage_buffer_descriptor_write.descriptorType = + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + edram_storage_buffer_descriptor_write.pImageInfo = nullptr; + edram_storage_buffer_descriptor_write.pBufferInfo = + &edram_storage_buffer_descriptor_buffer_info; + edram_storage_buffer_descriptor_write.pTexelBufferView = nullptr; + dfn.vkUpdateDescriptorSets(device, 1, &edram_storage_buffer_descriptor_write, + 0, nullptr); + + // TODO(Triang3l): All paths (FSI). + + // TODO(Triang3l): Handle sampledImageIntegerSampleCounts 4 not supported in + // transfers. + if (cvars::native_2x_msaa) { + const VkPhysicalDeviceLimits& device_limits = + provider.device_properties().limits; + // Multisampled integer sampled images are optional in Vulkan and in Xenia. 
+ msaa_2x_attachments_supported_ = + (device_limits.framebufferColorSampleCounts & + device_limits.framebufferDepthSampleCounts & + device_limits.framebufferStencilSampleCounts & + device_limits.sampledImageColorSampleCounts & + device_limits.sampledImageDepthSampleCounts & + device_limits.sampledImageStencilSampleCounts & + VK_SAMPLE_COUNT_2_BIT) && + (device_limits.sampledImageIntegerSampleCounts & + (VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT)) != + VK_SAMPLE_COUNT_4_BIT; + msaa_2x_no_attachments_supported_ = + (device_limits.framebufferNoAttachmentsSampleCounts & + VK_SAMPLE_COUNT_2_BIT) != 0; + } else { + msaa_2x_attachments_supported_ = false; + msaa_2x_no_attachments_supported_ = false; + } + + // Host depth storing pipeline layout. + VkDescriptorSetLayout host_depth_store_descriptor_set_layouts[] = { + // Destination EDRAM storage buffer. + descriptor_set_layout_storage_buffer_, + // Source depth / stencil texture (only depth is used). + descriptor_set_layout_sampled_image_x2_, + }; + VkPushConstantRange host_depth_store_push_constant_range; + host_depth_store_push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + host_depth_store_push_constant_range.offset = 0; + host_depth_store_push_constant_range.size = sizeof(HostDepthStoreConstants); + VkPipelineLayoutCreateInfo host_depth_store_pipeline_layout_create_info; + host_depth_store_pipeline_layout_create_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + host_depth_store_pipeline_layout_create_info.pNext = nullptr; + host_depth_store_pipeline_layout_create_info.flags = 0; + host_depth_store_pipeline_layout_create_info.setLayoutCount = + uint32_t(xe::countof(host_depth_store_descriptor_set_layouts)); + host_depth_store_pipeline_layout_create_info.pSetLayouts = + host_depth_store_descriptor_set_layouts; + host_depth_store_pipeline_layout_create_info.pushConstantRangeCount = 1; + host_depth_store_pipeline_layout_create_info.pPushConstantRanges = + &host_depth_store_push_constant_range; 
+ if (dfn.vkCreatePipelineLayout( + device, &host_depth_store_pipeline_layout_create_info, nullptr, + &host_depth_store_pipeline_layout_) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the host depth storing " + "pipeline layout"); + Shutdown(); + return false; + } + const std::pair host_depth_store_shaders[] = { + {shaders::host_depth_store_1xmsaa_cs, + sizeof(shaders::host_depth_store_1xmsaa_cs)}, + {shaders::host_depth_store_2xmsaa_cs, + sizeof(shaders::host_depth_store_2xmsaa_cs)}, + {shaders::host_depth_store_4xmsaa_cs, + sizeof(shaders::host_depth_store_4xmsaa_cs)}, + }; + for (size_t i = 0; i < xe::countof(host_depth_store_shaders); ++i) { + const std::pair host_depth_store_shader = + host_depth_store_shaders[i]; + VkPipeline host_depth_store_pipeline = + ui::vulkan::util::CreateComputePipeline( + provider, host_depth_store_pipeline_layout_, + host_depth_store_shader.first, host_depth_store_shader.second); + if (host_depth_store_pipeline == VK_NULL_HANDLE) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the {}-sample host depth " + "storing pipeline", + uint32_t(1) << i); + Shutdown(); + return false; + } + host_depth_store_pipelines_[i] = host_depth_store_pipeline; + } + + // Transfer and clear vertex buffer, for quads of up to tile granularity. + transfer_vertex_buffer_pool_ = + std::make_unique( + provider, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + std::max(ui::vulkan::VulkanUploadBufferPool::kDefaultPageSize, + sizeof(float) * 2 * 6 * + Transfer::kMaxCutoutBorderRectangles * + xenos::kEdramTileCount)); + + // Transfer vertex shader. 
+ transfer_passthrough_vertex_shader_ = ui::vulkan::util::CreateShaderModule( + provider, shaders::passthrough_position_xy_vs, + sizeof(shaders::passthrough_position_xy_vs)); + if (transfer_passthrough_vertex_shader_ == VK_NULL_HANDLE) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the render target ownership " + "transfer vertex shader"); + Shutdown(); + return false; + } + + // Transfer pipeline layouts. + VkDescriptorSetLayout transfer_pipeline_layout_descriptor_set_layouts + [kTransferUsedDescriptorSetCount]; + VkPushConstantRange transfer_pipeline_layout_push_constant_range; + transfer_pipeline_layout_push_constant_range.stageFlags = + VK_SHADER_STAGE_FRAGMENT_BIT; + transfer_pipeline_layout_push_constant_range.offset = 0; + VkPipelineLayoutCreateInfo transfer_pipeline_layout_create_info; + transfer_pipeline_layout_create_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + transfer_pipeline_layout_create_info.pNext = nullptr; + transfer_pipeline_layout_create_info.flags = 0; + transfer_pipeline_layout_create_info.pSetLayouts = + transfer_pipeline_layout_descriptor_set_layouts; + transfer_pipeline_layout_create_info.pPushConstantRanges = + &transfer_pipeline_layout_push_constant_range; + for (size_t i = 0; i < size_t(TransferPipelineLayoutIndex::kCount); ++i) { + const TransferPipelineLayoutInfo& transfer_pipeline_layout_info = + kTransferPipelineLayoutInfos[i]; + transfer_pipeline_layout_create_info.setLayoutCount = 0; + uint32_t transfer_pipeline_layout_descriptor_sets_remaining = + transfer_pipeline_layout_info.used_descriptor_sets; + uint32_t transfer_pipeline_layout_descriptor_set_index; + while ( + xe::bit_scan_forward(transfer_pipeline_layout_descriptor_sets_remaining, + &transfer_pipeline_layout_descriptor_set_index)) { + transfer_pipeline_layout_descriptor_sets_remaining &= + ~(uint32_t(1) << transfer_pipeline_layout_descriptor_set_index); + VkDescriptorSetLayout transfer_pipeline_layout_descriptor_set_layout = + VK_NULL_HANDLE; + 
switch (TransferUsedDescriptorSet( + transfer_pipeline_layout_descriptor_set_index)) { + case kTransferUsedDescriptorSetHostDepthBuffer: + transfer_pipeline_layout_descriptor_set_layout = + descriptor_set_layout_storage_buffer_; + break; + case kTransferUsedDescriptorSetHostDepthStencilTextures: + case kTransferUsedDescriptorSetDepthStencilTextures: + transfer_pipeline_layout_descriptor_set_layout = + descriptor_set_layout_sampled_image_x2_; + break; + case kTransferUsedDescriptorSetColorTexture: + transfer_pipeline_layout_descriptor_set_layout = + descriptor_set_layout_sampled_image_; + break; + default: + assert_unhandled_case(TransferUsedDescriptorSet( + transfer_pipeline_layout_descriptor_set_index)); + } + transfer_pipeline_layout_descriptor_set_layouts + [transfer_pipeline_layout_create_info.setLayoutCount++] = + transfer_pipeline_layout_descriptor_set_layout; + } + transfer_pipeline_layout_push_constant_range.size = uint32_t( + sizeof(uint32_t) * + xe::bit_count(transfer_pipeline_layout_info.used_push_constant_dwords)); + transfer_pipeline_layout_create_info.pushConstantRangeCount = + transfer_pipeline_layout_info.used_push_constant_dwords ? 1 : 0; + if (dfn.vkCreatePipelineLayout( + device, &transfer_pipeline_layout_create_info, nullptr, + &transfer_pipeline_layouts_[i]) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the render target " + "ownership transfer pipeline layout {}", + i); + Shutdown(); + return false; + } + } + InitializeCommon(); return true; } @@ -45,6 +467,36 @@ void VulkanRenderTargetCache::Shutdown(bool from_destructor) { const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + for (const auto& transfer_pipeline_array_pair : transfer_pipelines_) { + for (VkPipeline transfer_pipeline : transfer_pipeline_array_pair.second) { + // May be null to prevent recreation attempts. 
+ if (transfer_pipeline != VK_NULL_HANDLE) { + dfn.vkDestroyPipeline(device, transfer_pipeline, nullptr); + } + } + } + transfer_pipelines_.clear(); + for (const auto& transfer_shader_pair : transfer_shaders_) { + if (transfer_shader_pair.second != VK_NULL_HANDLE) { + dfn.vkDestroyShaderModule(device, transfer_shader_pair.second, nullptr); + } + } + transfer_shaders_.clear(); + for (size_t i = 0; i < size_t(TransferPipelineLayoutIndex::kCount); ++i) { + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device, + transfer_pipeline_layouts_[i]); + } + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyShaderModule, device, + transfer_passthrough_vertex_shader_); + transfer_vertex_buffer_pool_.reset(); + + for (size_t i = 0; i < xe::countof(host_depth_store_pipelines_); ++i) { + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device, + host_depth_store_pipelines_[i]); + } + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device, + host_depth_store_pipeline_layout_); + last_update_framebuffer_ = VK_NULL_HANDLE; for (const auto& framebuffer_pair : framebuffers_) { dfn.vkDestroyFramebuffer(device, framebuffer_pair.second.framebuffer, @@ -54,10 +506,32 @@ void VulkanRenderTargetCache::Shutdown(bool from_destructor) { last_update_render_pass_ = VK_NULL_HANDLE; for (const auto& render_pass_pair : render_passes_) { - dfn.vkDestroyRenderPass(device, render_pass_pair.second, nullptr); + if (render_pass_pair.second != VK_NULL_HANDLE) { + dfn.vkDestroyRenderPass(device, render_pass_pair.second, nullptr); + } } render_passes_.clear(); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device, + edram_storage_buffer_descriptor_pool_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, + edram_buffer_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, + edram_buffer_memory_); + + descriptor_set_pool_sampled_image_x2_.reset(); + descriptor_set_pool_sampled_image_.reset(); + + 
ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyDescriptorSetLayout, device, + descriptor_set_layout_sampled_image_x2_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, + device, + descriptor_set_layout_sampled_image_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, + device, + descriptor_set_layout_storage_buffer_); + if (!from_destructor) { ShutdownCommon(); } @@ -87,6 +561,19 @@ void VulkanRenderTargetCache::ClearCache() { RenderTargetCache::ClearCache(); } +void VulkanRenderTargetCache::CompletedSubmissionUpdated() { + if (transfer_vertex_buffer_pool_) { + transfer_vertex_buffer_pool_->Reclaim( + command_processor_.GetCompletedSubmission()); + } +} + +void VulkanRenderTargetCache::EndSubmission() { + if (transfer_vertex_buffer_pool_) { + transfer_vertex_buffer_pool_->FlushWrites(); + } +} + bool VulkanRenderTargetCache::Update(bool is_rasterization_done, uint32_t shader_writes_color_targets) { if (!RenderTargetCache::Update(is_rasterization_done, @@ -94,9 +581,16 @@ bool VulkanRenderTargetCache::Update(bool is_rasterization_done, return false; } - auto rb_surface_info = register_file().Get(); + // TODO(Triang3l): All paths (FSI). + RenderTarget* const* depth_and_color_render_targets = last_update_accumulated_render_targets(); + + PerformTransfersAndResolveClears(1 + xenos::kMaxColorRenderTargets, + depth_and_color_render_targets, + last_update_transfers()); + + auto rb_surface_info = register_file().Get(); uint32_t render_targets_are_srgb = gamma_render_target_as_srgb_ ? last_update_accumulated_color_targets_are_gamma() @@ -104,7 +598,6 @@ bool VulkanRenderTargetCache::Update(bool is_rasterization_done, RenderPassKey render_pass_key; render_pass_key.msaa_samples = rb_surface_info.msaa_samples; - // TODO(Triang3l): 2x MSAA as 4x. 
if (depth_and_color_render_targets[0]) { render_pass_key.depth_and_color_used |= 1 << 0; render_pass_key.depth_format = @@ -220,9 +713,9 @@ VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) { samples = VK_SAMPLE_COUNT_1_BIT; break; case xenos::MsaaSamples::k2X: - // Using unconditionally because if 2x is emulated as 4x, the key will - // also contain 4x. - samples = VK_SAMPLE_COUNT_2_BIT; + samples = IsMsaa2xSupported(key.depth_and_color_used != 0) + ? VK_SAMPLE_COUNT_2_BIT + : VK_SAMPLE_COUNT_4_BIT; break; case xenos::MsaaSamples::k4X: samples = VK_SAMPLE_COUNT_4_BIT; @@ -264,7 +757,11 @@ VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) { color_attachment.attachment = attachment_index; VkAttachmentDescription& attachment = attachments[attachment_index]; attachment.flags = 0; - attachment.format = GetColorVulkanFormat(color_formats[i]); + xenos::ColorRenderTargetFormat color_format = color_formats[i]; + attachment.format = + key.color_rts_use_transfer_formats + ? 
GetColorOwnershipTransferVulkanFormat(color_format) + : GetColorVulkanFormat(color_format); attachment.samples = samples; attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; @@ -340,7 +837,8 @@ VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) { VkRenderPass render_pass; if (dfn.vkCreateRenderPass(device, &render_pass_create_info, nullptr, &render_pass) != VK_SUCCESS) { - XELOGE("Failed to create a Vulkan render pass"); + XELOGE("VulkanRenderTargetCache: Failed to create a render pass"); + render_passes_.emplace(key.key, VK_NULL_HANDLE); return VK_NULL_HANDLE; } render_passes_.emplace(key.key, render_pass); @@ -419,8 +917,15 @@ VkFormat VulkanRenderTargetCache::GetColorOwnershipTransferVulkanFormat( } VulkanRenderTargetCache::VulkanRenderTarget::~VulkanRenderTarget() { - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); + const ui::vulkan::VulkanProvider& provider = + render_target_cache_.command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + ui::vulkan::SingleLayoutDescriptorSetPool& descriptor_set_pool = + key().is_depth + ? *render_target_cache_.descriptor_set_pool_sampled_image_x2_ + : *render_target_cache_.descriptor_set_pool_sampled_image_; + descriptor_set_pool.Free(descriptor_set_index_transfer_source_); if (view_color_transfer_separate_ != VK_NULL_HANDLE) { dfn.vkDestroyImageView(device, view_color_transfer_separate_, nullptr); } @@ -464,16 +969,20 @@ RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget( image_create_info.pNext = nullptr; image_create_info.flags = 0; image_create_info.imageType = VK_IMAGE_TYPE_2D; - // TODO(Triang3l): Resolution scaling. 
- image_create_info.extent.width = key.GetWidth(); + image_create_info.extent.width = key.GetWidth() * resolution_scale_x_; image_create_info.extent.height = - GetRenderTargetHeight(key.pitch_tiles_at_32bpp, key.msaa_samples); + GetRenderTargetHeight(key.pitch_tiles_at_32bpp, key.msaa_samples) * + resolution_scale_y_; image_create_info.extent.depth = 1; image_create_info.mipLevels = 1; image_create_info.arrayLayers = 1; - // TODO(Triang3l): 2x MSAA as 4x. - image_create_info.samples = - VkSampleCountFlagBits(uint32_t(1) << uint32_t(key.msaa_samples)); + if (key.msaa_samples == xenos::MsaaSamples::k2X && + !msaa_2x_attachments_supported_) { + image_create_info.samples = VK_SAMPLE_COUNT_4_BIT; + } else { + image_create_info.samples = + VkSampleCountFlagBits(uint32_t(1) << uint32_t(key.msaa_samples)); + } image_create_info.tiling = VK_IMAGE_TILING_OPTIMAL; image_create_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; image_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; @@ -509,7 +1018,11 @@ RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget( if (!ui::vulkan::util::CreateDedicatedAllocationImage( provider, image_create_info, ui::vulkan::util::MemoryPurpose::kDeviceLocal, image, memory)) { - // TODO(Triang3l): Error message. + XELOGE( + "VulkanRenderTarget: Failed to create a {}x{} {}xMSAA {} render target " + "image", + image_create_info.extent.width, image_create_info.extent.height, + uint32_t(1) << uint32_t(key.msaa_samples), key.GetFormatName()); return nullptr; } @@ -532,7 +1045,12 @@ RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget( VkImageView view_depth_color; if (dfn.vkCreateImageView(device, &view_create_info, nullptr, &view_depth_color) != VK_SUCCESS) { - // TODO(Triang3l): Error message. + XELOGE( + "VulkanRenderTarget: Failed to create a {} view for a {}x{} {}xMSAA {} " + "render target", + key.is_depth ? 
"depth" : "color", image_create_info.extent.width, + image_create_info.extent.height, + uint32_t(1) << uint32_t(key.msaa_samples), key.GetFormatName()); dfn.vkDestroyImage(device, image, nullptr); dfn.vkFreeMemory(device, memory, nullptr); return nullptr; @@ -546,7 +1064,12 @@ RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget( VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; if (dfn.vkCreateImageView(device, &view_create_info, nullptr, &view_depth_stencil) != VK_SUCCESS) { - // TODO(Triang3l): Error message. + XELOGE( + "VulkanRenderTarget: Failed to create a depth / stencil view for a " + "{}x{} {}xMSAA {} render target", + image_create_info.extent.width, image_create_info.extent.height, + uint32_t(1) << uint32_t(key.msaa_samples), + xenos::GetDepthRenderTargetFormatName(key.GetDepthFormat())); dfn.vkDestroyImageView(device, view_depth_color, nullptr); dfn.vkDestroyImage(device, image, nullptr); dfn.vkFreeMemory(device, memory, nullptr); @@ -555,7 +1078,12 @@ RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget( view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; if (dfn.vkCreateImageView(device, &view_create_info, nullptr, &view_stencil) != VK_SUCCESS) { - // TODO(Triang3l): Error message. 
+ XELOGE( + "VulkanRenderTarget: Failed to create a stencil view for a {}x{} " + "{}xMSAA render target", + image_create_info.extent.width, image_create_info.extent.height, + uint32_t(1) << uint32_t(key.msaa_samples), + xenos::GetDepthRenderTargetFormatName(key.GetDepthFormat())); dfn.vkDestroyImageView(device, view_depth_stencil, nullptr); dfn.vkDestroyImageView(device, view_depth_color, nullptr); dfn.vkDestroyImage(device, image, nullptr); @@ -567,7 +1095,12 @@ RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget( view_create_info.format = VK_FORMAT_R8G8B8A8_SRGB; if (dfn.vkCreateImageView(device, &view_create_info, nullptr, &view_srgb) != VK_SUCCESS) { - // TODO(Triang3l): Error message. + XELOGE( + "VulkanRenderTarget: Failed to create an sRGB view for a {}x{} " + "{}xMSAA render target", + image_create_info.extent.width, image_create_info.extent.height, + uint32_t(1) << uint32_t(key.msaa_samples), + xenos::GetColorRenderTargetFormatName(key.GetColorFormat())); dfn.vkDestroyImageView(device, view_depth_color, nullptr); dfn.vkDestroyImage(device, image, nullptr); dfn.vkFreeMemory(device, memory, nullptr); @@ -578,7 +1111,11 @@ RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget( view_create_info.format = transfer_format; if (dfn.vkCreateImageView(device, &view_create_info, nullptr, &view_color_transfer_separate) != VK_SUCCESS) { - // TODO(Triang3l): Error message. 
+ XELOGE( + "VulkanRenderTarget: Failed to create a transfer view for a {}x{} " + "{}xMSAA {} render target", + image_create_info.extent.width, image_create_info.extent.height, + uint32_t(1) << uint32_t(key.msaa_samples), key.GetFormatName()); if (view_srgb != VK_NULL_HANDLE) { dfn.vkDestroyImageView(device, view_srgb, nullptr); } @@ -590,11 +1127,170 @@ RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget( } } - VkImageView view_transfer_separate = VK_NULL_HANDLE; + ui::vulkan::SingleLayoutDescriptorSetPool& descriptor_set_pool = + key.is_depth ? *descriptor_set_pool_sampled_image_x2_ + : *descriptor_set_pool_sampled_image_; + size_t descriptor_set_index_transfer_source = descriptor_set_pool.Allocate(); + if (descriptor_set_index_transfer_source == SIZE_MAX) { + XELOGE( + "VulkanRenderTargetCache: Failed to allocate sampled image descriptors " + "for a {} render target", + key.is_depth ? "depth/stencil" : "color"); + if (view_color_transfer_separate != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_color_transfer_separate, nullptr); + } + if (view_srgb != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_srgb, nullptr); + } + dfn.vkDestroyImageView(device, view_depth_color, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + VkDescriptorSet descriptor_set_transfer_source = + descriptor_set_pool.Get(descriptor_set_index_transfer_source); + VkWriteDescriptorSet descriptor_set_write[2]; + VkDescriptorImageInfo descriptor_set_write_depth_color; + descriptor_set_write_depth_color.sampler = VK_NULL_HANDLE; + descriptor_set_write_depth_color.imageView = + view_color_transfer_separate != VK_NULL_HANDLE + ? 
view_color_transfer_separate + : view_depth_color; + descriptor_set_write_depth_color.imageLayout = + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + descriptor_set_write[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_set_write[0].pNext = nullptr; + descriptor_set_write[0].dstSet = descriptor_set_transfer_source; + descriptor_set_write[0].dstBinding = 0; + descriptor_set_write[0].dstArrayElement = 0; + descriptor_set_write[0].descriptorCount = 1; + descriptor_set_write[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + descriptor_set_write[0].pImageInfo = &descriptor_set_write_depth_color; + descriptor_set_write[0].pBufferInfo = nullptr; + descriptor_set_write[0].pTexelBufferView = nullptr; + VkDescriptorImageInfo descriptor_set_write_stencil; + if (key.is_depth) { + descriptor_set_write_stencil.sampler = VK_NULL_HANDLE; + descriptor_set_write_stencil.imageView = view_stencil; + descriptor_set_write_stencil.imageLayout = + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + descriptor_set_write[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_set_write[1].pNext = nullptr; + descriptor_set_write[1].dstSet = descriptor_set_transfer_source; + descriptor_set_write[1].dstBinding = 1; + descriptor_set_write[1].dstArrayElement = 0; + descriptor_set_write[1].descriptorCount = 1; + descriptor_set_write[1].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + descriptor_set_write[1].pImageInfo = &descriptor_set_write_stencil; + descriptor_set_write[1].pBufferInfo = nullptr; + descriptor_set_write[1].pTexelBufferView = nullptr; + } + dfn.vkUpdateDescriptorSets(device, key.is_depth ? 
2 : 1, descriptor_set_write, + 0, nullptr); - return new VulkanRenderTarget(key, provider, image, memory, view_depth_color, + return new VulkanRenderTarget(key, *this, image, memory, view_depth_color, view_depth_stencil, view_stencil, view_srgb, - view_color_transfer_separate); + view_color_transfer_separate, + descriptor_set_index_transfer_source); +} + +void VulkanRenderTargetCache::GetEdramBufferUsageMasks( + EdramBufferUsage usage, VkPipelineStageFlags& stage_mask_out, + VkAccessFlags& access_mask_out) { + switch (usage) { + case EdramBufferUsage::kFragmentRead: + stage_mask_out = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + access_mask_out = VK_ACCESS_SHADER_READ_BIT; + break; + case EdramBufferUsage::kFragmentReadWrite: + stage_mask_out = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + access_mask_out = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + break; + case EdramBufferUsage::kComputeRead: + stage_mask_out = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_mask_out = VK_ACCESS_SHADER_READ_BIT; + break; + case EdramBufferUsage::kComputeWrite: + stage_mask_out = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_mask_out = VK_ACCESS_SHADER_WRITE_BIT; + break; + case EdramBufferUsage::kTransferRead: + stage_mask_out = VK_PIPELINE_STAGE_TRANSFER_BIT; + access_mask_out = VK_ACCESS_TRANSFER_READ_BIT; + break; + case EdramBufferUsage::kTransferWrite: + stage_mask_out = VK_PIPELINE_STAGE_TRANSFER_BIT; + access_mask_out = VK_ACCESS_TRANSFER_WRITE_BIT; + break; + default: + assert_unhandled_case(usage); + } +} + +void VulkanRenderTargetCache::UseEdramBuffer(EdramBufferUsage new_usage) { + if (edram_buffer_usage_ == new_usage) { + return; + } + VkPipelineStageFlags src_stage_mask, dst_stage_mask; + VkAccessFlags src_access_mask, dst_access_mask; + GetEdramBufferUsageMasks(edram_buffer_usage_, src_stage_mask, + src_access_mask); + GetEdramBufferUsageMasks(new_usage, dst_stage_mask, dst_access_mask); + if (command_processor_.PushBufferMemoryBarrier( + edram_buffer_, 0, 
VK_WHOLE_SIZE, src_stage_mask, dst_stage_mask, + src_access_mask, dst_access_mask)) { + // Resetting edram_buffer_modification_status_ only if the barrier has been + // truly inserted. + edram_buffer_modification_status_ = + EdramBufferModificationStatus::kUnmodified; + } + edram_buffer_usage_ = new_usage; +} + +void VulkanRenderTargetCache::MarkEdramBufferModified( + EdramBufferModificationStatus modification_status) { + assert_true(modification_status != + EdramBufferModificationStatus::kUnmodified); + switch (edram_buffer_usage_) { + case EdramBufferUsage::kFragmentReadWrite: + // max because being modified via unordered access requires stricter + // synchronization than via fragment shader interlocks. + edram_buffer_modification_status_ = + std::max(edram_buffer_modification_status_, modification_status); + break; + case EdramBufferUsage::kComputeWrite: + assert_true(modification_status == + EdramBufferModificationStatus::kViaUnordered); + modification_status = EdramBufferModificationStatus::kViaUnordered; + break; + default: + assert_always( + "While changing the usage of the EDRAM buffer before marking it as " + "modified is handled safely (but will cause spurious marking as " + "modified after the changes have been implicitly committed by the " + "usage switch), normally that shouldn't be done and is an " + "indication of architectural mistakes. 
Alternatively, this may " + "indicate that the usage switch has been forgotten before writing, " + "which is a clearly invalid situation."); + } +} + +void VulkanRenderTargetCache::CommitEdramBufferShaderWrites( + EdramBufferModificationStatus commit_status) { + assert_true(commit_status != EdramBufferModificationStatus::kUnmodified); + if (edram_buffer_modification_status_ < commit_status) { + return; + } + VkPipelineStageFlags stage_mask; + VkAccessFlags access_mask; + GetEdramBufferUsageMasks(edram_buffer_usage_, stage_mask, access_mask); + assert_not_zero(access_mask & VK_ACCESS_SHADER_WRITE_BIT); + command_processor_.PushBufferMemoryBarrier( + edram_buffer_, 0, VK_WHOLE_SIZE, stage_mask, stage_mask, access_mask, + access_mask, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, false); + edram_buffer_modification_status_ = + EdramBufferModificationStatus::kUnmodified; + PixelShaderInterlockFullEdramBarrierPlaced(); } const VulkanRenderTargetCache::Framebuffer* @@ -646,8 +1342,15 @@ VulkanRenderTargetCache::GetFramebuffer( depth_and_color_rts_remaining &= ~(uint32_t(1) << rt_index); const auto& vulkan_rt = *static_cast( depth_and_color_render_targets[rt_index]); - attachments[attachment_count++] = rt_index ? vulkan_rt.view_depth_color() - : vulkan_rt.view_depth_stencil(); + VkImageView attachment; + if (rt_index) { + attachment = render_pass_key.color_rts_use_transfer_formats + ? 
vulkan_rt.view_color_transfer() + : vulkan_rt.view_depth_color(); + } else { + attachment = vulkan_rt.view_depth_stencil(); + } + attachments[attachment_count++] = attachment; } VkFramebufferCreateInfo framebuffer_create_info; @@ -684,6 +1387,3491 @@ VulkanRenderTargetCache::GetFramebuffer( .first->second; } +VkShaderModule VulkanRenderTargetCache::GetTransferShader( + TransferShaderKey key) { + auto shader_it = transfer_shaders_.find(key); + if (shader_it != transfer_shaders_.end()) { + return shader_it->second; + } + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + + std::vector id_vector_temp; + std::vector uint_vector_temp; + + spv::Builder builder(spv::Spv_1_0, + (SpirvShaderTranslator::kSpirvMagicToolId << 16) | 1, + nullptr); + spv::Id ext_inst_glsl_std_450 = builder.import("GLSL.std.450"); + builder.addCapability(spv::CapabilityShader); + builder.setMemoryModel(spv::AddressingModelLogical, spv::MemoryModelGLSL450); + builder.setSource(spv::SourceLanguageUnknown, 0); + + spv::Id type_void = builder.makeVoidType(); + spv::Id type_bool = builder.makeBoolType(); + spv::Id type_int = builder.makeIntType(32); + spv::Id type_int2 = builder.makeVectorType(type_int, 2); + spv::Id type_uint = builder.makeUintType(32); + spv::Id type_uint2 = builder.makeVectorType(type_uint, 2); + spv::Id type_uint4 = builder.makeVectorType(type_uint, 4); + spv::Id type_float = builder.makeFloatType(32); + spv::Id type_float2 = builder.makeVectorType(type_float, 2); + spv::Id type_float4 = builder.makeVectorType(type_float, 4); + + const TransferModeInfo& mode = kTransferModes[size_t(key.mode)]; + const TransferPipelineLayoutInfo& pipeline_layout_info = + kTransferPipelineLayoutInfos[size_t(mode.pipeline_layout)]; + + // If not dest_is_color, it's depth, or stencil bit - 40-sample columns are + // swapped as opposed to color source. 
+ bool dest_is_color = (mode.output == TransferOutput::kColor); + xenos::ColorRenderTargetFormat dest_color_format = + xenos::ColorRenderTargetFormat(key.dest_resource_format); + xenos::DepthRenderTargetFormat dest_depth_format = + xenos::DepthRenderTargetFormat(key.dest_resource_format); + bool dest_is_64bpp = + dest_is_color && xenos::IsColorRenderTargetFormat64bpp(dest_color_format); + + xenos::ColorRenderTargetFormat source_color_format = + xenos::ColorRenderTargetFormat(key.source_resource_format); + xenos::DepthRenderTargetFormat source_depth_format = + xenos::DepthRenderTargetFormat(key.source_resource_format); + // If not source_is_color, it's depth / stencil - 40-sample columns are + // swapped as opposed to color destination. + bool source_is_color = (pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetColorTextureBit) != 0; + bool source_is_64bpp; + uint32_t source_color_format_component_count; + uint32_t source_color_texture_component_mask; + bool source_color_is_uint; + spv::Id source_color_component_type; + if (source_is_color) { + assert_zero(pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetDepthStencilTexturesBit); + source_is_64bpp = + xenos::IsColorRenderTargetFormat64bpp(source_color_format); + source_color_format_component_count = + xenos::GetColorRenderTargetFormatComponentCount(source_color_format); + if (mode.output == TransferOutput::kStencilBit) { + if (source_is_64bpp && !dest_is_64bpp) { + // Need one component, but choosing from the two 32bpp halves of the + // 64bpp sample. + source_color_texture_component_mask = + 0b1 | (0b1 << (source_color_format_component_count >> 1)); + } else { + // Red is at least 8 bits per component in all formats. 
+ source_color_texture_component_mask = 0b1; + } + } else { + source_color_texture_component_mask = + (uint32_t(1) << source_color_format_component_count) - 1; + } + GetColorOwnershipTransferVulkanFormat(source_color_format, + &source_color_is_uint); + source_color_component_type = source_color_is_uint ? type_uint : type_float; + } else { + source_is_64bpp = false; + source_color_format_component_count = 0; + source_color_texture_component_mask = 0; + source_color_is_uint = false; + source_color_component_type = spv::NoType; + } + + std::vector main_interface; + + // Outputs. + bool shader_uses_stencil_reference_output = + mode.output == TransferOutput::kDepth && + provider.device_extensions().ext_shader_stencil_export; + bool dest_color_is_uint = false; + uint32_t dest_color_component_count = 0; + spv::Id type_fragment_data_component = spv::NoResult; + spv::Id type_fragment_data = spv::NoResult; + spv::Id output_fragment_data = spv::NoResult; + spv::Id output_fragment_depth = spv::NoResult; + spv::Id output_fragment_stencil_ref = spv::NoResult; + switch (mode.output) { + case TransferOutput::kColor: + GetColorOwnershipTransferVulkanFormat(dest_color_format, + &dest_color_is_uint); + dest_color_component_count = + xenos::GetColorRenderTargetFormatComponentCount(dest_color_format); + type_fragment_data_component = + dest_color_is_uint ? type_uint : type_float; + type_fragment_data = + dest_color_component_count > 1 + ? 
builder.makeVectorType(type_fragment_data_component, + dest_color_component_count) + : type_fragment_data_component; + output_fragment_data = builder.createVariable( + spv::NoPrecision, spv::StorageClassOutput, type_fragment_data, + "xe_transfer_fragment_data"); + builder.addDecoration(output_fragment_data, spv::DecorationLocation, + key.dest_color_rt_index); + main_interface.push_back(output_fragment_data); + break; + case TransferOutput::kDepth: + output_fragment_depth = + builder.createVariable(spv::NoPrecision, spv::StorageClassOutput, + type_float, "gl_FragDepth"); + builder.addDecoration(output_fragment_depth, spv::DecorationBuiltIn, + spv::BuiltInFragDepth); + main_interface.push_back(output_fragment_depth); + if (shader_uses_stencil_reference_output) { + builder.addExtension("SPV_EXT_shader_stencil_export"); + builder.addCapability(spv::CapabilityStencilExportEXT); + output_fragment_stencil_ref = + builder.createVariable(spv::NoPrecision, spv::StorageClassOutput, + type_int, "gl_FragStencilRefARB"); + builder.addDecoration(output_fragment_stencil_ref, + spv::DecorationBuiltIn, + spv::BuiltInFragStencilRefEXT); + main_interface.push_back(output_fragment_stencil_ref); + } + break; + default: + break; + } + + // Bindings. + // Generating SPIR-V 1.0, no need to add bindings to the entry point's + // interface until SPIR-V 1.4. + // Color source. 
+ bool source_is_multisampled = + key.source_msaa_samples != xenos::MsaaSamples::k1X; + spv::Id source_color_texture = spv::NoResult; + if (pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetColorTextureBit) { + source_color_texture = builder.createVariable( + spv::NoPrecision, spv::StorageClassUniformConstant, + builder.makeImageType(source_color_component_type, spv::Dim2D, false, + false, source_is_multisampled, 1, + spv::ImageFormatUnknown), + "xe_transfer_color"); + builder.addDecoration( + source_color_texture, spv::DecorationDescriptorSet, + xe::bit_count(pipeline_layout_info.used_descriptor_sets & + (kTransferUsedDescriptorSetColorTextureBit - 1))); + builder.addDecoration(source_color_texture, spv::DecorationBinding, 0); + } + // Depth / stencil source. + spv::Id source_depth_texture = spv::NoResult; + spv::Id source_stencil_texture = spv::NoResult; + if (pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetDepthStencilTexturesBit) { + uint32_t source_depth_stencil_descriptor_set = + xe::bit_count(pipeline_layout_info.used_descriptor_sets & + (kTransferUsedDescriptorSetDepthStencilTexturesBit - 1)); + // Using `depth == false` in makeImageType because comparisons are not + // required, and other values of `depth` are causing issues in drivers. 
+ // https://github.com/microsoft/DirectXShaderCompiler/issues/1107 + if (mode.output != TransferOutput::kStencilBit) { + source_depth_texture = builder.createVariable( + spv::NoPrecision, spv::StorageClassUniformConstant, + builder.makeImageType(type_float, spv::Dim2D, false, false, + source_is_multisampled, 1, + spv::ImageFormatUnknown), + "xe_transfer_depth"); + builder.addDecoration(source_depth_texture, spv::DecorationDescriptorSet, + source_depth_stencil_descriptor_set); + builder.addDecoration(source_depth_texture, spv::DecorationBinding, 0); + } + if (mode.output != TransferOutput::kDepth || + shader_uses_stencil_reference_output) { + source_stencil_texture = builder.createVariable( + spv::NoPrecision, spv::StorageClassUniformConstant, + builder.makeImageType(type_uint, spv::Dim2D, false, false, + source_is_multisampled, 1, + spv::ImageFormatUnknown), + "xe_transfer_stencil"); + builder.addDecoration(source_stencil_texture, + spv::DecorationDescriptorSet, + source_depth_stencil_descriptor_set); + builder.addDecoration(source_stencil_texture, spv::DecorationBinding, 1); + } + } + // Host depth source buffer. + spv::Id host_depth_source_buffer = spv::NoResult; + if (pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetHostDepthBufferBit) { + id_vector_temp.clear(); + id_vector_temp.push_back(builder.makeRuntimeArray(type_uint)); + // Storage buffers have std430 packing, no padding to 4-component vectors. 
+ builder.addDecoration(id_vector_temp.back(), spv::DecorationArrayStride, + sizeof(float)); + spv::Id type_host_depth_source_buffer = + builder.makeStructType(id_vector_temp, "XeTransferHostDepthBuffer"); + builder.addMemberName(type_host_depth_source_buffer, 0, "host_depth"); + builder.addMemberDecoration(type_host_depth_source_buffer, 0, + spv::DecorationNonWritable); + builder.addMemberDecoration(type_host_depth_source_buffer, 0, + spv::DecorationOffset, 0); + // Block since SPIR-V 1.3, but since SPIR-V 1.0 is generated, it's + // BufferBlock. + builder.addDecoration(type_host_depth_source_buffer, + spv::DecorationBufferBlock); + // StorageBuffer since SPIR-V 1.3, but since SPIR-V 1.0 is generated, it's + // Uniform. + host_depth_source_buffer = builder.createVariable( + spv::NoPrecision, spv::StorageClassUniform, + type_host_depth_source_buffer, "xe_transfer_host_depth_buffer"); + builder.addDecoration( + host_depth_source_buffer, spv::DecorationDescriptorSet, + xe::bit_count(pipeline_layout_info.used_descriptor_sets & + (kTransferUsedDescriptorSetHostDepthBufferBit - 1))); + builder.addDecoration(host_depth_source_buffer, spv::DecorationBinding, 0); + } + // Host depth source texture (the depth / stencil descriptor set is reused, + // but stencil is not needed). 
+ spv::Id host_depth_source_texture = spv::NoResult; + if (pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetHostDepthStencilTexturesBit) { + host_depth_source_texture = builder.createVariable( + spv::NoPrecision, spv::StorageClassUniformConstant, + builder.makeImageType( + type_float, spv::Dim2D, false, false, + key.host_depth_source_msaa_samples != xenos::MsaaSamples::k1X, 1, + spv::ImageFormatUnknown), + "xe_transfer_host_depth"); + builder.addDecoration( + host_depth_source_texture, spv::DecorationDescriptorSet, + xe::bit_count( + pipeline_layout_info.used_descriptor_sets & + (kTransferUsedDescriptorSetHostDepthStencilTexturesBit - 1))); + builder.addDecoration(host_depth_source_texture, spv::DecorationBinding, 0); + } + // Push constants. + id_vector_temp.clear(); + uint32_t push_constants_member_host_depth_address = UINT32_MAX; + if (pipeline_layout_info.used_push_constant_dwords & + kTransferUsedPushConstantDwordHostDepthAddressBit) { + push_constants_member_host_depth_address = uint32_t(id_vector_temp.size()); + id_vector_temp.push_back(type_uint); + } + uint32_t push_constants_member_address = UINT32_MAX; + if (pipeline_layout_info.used_push_constant_dwords & + kTransferUsedPushConstantDwordAddressBit) { + push_constants_member_address = uint32_t(id_vector_temp.size()); + id_vector_temp.push_back(type_uint); + } + uint32_t push_constants_member_stencil_mask = UINT32_MAX; + if (pipeline_layout_info.used_push_constant_dwords & + kTransferUsedPushConstantDwordStencilMaskBit) { + push_constants_member_stencil_mask = uint32_t(id_vector_temp.size()); + id_vector_temp.push_back(type_uint); + } + spv::Id push_constants = spv::NoResult; + if (!id_vector_temp.empty()) { + spv::Id type_push_constants = + builder.makeStructType(id_vector_temp, "XeTransferPushConstants"); + if (pipeline_layout_info.used_push_constant_dwords & + kTransferUsedPushConstantDwordHostDepthAddressBit) { + assert_true(push_constants_member_host_depth_address != 
UINT32_MAX); + builder.addMemberName(type_push_constants, + push_constants_member_host_depth_address, + "host_depth_address"); + builder.addMemberDecoration( + type_push_constants, push_constants_member_host_depth_address, + spv::DecorationOffset, + sizeof(uint32_t) * + xe::bit_count( + pipeline_layout_info.used_push_constant_dwords & + (kTransferUsedPushConstantDwordHostDepthAddressBit - 1))); + } + if (pipeline_layout_info.used_push_constant_dwords & + kTransferUsedPushConstantDwordAddressBit) { + assert_true(push_constants_member_address != UINT32_MAX); + builder.addMemberName(type_push_constants, push_constants_member_address, + "address"); + builder.addMemberDecoration( + type_push_constants, push_constants_member_address, + spv::DecorationOffset, + sizeof(uint32_t) * + xe::bit_count(pipeline_layout_info.used_push_constant_dwords & + (kTransferUsedPushConstantDwordAddressBit - 1))); + } + if (pipeline_layout_info.used_push_constant_dwords & + kTransferUsedPushConstantDwordStencilMaskBit) { + assert_true(push_constants_member_stencil_mask != UINT32_MAX); + builder.addMemberName(type_push_constants, + push_constants_member_stencil_mask, "stencil_mask"); + builder.addMemberDecoration( + type_push_constants, push_constants_member_stencil_mask, + spv::DecorationOffset, + sizeof(uint32_t) * + xe::bit_count( + pipeline_layout_info.used_push_constant_dwords & + (kTransferUsedPushConstantDwordStencilMaskBit - 1))); + } + builder.addDecoration(type_push_constants, spv::DecorationBlock); + push_constants = builder.createVariable( + spv::NoPrecision, spv::StorageClassPushConstant, type_push_constants, + "xe_transfer_push_constants"); + } + + // Coordinate inputs. 
+ spv::Id input_fragment_coord = builder.createVariable( + spv::NoPrecision, spv::StorageClassInput, type_float4, "gl_FragCoord"); + builder.addDecoration(input_fragment_coord, spv::DecorationBuiltIn, + spv::BuiltInFragCoord); + main_interface.push_back(input_fragment_coord); + spv::Id input_sample_id = spv::NoResult; + spv::Id spec_const_sample_id = spv::NoResult; + if (key.dest_msaa_samples != xenos::MsaaSamples::k1X) { + if (device_features.sampleRateShading) { + // One draw for all samples. + builder.addCapability(spv::CapabilitySampleRateShading); + input_sample_id = builder.createVariable( + spv::NoPrecision, spv::StorageClassInput, type_int, "gl_SampleID"); + builder.addDecoration(input_sample_id, spv::DecorationFlat); + builder.addDecoration(input_sample_id, spv::DecorationBuiltIn, + spv::BuiltInSampleId); + main_interface.push_back(input_sample_id); + } else { + // One sample per draw, with different sample masks. + spec_const_sample_id = builder.makeUintConstant(0, true); + builder.addName(spec_const_sample_id, "xe_transfer_sample_id"); + builder.addDecoration(spec_const_sample_id, spv::DecorationSpecId, 0); + } + } + + // Begin the main function. + std::vector main_param_types; + std::vector> main_precisions; + spv::Block* main_entry; + spv::Function* main_function = + builder.makeFunctionEntry(spv::NoPrecision, type_void, "main", + main_param_types, main_precisions, &main_entry); + + // Working with unsigned numbers for simplicity now, bitcasting to signed will + // be done at texture fetch. + + uint32_t tile_width_samples_scaled = + xenos::kEdramTileWidthSamples * resolution_scale_x_; + uint32_t tile_height_samples_scaled = + xenos::kEdramTileHeightSamples * resolution_scale_y_; + + // Convert the fragment coordinates to uint2. 
+ uint_vector_temp.clear(); + uint_vector_temp.reserve(2); + uint_vector_temp.push_back(0); + uint_vector_temp.push_back(1); + spv::Id dest_pixel_coord = builder.createUnaryOp( + spv::OpConvertFToU, type_uint2, + builder.createRvalueSwizzle( + spv::NoPrecision, type_float2, + builder.createLoad(input_fragment_coord, spv::NoPrecision), + uint_vector_temp)); + + // Prove to the AMD compiler that 24*24 multiplication can be done. 16 bits + // are more than enough for coordinates even with 3x resolution scaling (and + // Direct3D 11 hardware has 16.8 fixed-point coordinates). + // TODO(Triang3l): OpUnreachable if the coordinates have upper bits set. + + // Split the destination pixel coordinate into scalars. + spv::Id dest_pixel_x = + builder.createCompositeExtract(dest_pixel_coord, type_uint, 0); + spv::Id dest_pixel_y = + builder.createCompositeExtract(dest_pixel_coord, type_uint, 1); + + // Split the destination pixel index into 32bpp tile and 32bpp-tile-relative + // pixel index. + uint32_t dest_sample_width_log2 = + uint32_t(dest_is_64bpp) + + uint32_t(key.dest_msaa_samples >= xenos::MsaaSamples::k4X); + uint32_t dest_sample_height_log2 = + uint32_t(key.dest_msaa_samples >= xenos::MsaaSamples::k2X); + uint32_t dest_tile_width_divide_scale, dest_tile_width_divide_shift; + draw_util::GetEdramTileWidthDivideScaleAndUpperShift( + resolution_scale_x_, dest_tile_width_divide_scale, + dest_tile_width_divide_shift); + // Doing 16*16=32 multiplication, not 32*32=64. + // TODO(Triang3l): Abstract this away, don't do 32*32 on Direct3D 12 too. + dest_tile_width_divide_scale &= UINT16_MAX; + dest_tile_width_divide_shift += 16; + // Need the host tile size in pixels, not samples. 
+ dest_tile_width_divide_shift -= dest_sample_width_log2; + spv::Id dest_tile_index_x = builder.createBinOp( + spv::OpShiftRightLogical, type_uint, + builder.createBinOp( + spv::OpIMul, type_uint, dest_pixel_x, + builder.makeUintConstant(dest_tile_width_divide_scale)), + builder.makeUintConstant(dest_tile_width_divide_shift)); + spv::Id dest_tile_pixel_x = builder.createBinOp( + spv::OpISub, type_uint, dest_pixel_x, + builder.createBinOp(spv::OpIMul, type_uint, dest_tile_index_x, + builder.makeUintConstant(tile_width_samples_scaled >> + dest_sample_width_log2))); + spv::Id dest_tile_index_y, dest_tile_pixel_y; + if (resolution_scale_y_ == 3) { + dest_tile_index_y = builder.createBinOp( + spv::OpShiftRightLogical, type_uint, + builder.createBinOp( + spv::OpIMul, type_uint, dest_pixel_y, + builder.makeUintConstant(draw_util::kDivideScale3 & UINT16_MAX)), + builder.makeUintConstant(draw_util::kDivideUpperShift3 + 16 + 4 - + dest_sample_height_log2)); + dest_tile_pixel_y = builder.createBinOp( + spv::OpISub, type_uint, dest_pixel_y, + builder.createBinOp( + spv::OpIMul, type_uint, dest_tile_index_y, + builder.makeUintConstant(tile_height_samples_scaled >> + dest_sample_height_log2))); + } else { + assert_true(resolution_scale_y_ <= 2); + uint32_t dest_tile_height_pixels_log2 = + (resolution_scale_y_ == 2 ? 
5 : 4) - dest_sample_height_log2; + dest_tile_index_y = builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_pixel_y, + builder.makeUintConstant(dest_tile_height_pixels_log2)); + dest_tile_pixel_y = builder.createBinOp( + spv::OpBitwiseAnd, type_uint, dest_pixel_y, + builder.makeUintConstant((uint32_t(1) << dest_tile_height_pixels_log2) - + 1)); + } + + assert_true(push_constants_member_address != UINT32_MAX); + id_vector_temp.clear(); + id_vector_temp.push_back( + builder.makeIntConstant(int32_t(push_constants_member_address))); + spv::Id address_constant = builder.createLoad( + builder.createAccessChain(spv::StorageClassPushConstant, push_constants, + id_vector_temp), + spv::NoPrecision); + + // Calculate the 32bpp tile index from its X and Y parts. + spv::Id dest_tile_index = builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp( + spv::OpIMul, type_uint, + builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, address_constant, + builder.makeUintConstant(0), + builder.makeUintConstant(xenos::kEdramPitchTilesBits)), + dest_tile_index_y), + dest_tile_index_x); + + // Load the destination sample index. + spv::Id dest_sample_id = spv::NoResult; + if (key.dest_msaa_samples != xenos::MsaaSamples::k1X) { + if (device_features.sampleRateShading) { + assert_true(input_sample_id != spv::NoResult); + dest_sample_id = builder.createUnaryOp( + spv::OpBitcast, type_uint, + builder.createLoad(input_sample_id, spv::NoPrecision)); + } else { + assert_true(spec_const_sample_id != spv::NoResult); + // Already uint. + dest_sample_id = spec_const_sample_id; + } + } + + // Transform the destination framebuffer pixel and sample coordinates into the + // source texture pixel and sample coordinates. + + // First sample bit at 4x with Vulkan standard locations - horizontal sample. + // Second sample bit at 4x with Vulkan standard locations - vertical sample. + // At 2x: + // - Native 2x: top is 1 in Vulkan, bottom is 0. 
+ // - 2x as 4x: top is 0, bottom is 3. + + spv::Id source_sample_id = dest_sample_id; + spv::Id source_tile_pixel_x = dest_tile_pixel_x; + spv::Id source_tile_pixel_y = dest_tile_pixel_y; + spv::Id source_color_half = spv::NoResult; + if (!source_is_64bpp && dest_is_64bpp) { + // 32bpp -> 64bpp, need two samples of the source. + if (key.source_msaa_samples >= xenos::MsaaSamples::k4X) { + // 32bpp -> 64bpp, 4x ->. + // Source has 32bpp halves in two adjacent samples. + if (key.dest_msaa_samples >= xenos::MsaaSamples::k4X) { + // 32bpp -> 64bpp, 4x -> 4x. + // 1 destination horizontal sample = 2 source horizontal samples. + // D p0,0 s0,0 = S p0,0 s0,0 | S p0,0 s1,0 + // D p0,0 s1,0 = S p1,0 s0,0 | S p1,0 s1,0 + // D p0,0 s0,1 = S p0,0 s0,1 | S p0,0 s1,1 + // D p0,0 s1,1 = S p1,0 s0,1 | S p1,0 s1,1 + // Thus destination horizontal sample -> source horizontal pixel, + // vertical samples are 1:1. + source_sample_id = + builder.createBinOp(spv::OpBitwiseAnd, type_uint, dest_sample_id, + builder.makeUintConstant(1 << 1)); + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_sample_id); + id_vector_temp.push_back(dest_tile_pixel_x); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + source_tile_pixel_x = + builder.createOp(spv::OpBitFieldInsert, type_uint, id_vector_temp); + } else if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) { + // 32bpp -> 64bpp, 4x -> 2x. + // 1 destination horizontal pixel = 2 source horizontal samples. + // D p0,0 s0 = S p0,0 s0,0 | S p0,0 s1,0 + // D p0,0 s1 = S p0,0 s0,1 | S p0,0 s1,1 + // D p1,0 s0 = S p1,0 s0,0 | S p1,0 s1,0 + // D p1,0 s1 = S p1,0 s0,1 | S p1,0 s1,1 + // Pixel index can be reused. Sample 1 (for native 2x) or 0 (for 2x as + // 4x) should become samples 01, sample 0 or 3 should become samples 23. 
+ if (msaa_2x_attachments_supported_) { + source_sample_id = builder.createBinOp( + spv::OpShiftLeftLogical, type_uint, + builder.createBinOp(spv::OpBitwiseXor, type_uint, dest_sample_id, + builder.makeUintConstant(1)), + builder.makeUintConstant(1)); + } else { + source_sample_id = + builder.createBinOp(spv::OpBitwiseAnd, type_uint, dest_sample_id, + builder.makeUintConstant(1 << 1)); + } + } else { + // 32bpp -> 64bpp, 4x -> 1x. + // 1 destination horizontal pixel = 2 source horizontal samples. + // D p0,0 = S p0,0 s0,0 | S p0,0 s1,0 + // D p0,1 = S p0,0 s0,1 | S p0,0 s1,1 + // Horizontal pixel index can be reused. Vertical pixel 1 should + // become sample 2. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(builder.makeUintConstant(0)); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + source_sample_id = + builder.createOp(spv::OpBitFieldInsert, type_uint, id_vector_temp); + source_tile_pixel_y = + builder.createBinOp(spv::OpShiftRightLogical, type_uint, + dest_tile_pixel_y, builder.makeUintConstant(1)); + } + } else { + // 32bpp -> 64bpp, 1x/2x ->. + // Source has 32bpp halves in two adjacent pixels. + if (key.dest_msaa_samples >= xenos::MsaaSamples::k4X) { + // 32bpp -> 64bpp, 1x/2x -> 4x. + // The X part. + // 1 destination horizontal sample = 2 source horizontal pixels. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(builder.createBinOp( + spv::OpShiftLeftLogical, type_uint, dest_tile_pixel_x, + builder.makeUintConstant(2))); + id_vector_temp.push_back(dest_sample_id); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + source_tile_pixel_x = + builder.createOp(spv::OpBitFieldInsert, type_uint, id_vector_temp); + // Y is handled by common code. + } else { + // 32bpp -> 64bpp, 1x/2x -> 1x/2x. + // The X part. 
+ // 1 destination horizontal pixel = 2 source horizontal pixels. + source_tile_pixel_x = + builder.createBinOp(spv::OpShiftLeftLogical, type_uint, + dest_tile_pixel_x, builder.makeUintConstant(1)); + // Y is handled by common code. + } + } + } else if (source_is_64bpp && !dest_is_64bpp) { + // 64bpp -> 32bpp, also the half to load. + if (key.dest_msaa_samples >= xenos::MsaaSamples::k4X) { + // 64bpp -> 32bpp, -> 4x. + // The needed half is in the destination horizontal sample index. + if (key.source_msaa_samples >= xenos::MsaaSamples::k4X) { + // 64bpp -> 32bpp, 4x -> 4x. + // D p0,0 s0,0 = S s0,0 low + // D p0,0 s1,0 = S s0,0 high + // D p1,0 s0,0 = S s1,0 low + // D p1,0 s1,0 = S s1,0 high + // Vertical pixel and sample (second bit) addressing is the same. + // However, 1 horizontal destination pixel = 1 horizontal source sample. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_sample_id); + id_vector_temp.push_back(dest_tile_pixel_x); + id_vector_temp.push_back(builder.makeUintConstant(0)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + source_sample_id = + builder.createOp(spv::OpBitFieldInsert, type_uint, id_vector_temp); + // 2 destination horizontal samples = 1 source horizontal sample, thus + // 2 destination horizontal pixels = 1 source horizontal pixel. + source_tile_pixel_x = + builder.createBinOp(spv::OpShiftRightLogical, type_uint, + dest_tile_pixel_x, builder.makeUintConstant(1)); + } else { + // 64bpp -> 32bpp, 1x/2x -> 4x. + // 2 destination horizontal samples = 1 source horizontal pixel, thus + // 1 destination horizontal pixel = 1 source horizontal pixel. Can reuse + // horizontal pixel index. + // Y is handled by common code. + } + // Half from the destination horizontal sample index. + source_color_half = + builder.createBinOp(spv::OpBitwiseAnd, type_uint, dest_sample_id, + builder.makeUintConstant(1)); + } else { + // 64bpp -> 32bpp, -> 1x/2x. 
+ // The needed half is in the destination horizontal pixel index. + if (key.source_msaa_samples >= xenos::MsaaSamples::k4X) { + // 64bpp -> 32bpp, 4x -> 1x/2x. + // (Destination horizontal pixel >> 1) & 1 = source horizontal sample + // (first bit). + source_sample_id = builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, dest_tile_pixel_x, + builder.makeUintConstant(1), builder.makeUintConstant(1)); + if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) { + // 64bpp -> 32bpp, 4x -> 2x. + // Destination vertical samples (1/0 in the first bit for native 2x or + // 0/1 in the second bit for 2x as 4x) = source vertical samples + // (second bit). + if (msaa_2x_attachments_supported_) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(source_sample_id); + id_vector_temp.push_back(builder.createBinOp( + spv::OpBitwiseXor, type_uint, dest_sample_id, + builder.makeUintConstant(1))); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + source_sample_id = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_sample_id); + id_vector_temp.push_back(source_sample_id); + id_vector_temp.push_back(builder.makeUintConstant(0)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + source_sample_id = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } + } else { + // 64bpp -> 32bpp, 4x -> 1x. + // 1 destination vertical pixel = 1 source vertical sample. 
+ id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(source_sample_id); + id_vector_temp.push_back(source_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + source_sample_id = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + source_tile_pixel_y = builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_tile_pixel_y, + builder.makeUintConstant(1)); + } + // 2 destination horizontal pixels = 1 source horizontal sample. + // 4 destination horizontal pixels = 1 source horizontal pixel. + source_tile_pixel_x = + builder.createBinOp(spv::OpShiftRightLogical, type_uint, + dest_tile_pixel_x, builder.makeUintConstant(2)); + } else { + // 64bpp -> 32bpp, 1x/2x -> 1x/2x. + // The X part. + // 2 destination horizontal pixels = 1 destination source pixel. + source_tile_pixel_x = + builder.createBinOp(spv::OpShiftRightLogical, type_uint, + dest_tile_pixel_x, builder.makeUintConstant(1)); + // Y is handled by common code. + } + // Half from the destination horizontal pixel index. + source_color_half = + builder.createBinOp(spv::OpBitwiseAnd, type_uint, dest_tile_pixel_x, + builder.makeUintConstant(1)); + } + assert_true(source_color_half != spv::NoResult); + } else { + // Same bit count. + if (key.source_msaa_samples != key.dest_msaa_samples) { + if (key.source_msaa_samples >= xenos::MsaaSamples::k4X) { + // Same BPP, 4x -> 1x/2x. + if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) { + // Same BPP, 4x -> 2x. + // Horizontal pixels to samples. Vertical sample (1/0 in the first bit + // for native 2x or 0/1 in the second bit for 2x as 4x) to second + // sample bit. 
+ if (msaa_2x_attachments_supported_) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_tile_pixel_x); + id_vector_temp.push_back(builder.createBinOp( + spv::OpBitwiseXor, type_uint, dest_sample_id, + builder.makeUintConstant(1))); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + source_sample_id = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_sample_id); + id_vector_temp.push_back(dest_tile_pixel_x); + id_vector_temp.push_back(builder.makeUintConstant(0)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + source_sample_id = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } + source_tile_pixel_x = builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_tile_pixel_x, + builder.makeUintConstant(1)); + } else { + // Same BPP, 4x -> 1x. + // Pixels to samples. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(builder.createBinOp( + spv::OpBitwiseAnd, type_uint, dest_tile_pixel_x, + builder.makeUintConstant(1))); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + source_sample_id = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + source_tile_pixel_x = builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_tile_pixel_x, + builder.makeUintConstant(1)); + source_tile_pixel_y = builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_tile_pixel_y, + builder.makeUintConstant(1)); + } + } else { + // Same BPP, 1x/2x -> 1x/2x/4x (as long as they're different). + // Only the X part - Y is handled by common code. + if (key.dest_msaa_samples >= xenos::MsaaSamples::k4X) { + // Horizontal samples to pixels. 
+ id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_sample_id); + id_vector_temp.push_back(dest_tile_pixel_x); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + source_tile_pixel_x = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } + } + } + } + // Common source Y and sample index for 1x/2x AA sources, independent of bits + // per sample. + if (key.source_msaa_samples < xenos::MsaaSamples::k4X && + key.source_msaa_samples != key.dest_msaa_samples) { + if (key.dest_msaa_samples >= xenos::MsaaSamples::k4X) { + // 1x/2x -> 4x. + if (key.source_msaa_samples == xenos::MsaaSamples::k2X) { + // 2x -> 4x. + // Vertical samples (second bit) of 4x destination to vertical sample + // (1, 0 for native 2x, or 0, 3 for 2x as 4x) of 2x source. + source_sample_id = + builder.createBinOp(spv::OpShiftRightLogical, type_uint, + dest_sample_id, builder.makeUintConstant(1)); + if (msaa_2x_attachments_supported_) { + source_sample_id = builder.createBinOp(spv::OpBitwiseXor, type_uint, + source_sample_id, + builder.makeUintConstant(1)); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(source_sample_id); + id_vector_temp.push_back(source_sample_id); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + source_sample_id = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } else { + // 1x -> 4x. + // Vertical samples (second bit) to Y pixels. 
+ id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back( + builder.createBinOp(spv::OpShiftRightLogical, type_uint, + dest_sample_id, builder.makeUintConstant(1))); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + source_tile_pixel_y = + builder.createOp(spv::OpBitFieldInsert, type_uint, id_vector_temp); + } + } else { + // 1x/2x -> different 1x/2x. + if (key.source_msaa_samples == xenos::MsaaSamples::k2X) { + // 2x -> 1x. + // Vertical pixels of 2x destination to vertical samples (1, 0 for + // native 2x, or 0, 3 for 2x as 4x) of 1x source. + source_sample_id = + builder.createBinOp(spv::OpBitwiseAnd, type_uint, dest_tile_pixel_y, + builder.makeUintConstant(1)); + if (msaa_2x_attachments_supported_) { + source_sample_id = builder.createBinOp(spv::OpBitwiseXor, type_uint, + source_sample_id, + builder.makeUintConstant(1)); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(source_sample_id); + id_vector_temp.push_back(source_sample_id); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + source_sample_id = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + source_tile_pixel_y = + builder.createBinOp(spv::OpShiftRightLogical, type_uint, + dest_tile_pixel_y, builder.makeUintConstant(1)); + } else { + // 1x -> 2x. + // Vertical samples (1/0 in the first bit for native 2x or 0/1 in the + // second bit for 2x as 4x) of 2x destination to vertical pixels of 1x + // source. 
+ if (msaa_2x_attachments_supported_) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back( + builder.createBinOp(spv::OpBitwiseXor, type_uint, dest_sample_id, + builder.makeUintConstant(1))); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + source_tile_pixel_y = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back( + builder.createBinOp(spv::OpShiftRightLogical, type_uint, + dest_sample_id, builder.makeUintConstant(1))); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + source_tile_pixel_y = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } + } + } + } + + uint32_t source_pixel_width_dwords_log2 = + uint32_t(key.source_msaa_samples >= xenos::MsaaSamples::k4X) + + uint32_t(source_is_64bpp); + + if (source_is_color != dest_is_color) { + // Copying between color and depth / stencil - swap 40-32bpp-sample columns + // in the pixel index within the source 32bpp tile. + uint32_t source_32bpp_tile_half_pixels = + tile_width_samples_scaled >> (1 + source_pixel_width_dwords_log2); + source_tile_pixel_x = builder.createUnaryOp( + spv::OpBitcast, type_uint, + builder.createBinOp( + spv::OpIAdd, type_int, + builder.createUnaryOp(spv::OpBitcast, type_int, + source_tile_pixel_x), + builder.createTriOp( + spv::OpSelect, type_int, + builder.createBinOp( + spv::OpULessThan, builder.makeBoolType(), + source_tile_pixel_x, + builder.makeUintConstant(source_32bpp_tile_half_pixels)), + builder.makeIntConstant(int32_t(source_32bpp_tile_half_pixels)), + builder.makeIntConstant( + -int32_t(source_32bpp_tile_half_pixels))))); + } + + // Transform the destination 32bpp tile index into the source. 
+ spv::Id source_tile_index = builder.createUnaryOp( + spv::OpBitcast, type_uint, + builder.createBinOp( + spv::OpIAdd, type_int, + builder.createUnaryOp(spv::OpBitcast, type_int, dest_tile_index), + builder.createTriOp( + spv::OpBitFieldSExtract, type_int, + builder.createUnaryOp(spv::OpBitcast, type_int, address_constant), + builder.makeUintConstant(xenos::kEdramPitchTilesBits * 2), + builder.makeUintConstant(xenos::kEdramBaseTilesBits)))); + // Split the source 32bpp tile index into X and Y tile index within the source + // image. + spv::Id source_pitch_tiles = builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, address_constant, + builder.makeUintConstant(xenos::kEdramPitchTilesBits), + builder.makeUintConstant(xenos::kEdramPitchTilesBits)); + spv::Id source_tile_index_y = builder.createBinOp( + spv::OpUDiv, type_uint, source_tile_index, source_pitch_tiles); + spv::Id source_tile_index_x = builder.createBinOp( + spv::OpUMod, type_uint, source_tile_index, source_pitch_tiles); + // Finally calculate the source texture coordinates. + spv::Id source_pixel_x_int = builder.createUnaryOp( + spv::OpBitcast, type_int, + builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp( + spv::OpIMul, type_uint, + builder.makeUintConstant(tile_width_samples_scaled >> + source_pixel_width_dwords_log2), + source_tile_index_x), + source_tile_pixel_x)); + spv::Id source_pixel_y_int = builder.createUnaryOp( + spv::OpBitcast, type_int, + builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp( + spv::OpIMul, type_uint, + builder.makeUintConstant( + tile_height_samples_scaled >> + uint32_t(key.source_msaa_samples >= xenos::MsaaSamples::k2X)), + source_tile_index_y), + source_tile_pixel_y)); + + // Load the source. 
+ + spv::Builder::TextureParameters source_texture_parameters = {}; + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(source_pixel_x_int); + id_vector_temp.push_back(source_pixel_y_int); + spv::Id source_coordinates[2] = { + builder.createCompositeConstruct(type_int2, id_vector_temp), + }; + spv::Id source_sample_ids_int[2] = {}; + if (key.source_msaa_samples != xenos::MsaaSamples::k1X) { + source_sample_ids_int[0] = + builder.createUnaryOp(spv::OpBitcast, type_int, source_sample_id); + } else { + source_texture_parameters.lod = builder.makeIntConstant(0); + } + // Go to the next sample or pixel along X if need to load two dwords. + bool source_load_is_two_32bpp_samples = !source_is_64bpp && dest_is_64bpp; + if (source_load_is_two_32bpp_samples) { + if (key.source_msaa_samples >= xenos::MsaaSamples::k4X) { + source_coordinates[1] = source_coordinates[0]; + source_sample_ids_int[1] = builder.createBinOp( + spv::OpBitwiseOr, type_int, source_sample_ids_int[0], + builder.makeIntConstant(1)); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(builder.createBinOp(spv::OpBitwiseOr, type_int, + source_pixel_x_int, + builder.makeIntConstant(1))); + id_vector_temp.push_back(source_pixel_y_int); + source_coordinates[1] = + builder.createCompositeConstruct(type_int2, id_vector_temp); + source_sample_ids_int[1] = source_sample_ids_int[0]; + } + } + spv::Id source_color[2][4] = {}; + if (source_color_texture != spv::NoResult) { + source_texture_parameters.sampler = + builder.createLoad(source_color_texture, spv::NoPrecision); + assert_true(source_color_component_type != spv::NoType); + spv::Id source_color_vec4_type = + builder.makeVectorType(source_color_component_type, 4); + for (uint32_t i = 0; i <= uint32_t(source_load_is_two_32bpp_samples); ++i) { + source_texture_parameters.coords = source_coordinates[i]; + source_texture_parameters.sample = source_sample_ids_int[i]; + spv::Id source_color_vec4 = 
builder.createTextureCall( + spv::NoPrecision, source_color_vec4_type, false, true, false, false, + false, source_texture_parameters, spv::ImageOperandsMaskNone); + uint32_t source_color_components_remaining = + source_color_texture_component_mask; + uint32_t source_color_component_index; + while (xe::bit_scan_forward(source_color_components_remaining, + &source_color_component_index)) { + source_color_components_remaining &= + ~(uint32_t(1) << source_color_component_index); + source_color[i][source_color_component_index] = + builder.createCompositeExtract(source_color_vec4, + source_color_component_type, + source_color_component_index); + } + } + } + spv::Id source_depth_float[2] = {}; + if (source_depth_texture != spv::NoResult) { + source_texture_parameters.sampler = + builder.createLoad(source_depth_texture, spv::NoPrecision); + for (uint32_t i = 0; i <= uint32_t(source_load_is_two_32bpp_samples); ++i) { + source_texture_parameters.coords = source_coordinates[i]; + source_texture_parameters.sample = source_sample_ids_int[i]; + source_depth_float[i] = builder.createCompositeExtract( + builder.createTextureCall( + spv::NoPrecision, type_float4, false, true, false, false, false, + source_texture_parameters, spv::ImageOperandsMaskNone), + type_float, 0); + } + } + spv::Id source_stencil[2] = {}; + if (source_stencil_texture != spv::NoResult) { + source_texture_parameters.sampler = + builder.createLoad(source_stencil_texture, spv::NoPrecision); + for (uint32_t i = 0; i <= uint32_t(source_load_is_two_32bpp_samples); ++i) { + source_texture_parameters.coords = source_coordinates[i]; + source_texture_parameters.sample = source_sample_ids_int[i]; + source_stencil[i] = builder.createCompositeExtract( + builder.createTextureCall( + spv::NoPrecision, type_uint4, false, true, false, false, false, + source_texture_parameters, spv::ImageOperandsMaskNone), + type_uint, 0); + } + } + + // Pick the needed 32bpp half of the 64bpp color. 
+ if (source_is_64bpp && !dest_is_64bpp) { + uint32_t source_color_half_component_count = + source_color_format_component_count >> 1; + assert_true(source_color_half != spv::NoResult); + spv::Id source_color_is_second_half = + builder.createBinOp(spv::OpINotEqual, type_bool, source_color_half, + builder.makeUintConstant(0)); + if (mode.output == TransferOutput::kStencilBit) { + source_color[0][0] = builder.createTriOp( + spv::OpSelect, source_color_component_type, + source_color_is_second_half, + source_color[0][source_color_half_component_count], + source_color[0][0]); + } else { + for (uint32_t i = 0; i < source_color_half_component_count; ++i) { + source_color[0][i] = builder.createTriOp( + spv::OpSelect, source_color_component_type, + source_color_is_second_half, + source_color[0][source_color_half_component_count + i], + source_color[0][i]); + } + } + } + + if (output_fragment_stencil_ref != spv::NoResult && + source_stencil[0] != spv::NoResult) { + // For the depth -> depth case, write the stencil directly to the output. + assert_true(mode.output == TransferOutput::kDepth); + builder.createStore(source_stencil[0], output_fragment_stencil_ref); + } + + if (dest_is_64bpp) { + // Construct the 64bpp color from two 32-bit samples or one 64-bit sample. + // If `packed` (two uints) are created, use the generic path involving + // unpacking. + // Otherwise, the fragment data output must be written to directly by the + // reached control flow path. 
+ spv::Id packed[2] = {}; + if (source_is_color) { + switch (source_color_format) { + case xenos::ColorRenderTargetFormat::k_8_8_8_8: + case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: { + spv::Id unorm_round_offset = builder.makeFloatConstant(0.5f); + spv::Id unorm_scale = builder.makeFloatConstant(255.0f); + spv::Id component_width = builder.makeUintConstant(8); + for (uint32_t i = 0; i < 2; ++i) { + packed[i] = builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, + source_color[i][0], unorm_scale), + unorm_round_offset)); + for (uint32_t j = 1; j < 4; ++j) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed[i]); + id_vector_temp.push_back(builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, + source_color[i][j], unorm_scale), + unorm_round_offset))); + id_vector_temp.push_back(builder.makeUintConstant(8 * j)); + id_vector_temp.push_back(component_width); + packed[i] = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } + } break; + case xenos::ColorRenderTargetFormat::k_2_10_10_10: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: { + spv::Id unorm_round_offset = builder.makeFloatConstant(0.5f); + spv::Id unorm_scale_rgb = builder.makeFloatConstant(1023.0f); + spv::Id width_rgb = builder.makeUintConstant(10); + spv::Id unorm_scale_a = builder.makeFloatConstant(3.0f); + spv::Id width_a = builder.makeUintConstant(2); + for (uint32_t i = 0; i < 2; ++i) { + packed[i] = builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, + source_color[i][0], unorm_scale_rgb), + unorm_round_offset)); + for (uint32_t j = 1; j < 4; ++j) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + 
id_vector_temp.push_back(packed[i]); + id_vector_temp.push_back(builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp( + spv::OpFMul, type_float, source_color[i][j], + j == 3 ? unorm_scale_a : unorm_scale_rgb), + unorm_round_offset))); + id_vector_temp.push_back(builder.makeUintConstant(10 * j)); + id_vector_temp.push_back(j == 3 ? width_a : width_rgb); + packed[i] = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } + } break; + case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT: + case xenos::ColorRenderTargetFormat:: + k_2_10_10_10_FLOAT_AS_16_16_16_16: { + spv::Id width_rgb = builder.makeUintConstant(10); + spv::Id float_0 = builder.makeFloatConstant(0.0f); + spv::Id float_1 = builder.makeFloatConstant(1.0f); + spv::Id unorm_round_offset = builder.makeFloatConstant(0.5f); + spv::Id unorm_scale_a = builder.makeFloatConstant(3.0f); + spv::Id offset_a = builder.makeUintConstant(30); + spv::Id width_a = builder.makeUintConstant(2); + for (uint32_t i = 0; i < 2; ++i) { + // Float16 has a wider range for both color and alpha, also NaNs - + // clamp and convert. + packed[i] = SpirvShaderTranslator::UnclampedFloat32To7e3( + builder, source_color[i][0], ext_inst_glsl_std_450); + for (uint32_t j = 1; j < 3; ++j) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed[i]); + id_vector_temp.push_back( + SpirvShaderTranslator::UnclampedFloat32To7e3( + builder, source_color[i][j], ext_inst_glsl_std_450)); + id_vector_temp.push_back(builder.makeUintConstant(10 * j)); + id_vector_temp.push_back(width_rgb); + packed[i] = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + // Saturate and convert the alpha. 
+ id_vector_temp.clear(); + id_vector_temp.reserve(3); + id_vector_temp.push_back(source_color[i][3]); + id_vector_temp.push_back(float_0); + id_vector_temp.push_back(float_1); + spv::Id alpha_saturated = + builder.createBuiltinCall(type_float, ext_inst_glsl_std_450, + GLSLstd450NClamp, id_vector_temp); + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed[i]); + id_vector_temp.push_back(builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, + alpha_saturated, unorm_scale_a), + unorm_round_offset))); + id_vector_temp.push_back(offset_a); + id_vector_temp.push_back(width_a); + packed[i] = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } break; + // All 64bpp formats, and all 16 bits per component formats, are + // represented as integers in ownership transfer for safe handling of + // NaN encodings and -32768 / -32767. + // TODO(Triang3l): Handle the case when that's not true (no multisampled + // sampled images, no 16-bit UNORM, no cross-packing 32bpp aliasing on a + // portability subset device or a 64bpp format where that wouldn't help + // anyway). 
+ case xenos::ColorRenderTargetFormat::k_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: { + if (dest_color_format == + xenos::ColorRenderTargetFormat::k_32_32_FLOAT) { + spv::Id component_offset_width = builder.makeUintConstant(16); + spv::Id color_16_in_32[2]; + for (uint32_t i = 0; i < 2; ++i) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(source_color[i][0]); + id_vector_temp.push_back(source_color[i][1]); + id_vector_temp.push_back(component_offset_width); + id_vector_temp.push_back(component_offset_width); + color_16_in_32[i] = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(color_16_in_32[0]); + id_vector_temp.push_back(color_16_in_32[1]); + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp.push_back(source_color[i >> 1][i & 1]); + } + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } + } break; + case xenos::ColorRenderTargetFormat::k_16_16_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: { + if (dest_color_format == + xenos::ColorRenderTargetFormat::k_32_32_FLOAT) { + spv::Id component_offset_width = builder.makeUintConstant(16); + spv::Id color_16_in_32[2]; + for (uint32_t i = 0; i < 2; ++i) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(source_color[0][i << 1]); + id_vector_temp.push_back(source_color[0][(i << 1) + 1]); + id_vector_temp.push_back(component_offset_width); + id_vector_temp.push_back(component_offset_width); + color_16_in_32[i] = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } + id_vector_temp.clear(); + id_vector_temp.reserve(2); + 
id_vector_temp.push_back(color_16_in_32[0]); + id_vector_temp.push_back(color_16_in_32[1]); + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp.push_back(source_color[0][i]); + } + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } + } break; + // Float32 is transferred as uint32 to preserve NaN encodings. However, + // multisampled sampled image support is optional in Vulkan. + case xenos::ColorRenderTargetFormat::k_32_FLOAT: { + for (uint32_t i = 0; i < 2; ++i) { + packed[i] = source_color[i][0]; + if (!source_color_is_uint) { + packed[i] = + builder.createUnaryOp(spv::OpBitcast, type_uint, packed[i]); + } + } + } break; + case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: { + for (uint32_t i = 0; i < 2; ++i) { + packed[i] = source_color[0][i]; + if (!source_color_is_uint) { + packed[i] = + builder.createUnaryOp(spv::OpBitcast, type_uint, packed[i]); + } + } + } break; + } + } else { + assert_true(source_depth_texture != spv::NoResult); + assert_true(source_stencil_texture != spv::NoResult); + spv::Id depth_offset = builder.makeUintConstant(8); + spv::Id depth_width = builder.makeUintConstant(24); + for (uint32_t i = 0; i < 2; ++i) { + spv::Id depth24 = spv::NoResult; + switch (source_depth_format) { + case xenos::DepthRenderTargetFormat::kD24S8: { + // Round to the nearest even integer. This seems to be the + // correct, adding +0.5 and rounding towards zero results in red + // instead of black in the 4D5307E6 clear shader. 
+ id_vector_temp.clear(); + id_vector_temp.push_back(builder.createBinOp( + spv::OpFMul, type_float, source_depth_float[i], + builder.makeFloatConstant(float(0xFFFFFF)))); + depth24 = builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBuiltinCall(type_float, ext_inst_glsl_std_450, + GLSLstd450RoundEven, id_vector_temp)); + } break; + case xenos::DepthRenderTargetFormat::kD24FS8: { + depth24 = SpirvShaderTranslator::PreClampedDepthTo20e4( + builder, source_depth_float[i], true, ext_inst_glsl_std_450); + } break; + } + // Merge depth and stencil. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(source_stencil[i]); + id_vector_temp.push_back(depth24); + id_vector_temp.push_back(depth_offset); + id_vector_temp.push_back(depth_width); + packed[i] = + builder.createOp(spv::OpBitFieldInsert, type_uint, id_vector_temp); + } + } + // Common path unless there was a specialized one - unpack two packed 32-bit + // parts. + if (packed[0] != spv::NoResult) { + assert_true(packed[1] != spv::NoResult); + if (dest_color_format == xenos::ColorRenderTargetFormat::k_32_32_FLOAT) { + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(packed[0]); + id_vector_temp.push_back(packed[1]); + // Multisampled sampled images are optional in Vulkan, and image views + // of different formats can't be created separately for sampled image + // and color attachment usages, so no multisampled integer sampled image + // support implies no multisampled integer framebuffer attachment + // support in Xenia. 
+ if (!dest_color_is_uint) { + for (spv::Id& float32 : id_vector_temp) { + float32 = + builder.createUnaryOp(spv::OpBitcast, type_float, float32); + } + } + builder.createStore(builder.createCompositeConstruct(type_fragment_data, + id_vector_temp), + output_fragment_data); + } else { + spv::Id const_uint_0 = builder.makeUintConstant(0); + spv::Id const_uint_16 = builder.makeUintConstant(16); + id_vector_temp.clear(); + id_vector_temp.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp.push_back(builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, packed[i >> 1], + (i & 1) ? const_uint_16 : const_uint_0, const_uint_16)); + } + // TODO(Triang3l): Handle the case when that's not true (no multisampled + // sampled images, no 16-bit UNORM, no cross-packing 32bpp aliasing on a + // portability subset device or a 64bpp format where that wouldn't help + // anyway). + builder.createStore(builder.createCompositeConstruct(type_fragment_data, + id_vector_temp), + output_fragment_data); + } + } + } else { + // If `packed` is created, use the generic path involving unpacking. + // - For a color destination, the packed 32bpp color. + // - For a depth / stencil destination, stencil in 0:7, depth in 8:31 + // normally, or depth in 0:23 and zeros in 24:31 with packed_only_depth. + // - For a stencil bit, stencil in 0:7. + // Otherwise, the fragment data or fragment depth / stencil output must be + // written to directly by the reached control flow path. + spv::Id packed = spv::NoResult; + bool packed_only_depth = false; + if (source_is_color) { + switch (source_color_format) { + case xenos::ColorRenderTargetFormat::k_8_8_8_8: + case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: { + if (dest_is_color && + (dest_color_format == xenos::ColorRenderTargetFormat::k_8_8_8_8 || + dest_color_format == + xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA)) { + // Same format - passthrough. 
+ id_vector_temp.clear(); + id_vector_temp.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp.push_back(source_color[0][i]); + } + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } else { + spv::Id unorm_round_offset = builder.makeFloatConstant(0.5f); + spv::Id unorm_scale = builder.makeFloatConstant(255.0f); + uint32_t packed_component_offset = 0; + if (mode.output == TransferOutput::kDepth) { + // When need only depth, not stencil, skip the red component, and + // put the depth from GBA directly in the lower bits. + packed_component_offset = 1; + packed_only_depth = true; + if (output_fragment_stencil_ref != spv::NoResult) { + builder.createStore( + builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, + source_color[0][0], + unorm_scale), + unorm_round_offset)), + output_fragment_stencil_ref); + } + } + packed = builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp( + spv::OpFMul, type_float, + source_color[0][packed_component_offset], unorm_scale), + unorm_round_offset)); + if (mode.output != TransferOutput::kStencilBit) { + spv::Id component_width = builder.makeUintConstant(8); + for (uint32_t i = 1; i < 4 - packed_component_offset; ++i) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed); + id_vector_temp.push_back(builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp( + spv::OpFMul, type_float, + source_color[0][packed_component_offset + i], + unorm_scale), + unorm_round_offset))); + id_vector_temp.push_back(builder.makeUintConstant(8 * i)); + id_vector_temp.push_back(component_width); + packed = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } + } + } break; + 
case xenos::ColorRenderTargetFormat::k_2_10_10_10: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: { + if (dest_is_color && + (dest_color_format == + xenos::ColorRenderTargetFormat::k_2_10_10_10 || + dest_color_format == xenos::ColorRenderTargetFormat:: + k_2_10_10_10_AS_10_10_10_10)) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp.push_back(source_color[0][i]); + } + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } else { + spv::Id unorm_round_offset = builder.makeFloatConstant(0.5f); + spv::Id unorm_scale_rgb = builder.makeFloatConstant(1023.0f); + packed = builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, + source_color[0][0], unorm_scale_rgb), + unorm_round_offset)); + if (mode.output != TransferOutput::kStencilBit) { + spv::Id width_rgb = builder.makeUintConstant(10); + spv::Id unorm_scale_a = builder.makeFloatConstant(3.0f); + spv::Id width_a = builder.makeUintConstant(2); + for (uint32_t i = 1; i < 4; ++i) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed); + id_vector_temp.push_back(builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp( + spv::OpFMul, type_float, source_color[0][i], + i == 3 ? unorm_scale_a : unorm_scale_rgb), + unorm_round_offset))); + id_vector_temp.push_back(builder.makeUintConstant(10 * i)); + id_vector_temp.push_back(i == 3 ? 
width_a : width_rgb); + packed = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } + } + } break; + case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT: + case xenos::ColorRenderTargetFormat:: + k_2_10_10_10_FLOAT_AS_16_16_16_16: { + if (dest_is_color && + (dest_color_format == + xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT || + dest_color_format == xenos::ColorRenderTargetFormat:: + k_2_10_10_10_FLOAT_AS_16_16_16_16)) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp.push_back(source_color[0][i]); + } + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } else { + // Float16 has a wider range for both color and alpha, also NaNs - + // clamp and convert. + packed = SpirvShaderTranslator::UnclampedFloat32To7e3( + builder, source_color[0][0], ext_inst_glsl_std_450); + if (mode.output != TransferOutput::kStencilBit) { + spv::Id width_rgb = builder.makeUintConstant(10); + for (uint32_t i = 1; i < 3; ++i) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed); + id_vector_temp.push_back( + SpirvShaderTranslator::UnclampedFloat32To7e3( + builder, source_color[0][i], ext_inst_glsl_std_450)); + id_vector_temp.push_back(builder.makeUintConstant(10 * i)); + id_vector_temp.push_back(width_rgb); + packed = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + // Saturate and convert the alpha. 
+ id_vector_temp.clear(); + id_vector_temp.reserve(3); + id_vector_temp.push_back(source_color[0][3]); + id_vector_temp.push_back(builder.makeFloatConstant(0.0f)); + id_vector_temp.push_back(builder.makeFloatConstant(1.0f)); + spv::Id alpha_saturated = + builder.createBuiltinCall(type_float, ext_inst_glsl_std_450, + GLSLstd450NClamp, id_vector_temp); + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed); + id_vector_temp.push_back(builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, + alpha_saturated, + builder.makeFloatConstant(3.0f)), + builder.makeFloatConstant(0.5f)))); + id_vector_temp.push_back(builder.makeUintConstant(30)); + id_vector_temp.push_back(builder.makeUintConstant(2)); + packed = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } + } break; + case xenos::ColorRenderTargetFormat::k_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: + case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: { + // All 64bpp formats, and all 16 bits per component formats, are + // represented as integers in ownership transfer for safe handling of + // NaN encodings and -32768 / -32767. + // TODO(Triang3l): Handle the case when that's not true (no + // multisampled sampled images, no 16-bit UNORM, no cross-packing + // 32bpp aliasing on a portability subset device or a 64bpp format + // where that wouldn't help anyway). 
+ if (dest_is_color && + (dest_color_format == xenos::ColorRenderTargetFormat::k_16_16 || + dest_color_format == + xenos::ColorRenderTargetFormat::k_16_16_FLOAT)) { + id_vector_temp.clear(); + id_vector_temp.reserve(2); + for (uint32_t i = 0; i < 2; ++i) { + id_vector_temp.push_back(source_color[0][i]); + } + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } else { + packed = source_color[0][0]; + if (mode.output != TransferOutput::kStencilBit) { + spv::Id component_offset_width = builder.makeUintConstant(16); + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed); + id_vector_temp.push_back(source_color[0][1]); + id_vector_temp.push_back(component_offset_width); + id_vector_temp.push_back(component_offset_width); + packed = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } + } break; + // Float32 is transferred as uint32 to preserve NaN encodings. However, + // multisampled sampled image support is optional in Vulkan. + case xenos::ColorRenderTargetFormat::k_32_FLOAT: + case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: { + packed = source_color[0][0]; + if (!source_color_is_uint) { + packed = builder.createUnaryOp(spv::OpBitcast, type_uint, packed); + } + } break; + } + } else if (source_depth_float[0] != spv::NoResult) { + if (mode.output == TransferOutput::kDepth && + dest_depth_format == source_depth_format) { + builder.createStore(source_depth_float[0], output_fragment_depth); + } else { + switch (source_depth_format) { + case xenos::DepthRenderTargetFormat::kD24S8: { + // Round to the nearest even integer. This seems to be the correct, + // adding +0.5 and rounding towards zero results in red instead of + // black in the 4D5307E6 clear shader. 
+ id_vector_temp.clear(); + id_vector_temp.push_back(builder.createBinOp( + spv::OpFMul, type_float, source_depth_float[0], + builder.makeFloatConstant(float(0xFFFFFF)))); + packed = builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBuiltinCall(type_float, ext_inst_glsl_std_450, + GLSLstd450RoundEven, id_vector_temp)); + } break; + case xenos::DepthRenderTargetFormat::kD24FS8: { + packed = SpirvShaderTranslator::PreClampedDepthTo20e4( + builder, source_depth_float[0], true, ext_inst_glsl_std_450); + } break; + } + if (mode.output == TransferOutput::kDepth) { + packed_only_depth = true; + } else { + // Merge depth and stencil. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(source_stencil[0]); + id_vector_temp.push_back(packed); + id_vector_temp.push_back(builder.makeUintConstant(8)); + id_vector_temp.push_back(builder.makeUintConstant(24)); + packed = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } + } + switch (mode.output) { + case TransferOutput::kColor: { + // Unless a special path was taken, unpack the raw 32bpp value into the + // 32bpp color output. 
+ if (packed != spv::NoResult) { + switch (dest_color_format) { + case xenos::ColorRenderTargetFormat::k_8_8_8_8: + case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: { + spv::Id component_width = builder.makeUintConstant(8); + spv::Id unorm_scale = builder.makeFloatConstant(1.0f / 255.0f); + id_vector_temp.clear(); + id_vector_temp.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp.push_back(builder.createBinOp( + spv::OpFMul, type_float, + builder.createUnaryOp( + spv::OpConvertUToF, type_float, + builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, packed, + builder.makeUintConstant(8 * i), component_width)), + unorm_scale)); + } + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } break; + case xenos::ColorRenderTargetFormat::k_2_10_10_10: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: { + spv::Id width_rgb = builder.makeUintConstant(10); + spv::Id unorm_scale_rgb = + builder.makeFloatConstant(1.0f / 1023.0f); + spv::Id width_a = builder.makeUintConstant(2); + spv::Id unorm_scale_a = builder.makeFloatConstant(1.0f / 3.0f); + id_vector_temp.clear(); + id_vector_temp.reserve(4); + for (uint32_t i = 0; i < 4; ++i) { + id_vector_temp.push_back(builder.createBinOp( + spv::OpFMul, type_float, + builder.createUnaryOp( + spv::OpConvertUToF, type_float, + builder.createTriOp(spv::OpBitFieldUExtract, type_uint, + packed, + builder.makeUintConstant(10 * i), + i == 3 ? width_a : width_rgb)), + i == 3 ? unorm_scale_a : unorm_scale_rgb)); + } + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } break; + case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT: + case xenos::ColorRenderTargetFormat:: + k_2_10_10_10_FLOAT_AS_16_16_16_16: { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + // Color. 
+ spv::Id width_rgb = builder.makeUintConstant(10); + for (uint32_t i = 0; i < 3; ++i) { + id_vector_temp.push_back(SpirvShaderTranslator::Float7e3To32( + builder, packed, 10 * i, false, ext_inst_glsl_std_450)); + } + // Alpha. + id_vector_temp.push_back(builder.createBinOp( + spv::OpFMul, type_float, + builder.createUnaryOp( + spv::OpConvertUToF, type_float, + builder.createTriOp(spv::OpBitFieldUExtract, type_uint, + packed, builder.makeUintConstant(30), + builder.makeUintConstant(2))), + builder.makeFloatConstant(1.0f / 3.0f))); + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } break; + case xenos::ColorRenderTargetFormat::k_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: { + // All 16 bits per component formats are represented as integers + // in ownership transfer for safe handling of NaN encodings and + // -32768 / -32767. + // TODO(Triang3l): Handle the case when that's not true (no + // multisampled sampled images, no 16-bit UNORM, no cross-packing + // 32bpp aliasing on a portability subset device or a 64bpp format + // where that wouldn't help anyway). + spv::Id component_offset_width = builder.makeUintConstant(16); + id_vector_temp.clear(); + id_vector_temp.reserve(2); + for (uint32_t i = 0; i < 2; ++i) { + id_vector_temp.push_back(builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, packed, + i ? component_offset_width : builder.makeUintConstant(0), + component_offset_width)); + } + builder.createStore(builder.createCompositeConstruct( + type_fragment_data, id_vector_temp), + output_fragment_data); + } break; + case xenos::ColorRenderTargetFormat::k_32_FLOAT: { + // Float32 is transferred as uint32 to preserve NaN encodings. 
+ // However, multisampled sampled images are optional in Vulkan, + // and image views of different formats can't be created + // separately for sampled image and color attachment usages, so no + // multisampled integer sampled image support implies no + // multisampled integer framebuffer attachment support in Xenia. + spv::Id float32 = packed; + if (!dest_color_is_uint) { + float32 = + builder.createUnaryOp(spv::OpBitcast, type_float, float32); + } + builder.createStore(float32, output_fragment_data); + } break; + default: + // A 64bpp format (handled separately) or an invalid one. + assert_unhandled_case(dest_color_format); + } + } + } break; + case TransferOutput::kDepth: { + if (packed) { + spv::Id guest_depth24 = packed; + if (!packed_only_depth) { + // Extract the depth bits. + guest_depth24 = + builder.createBinOp(spv::OpShiftRightLogical, type_uint, + guest_depth24, builder.makeUintConstant(8)); + } + // Load the host float32 depth, check if, when converted to the guest + // format, it's the same as the guest source, thus up to date, and if + // it is, write host float32 depth, otherwise do the guest -> host + // conversion. + spv::Id host_depth32 = spv::NoResult; + if (host_depth_source_texture != spv::NoResult) { + // Convert position and sample index from within the destination + // tile to within the host depth source tile, like for the guest + // render target, but for 32bpp -> 32bpp only. + spv::Id host_depth_source_sample_id = dest_sample_id; + spv::Id host_depth_source_tile_pixel_x = dest_tile_pixel_x; + spv::Id host_depth_source_tile_pixel_y = dest_tile_pixel_y; + if (key.host_depth_source_msaa_samples != key.dest_msaa_samples) { + if (key.host_depth_source_msaa_samples >= + xenos::MsaaSamples::k4X) { + // 4x -> 1x/2x. + if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) { + // 4x -> 2x. + // Horizontal pixels to samples. Vertical sample (1/0 in the + // first bit for native 2x or 0/1 in the second bit for 2x as + // 4x) to second sample bit. 
+ if (msaa_2x_attachments_supported_) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_tile_pixel_x); + id_vector_temp.push_back(builder.createBinOp( + spv::OpBitwiseXor, type_uint, dest_sample_id, + builder.makeUintConstant(1))); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + host_depth_source_sample_id = builder.createOp( + spv::OpBitFieldInsert, type_uint, id_vector_temp); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_sample_id); + id_vector_temp.push_back(dest_tile_pixel_x); + id_vector_temp.push_back(builder.makeUintConstant(0)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + host_depth_source_sample_id = builder.createOp( + spv::OpBitFieldInsert, type_uint, id_vector_temp); + } + host_depth_source_tile_pixel_x = builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_tile_pixel_x, + builder.makeUintConstant(1)); + } else { + // 4x -> 1x. + // Pixels to samples. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(builder.createBinOp( + spv::OpBitwiseAnd, type_uint, dest_tile_pixel_x, + builder.makeUintConstant(1))); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + host_depth_source_sample_id = builder.createOp( + spv::OpBitFieldInsert, type_uint, id_vector_temp); + host_depth_source_tile_pixel_x = builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_tile_pixel_x, + builder.makeUintConstant(1)); + host_depth_source_tile_pixel_y = builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_tile_pixel_y, + builder.makeUintConstant(1)); + } + } else { + // 1x/2x -> 1x/2x/4x (as long as they're different). + // Only the X part - Y is handled by common code. 
+ if (key.dest_msaa_samples >= xenos::MsaaSamples::k4X) { + // Horizontal samples to pixels. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_sample_id); + id_vector_temp.push_back(dest_tile_pixel_x); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + host_depth_source_tile_pixel_x = builder.createOp( + spv::OpBitFieldInsert, type_uint, id_vector_temp); + } + } + // Host depth source Y and sample index for 1x/2x AA sources. + if (key.host_depth_source_msaa_samples < + xenos::MsaaSamples::k4X) { + if (key.dest_msaa_samples >= xenos::MsaaSamples::k4X) { + // 1x/2x -> 4x. + if (key.host_depth_source_msaa_samples == + xenos::MsaaSamples::k2X) { + // 2x -> 4x. + // Vertical samples (second bit) of 4x destination to + // vertical sample (1, 0 for native 2x, or 0, 3 for 2x as + // 4x) of 2x source. + host_depth_source_sample_id = builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_sample_id, + builder.makeUintConstant(1)); + if (msaa_2x_attachments_supported_) { + host_depth_source_sample_id = + builder.createBinOp(spv::OpBitwiseXor, type_uint, + host_depth_source_sample_id, + builder.makeUintConstant(1)); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(host_depth_source_sample_id); + id_vector_temp.push_back(host_depth_source_sample_id); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + host_depth_source_sample_id = builder.createOp( + spv::OpBitFieldInsert, type_uint, id_vector_temp); + } + } else { + // 1x -> 4x. + // Vertical samples (second bit) to Y pixels. 
+ id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_sample_id, + builder.makeUintConstant(1))); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + host_depth_source_tile_pixel_y = builder.createOp( + spv::OpBitFieldInsert, type_uint, id_vector_temp); + } + } else { + // 1x/2x -> different 1x/2x. + if (key.host_depth_source_msaa_samples == + xenos::MsaaSamples::k2X) { + // 2x -> 1x. + // Vertical pixels of 2x destination to vertical samples (1, + // 0 for native 2x, or 0, 3 for 2x as 4x) of 1x source. + host_depth_source_sample_id = builder.createBinOp( + spv::OpBitwiseAnd, type_uint, dest_tile_pixel_y, + builder.makeUintConstant(1)); + if (msaa_2x_attachments_supported_) { + host_depth_source_sample_id = + builder.createBinOp(spv::OpBitwiseXor, type_uint, + host_depth_source_sample_id, + builder.makeUintConstant(1)); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(host_depth_source_sample_id); + id_vector_temp.push_back(host_depth_source_sample_id); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(1)); + host_depth_source_sample_id = builder.createOp( + spv::OpBitFieldInsert, type_uint, id_vector_temp); + } + host_depth_source_tile_pixel_y = builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_tile_pixel_y, + builder.makeUintConstant(1)); + } else { + // 1x -> 2x. + // Vertical samples (1/0 in the first bit for native 2x or + // 0/1 in the second bit for 2x as 4x) of 2x destination to + // vertical pixels of 1x source. 
+ if (msaa_2x_attachments_supported_) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(builder.createBinOp( + spv::OpBitwiseXor, type_uint, dest_sample_id, + builder.makeUintConstant(1))); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + host_depth_source_tile_pixel_y = builder.createOp( + spv::OpBitFieldInsert, type_uint, id_vector_temp); + } else { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(builder.createBinOp( + spv::OpShiftRightLogical, type_uint, dest_sample_id, + builder.makeUintConstant(1))); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + host_depth_source_tile_pixel_y = builder.createOp( + spv::OpBitFieldInsert, type_uint, id_vector_temp); + } + } + } + } + } + assert_true(push_constants_member_host_depth_address != UINT32_MAX); + id_vector_temp.clear(); + id_vector_temp.push_back(builder.makeIntConstant( + int32_t(push_constants_member_host_depth_address))); + spv::Id host_depth_address_constant = builder.createLoad( + builder.createAccessChain(spv::StorageClassPushConstant, + push_constants, id_vector_temp), + spv::NoPrecision); + // Transform the destination tile index into the host depth source. 
+ spv::Id host_depth_source_tile_index = builder.createUnaryOp( + spv::OpBitcast, type_uint, + builder.createBinOp( + spv::OpIAdd, type_int, + builder.createUnaryOp(spv::OpBitcast, type_int, + dest_tile_index), + builder.createTriOp( + spv::OpBitFieldSExtract, type_int, + builder.createUnaryOp(spv::OpBitcast, type_int, + host_depth_address_constant), + builder.makeUintConstant(xenos::kEdramPitchTilesBits * + 2), + builder.makeUintConstant(xenos::kEdramBaseTilesBits)))); + // Split the host depth source tile index into X and Y tile index + // within the source image. + spv::Id host_depth_source_pitch_tiles = builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, host_depth_address_constant, + builder.makeUintConstant(xenos::kEdramPitchTilesBits), + builder.makeUintConstant(xenos::kEdramPitchTilesBits)); + spv::Id host_depth_source_tile_index_y = builder.createBinOp( + spv::OpUDiv, type_uint, host_depth_source_tile_index, + host_depth_source_pitch_tiles); + spv::Id host_depth_source_tile_index_x = builder.createBinOp( + spv::OpUMod, type_uint, host_depth_source_tile_index, + host_depth_source_pitch_tiles); + // Finally calculate the host depth source texture coordinates. + spv::Id host_depth_source_pixel_x_int = builder.createUnaryOp( + spv::OpBitcast, type_int, + builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp(spv::OpIMul, type_uint, + builder.makeUintConstant( + tile_width_samples_scaled >> + uint32_t(key.source_msaa_samples >= + xenos::MsaaSamples::k4X)), + host_depth_source_tile_index_x), + host_depth_source_tile_pixel_x)); + spv::Id host_depth_source_pixel_y_int = builder.createUnaryOp( + spv::OpBitcast, type_int, + builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp(spv::OpIMul, type_uint, + builder.makeUintConstant( + tile_height_samples_scaled >> + uint32_t(key.source_msaa_samples >= + xenos::MsaaSamples::k2X)), + host_depth_source_tile_index_y), + host_depth_source_tile_pixel_y)); + // Load the host depth source. 
+ spv::Builder::TextureParameters + host_depth_source_texture_parameters = {}; + host_depth_source_texture_parameters.sampler = + builder.createLoad(host_depth_source_texture, spv::NoPrecision); + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(host_depth_source_pixel_x_int); + id_vector_temp.push_back(host_depth_source_pixel_y_int); + host_depth_source_texture_parameters.coords = + builder.createCompositeConstruct(type_int2, id_vector_temp); + if (key.host_depth_source_msaa_samples != xenos::MsaaSamples::k1X) { + host_depth_source_texture_parameters.sample = + builder.createUnaryOp(spv::OpBitcast, type_int, + host_depth_source_sample_id); + } else { + host_depth_source_texture_parameters.lod = + builder.makeIntConstant(0); + } + host_depth32 = builder.createCompositeExtract( + builder.createTextureCall(spv::NoPrecision, type_float4, false, + true, false, false, false, + host_depth_source_texture_parameters, + spv::ImageOperandsMaskNone), + type_float, 0); + } else if (host_depth_source_buffer != spv::NoResult) { + // Get the address in the EDRAM scratch buffer and load from there. + // The beginning of the buffer is (0, 0) of the destination. + // 40-sample columns are not swapped for addressing simplicity + // (because this is used for depth -> depth transfers, where + // swapping isn't needed). + // Convert samples to pixels. + assert_true(key.host_depth_source_msaa_samples == + xenos::MsaaSamples::k1X); + spv::Id dest_tile_sample_x = dest_tile_pixel_x; + spv::Id dest_tile_sample_y = dest_tile_pixel_y; + if (key.dest_msaa_samples >= xenos::MsaaSamples::k2X) { + if (key.dest_msaa_samples >= xenos::MsaaSamples::k4X) { + // Horizontal sample index in bit 0. 
+ id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(dest_sample_id); + id_vector_temp.push_back(dest_tile_pixel_x); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + dest_tile_sample_x = builder.createOp( + spv::OpBitFieldInsert, type_uint, id_vector_temp); + } + // Vertical sample index as 1 or 0 in bit 0 for true 2x or as 0 + // or 1 in bit 1 for 4x or for 2x emulated as 4x. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(builder.createBinOp( + (key.dest_msaa_samples == xenos::MsaaSamples::k2X && + msaa_2x_attachments_supported_) + ? spv::OpBitwiseXor + : spv::OpShiftRightLogical, + type_uint, dest_sample_id, builder.makeUintConstant(1))); + id_vector_temp.push_back(dest_tile_pixel_y); + id_vector_temp.push_back(builder.makeUintConstant(1)); + id_vector_temp.push_back(builder.makeUintConstant(31)); + dest_tile_sample_y = builder.createOp(spv::OpBitFieldInsert, + type_uint, id_vector_temp); + } + // Combine the tile sample index and the tile index. + spv::Id host_depth_offset = builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp( + spv::OpIMul, type_uint, + builder.makeUintConstant(tile_width_samples_scaled * + tile_height_samples_scaled), + dest_tile_index), + builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp( + spv::OpIMul, type_uint, + builder.makeUintConstant(tile_width_samples_scaled), + dest_tile_sample_y), + dest_tile_sample_x)); + id_vector_temp.clear(); + id_vector_temp.reserve(2); + // The only SSBO structure member. + id_vector_temp.push_back(builder.makeIntConstant(0)); + id_vector_temp.push_back(builder.createUnaryOp( + spv::OpBitcast, type_int, host_depth_offset)); + // StorageBuffer since SPIR-V 1.3, but since SPIR-V 1.0 is + // generated, it's Uniform. 
+ host_depth32 = builder.createUnaryOp( + spv::OpBitcast, type_float, + builder.createLoad( + builder.createAccessChain(spv::StorageClassUniform, + host_depth_source_buffer, + id_vector_temp), + spv::NoPrecision)); + } + spv::Block* depth24_to_depth32_header = builder.getBuildPoint(); + spv::Id depth24_to_depth32_convert_id = spv::NoResult; + spv::Block* depth24_to_depth32_merge = nullptr; + spv::Id host_depth24 = spv::NoResult; + if (host_depth32 != spv::NoResult) { + // Convert the host depth value to the guest format and check if it + // matches the value in the currently owning guest render target. + switch (dest_depth_format) { + case xenos::DepthRenderTargetFormat::kD24S8: { + // Round to the nearest even integer. This seems to be the + // correct, adding +0.5 and rounding towards zero results in red + // instead of black in the 4D5307E6 clear shader. + id_vector_temp.clear(); + id_vector_temp.push_back(builder.createBinOp( + spv::OpFMul, type_float, host_depth32, + builder.makeFloatConstant(float(0xFFFFFF)))); + host_depth24 = builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBuiltinCall(type_float, ext_inst_glsl_std_450, + GLSLstd450RoundEven, + id_vector_temp)); + } break; + case xenos::DepthRenderTargetFormat::kD24FS8: { + host_depth24 = SpirvShaderTranslator::PreClampedDepthTo20e4( + builder, host_depth32, true, ext_inst_glsl_std_450); + } break; + } + assert_true(host_depth24 != spv::NoResult); + // Update the header block pointer after the conversion (to avoid + // assuming that the conversion doesn't branch). 
+ depth24_to_depth32_header = builder.getBuildPoint(); + spv::Id host_depth_outdated = builder.createBinOp( + spv::OpINotEqual, type_bool, guest_depth24, host_depth24); + spv::Block& depth24_to_depth32_convert_entry = + builder.makeNewBlock(); + { + spv::Block& depth24_to_depth32_merge_block = + builder.makeNewBlock(); + depth24_to_depth32_merge = &depth24_to_depth32_merge_block; + } + { + std::unique_ptr depth24_to_depth32_merge_op = + std::make_unique(spv::OpSelectionMerge); + depth24_to_depth32_merge_op->addIdOperand( + depth24_to_depth32_merge->getId()); + depth24_to_depth32_merge_op->addImmediateOperand( + spv::SelectionControlMaskNone); + builder.getBuildPoint()->addInstruction( + std::move(depth24_to_depth32_merge_op)); + } + builder.createConditionalBranch(host_depth_outdated, + &depth24_to_depth32_convert_entry, + depth24_to_depth32_merge); + builder.setBuildPoint(&depth24_to_depth32_convert_entry); + } + // Convert the guest 24-bit depth to float32 (in an open conditional + // if the host depth is also loaded). + spv::Id guest_depth32 = spv::NoResult; + switch (dest_depth_format) { + case xenos::DepthRenderTargetFormat::kD24S8: { + // Multiplying by 1.0 / 0xFFFFFF produces an incorrect result (for + // 0xC00000, for instance - which is 2_10_10_10 clear to 0001) - + // rescale from 0...0xFFFFFF to 0...0x1000000 doing what true + // float division followed by multiplication does (on x86-64 MSVC + // with default SSE rounding) - values starting from 0x800000 + // become bigger by 1; then accurately bias the result's exponent. 
+ guest_depth32 = builder.createBinOp( + spv::OpFMul, type_float, + builder.createUnaryOp( + spv::OpConvertUToF, type_float, + builder.createBinOp( + spv::OpIAdd, type_uint, guest_depth24, + builder.createBinOp(spv::OpShiftRightLogical, + type_uint, guest_depth24, + builder.makeUintConstant(23)))), + builder.makeFloatConstant(1.0f / float(1 << 24))); + } break; + case xenos::DepthRenderTargetFormat::kD24FS8: { + guest_depth32 = SpirvShaderTranslator::Depth20e4To32( + builder, guest_depth24, 0, true, false, + ext_inst_glsl_std_450); + } break; + } + assert_true(guest_depth32 != spv::NoResult); + spv::Id fragment_depth32 = guest_depth32; + if (host_depth32 != spv::NoResult) { + assert_not_null(depth24_to_depth32_merge); + spv::Id depth24_to_depth32_result_block_id = + builder.getBuildPoint()->getId(); + builder.createBranch(depth24_to_depth32_merge); + builder.setBuildPoint(depth24_to_depth32_merge); + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(guest_depth32); + id_vector_temp.push_back(depth24_to_depth32_result_block_id); + id_vector_temp.push_back(host_depth32); + id_vector_temp.push_back(depth24_to_depth32_header->getId()); + fragment_depth32 = + builder.createOp(spv::OpPhi, type_float, id_vector_temp); + } + builder.createStore(fragment_depth32, output_fragment_depth); + } + } break; + case TransferOutput::kStencilBit: { + if (packed) { + // Kill the sample if the needed stencil bit is not set. 
+ assert_true(push_constants_member_stencil_mask != UINT32_MAX); + id_vector_temp.clear(); + id_vector_temp.push_back(builder.makeIntConstant( + int32_t(push_constants_member_stencil_mask))); + spv::Id stencil_mask_constant = builder.createLoad( + builder.createAccessChain(spv::StorageClassPushConstant, + push_constants, id_vector_temp), + spv::NoPrecision); + spv::Id stencil_sample_passed = builder.createBinOp( + spv::OpINotEqual, type_bool, + builder.createBinOp(spv::OpBitwiseAnd, type_uint, packed, + stencil_mask_constant), + builder.makeUintConstant(0)); + spv::Block& stencil_bit_kill_block = builder.makeNewBlock(); + spv::Block& stencil_bit_merge_block = builder.makeNewBlock(); + { + std::unique_ptr stencil_bit_merge_op = + std::make_unique(spv::OpSelectionMerge); + stencil_bit_merge_op->addIdOperand(stencil_bit_merge_block.getId()); + stencil_bit_merge_op->addImmediateOperand( + spv::SelectionControlMaskNone); + builder.getBuildPoint()->addInstruction( + std::move(stencil_bit_merge_op)); + } + builder.createConditionalBranch(stencil_sample_passed, + &stencil_bit_merge_block, + &stencil_bit_kill_block); + builder.setBuildPoint(&stencil_bit_kill_block); + builder.createNoResultOp(spv::OpKill); + builder.setBuildPoint(&stencil_bit_merge_block); + } + } break; + } + } + + // End the main function and make it the entry point. + builder.leaveFunction(); + builder.addExecutionMode(main_function, spv::ExecutionModeOriginUpperLeft); + if (output_fragment_depth != spv::NoResult) { + builder.addExecutionMode(main_function, spv::ExecutionModeDepthReplacing); + } + if (output_fragment_stencil_ref != spv::NoResult) { + builder.addExecutionMode(main_function, + spv::ExecutionModeStencilRefReplacingEXT); + } + spv::Instruction* entry_point = + builder.addEntryPoint(spv::ExecutionModelFragment, main_function, "main"); + for (spv::Id interface_id : main_interface) { + entry_point->addIdOperand(interface_id); + } + + // Serialize the shader code. 
+ std::vector shader_code; + builder.dump(shader_code); + + // Create the shader module, and store the handle even if creation fails not + // to try to create it again later. + VkShaderModule shader_module = ui::vulkan::util::CreateShaderModule( + provider, reinterpret_cast(shader_code.data()), + sizeof(uint32_t) * shader_code.size()); + if (shader_module == VK_NULL_HANDLE) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the render target ownership " + "transfer shader 0x{:08X}", + key.key); + } + transfer_shaders_.emplace(key, shader_module); + return shader_module; +} + +VkPipeline const* VulkanRenderTargetCache::GetTransferPipelines( + TransferPipelineKey key) { + auto pipeline_it = transfer_pipelines_.find(key); + if (pipeline_it != transfer_pipelines_.end()) { + return pipeline_it->second[0] != VK_NULL_HANDLE ? pipeline_it->second.data() + : nullptr; + } + + VkRenderPass render_pass = GetRenderPass(key.render_pass_key); + VkShaderModule fragment_shader_module = GetTransferShader(key.shader_key); + if (render_pass == VK_NULL_HANDLE || + fragment_shader_module == VK_NULL_HANDLE) { + transfer_pipelines_.emplace(key, std::array{}); + return nullptr; + } + + const TransferModeInfo& mode = kTransferModes[size_t(key.shader_key.mode)]; + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + + uint32_t dest_sample_count = uint32_t(1) + << uint32_t(key.shader_key.dest_msaa_samples); + bool dest_is_masked_sample = + dest_sample_count > 1 && !device_features.sampleRateShading; + + VkPipelineShaderStageCreateInfo shader_stages[2]; + shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stages[0].pNext = nullptr; + shader_stages[0].flags = 0; + shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + 
shader_stages[0].module = transfer_passthrough_vertex_shader_; + shader_stages[0].pName = "main"; + shader_stages[0].pSpecializationInfo = nullptr; + shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stages[1].pNext = nullptr; + shader_stages[1].flags = 0; + shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + shader_stages[1].module = fragment_shader_module; + shader_stages[1].pName = "main"; + shader_stages[1].pSpecializationInfo = nullptr; + VkSpecializationMapEntry sample_id_specialization_map_entry; + uint32_t sample_id_specialization_constant; + VkSpecializationInfo sample_id_specialization_info; + if (dest_is_masked_sample) { + sample_id_specialization_map_entry.constantID = 0; + sample_id_specialization_map_entry.offset = 0; + sample_id_specialization_map_entry.size = sizeof(uint32_t); + sample_id_specialization_constant = 0; + sample_id_specialization_info.mapEntryCount = 1; + sample_id_specialization_info.pMapEntries = + &sample_id_specialization_map_entry; + sample_id_specialization_info.dataSize = + sizeof(sample_id_specialization_constant); + sample_id_specialization_info.pData = &sample_id_specialization_constant; + shader_stages[1].pSpecializationInfo = &sample_id_specialization_info; + } + + VkVertexInputBindingDescription vertex_input_binding; + vertex_input_binding.binding = 0; + vertex_input_binding.stride = sizeof(float) * 2; + vertex_input_binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + VkVertexInputAttributeDescription vertex_input_attribute; + vertex_input_attribute.location = 0; + vertex_input_attribute.binding = 0; + vertex_input_attribute.format = VK_FORMAT_R32G32_SFLOAT; + vertex_input_attribute.offset = 0; + VkPipelineVertexInputStateCreateInfo vertex_input_state; + vertex_input_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vertex_input_state.pNext = nullptr; + vertex_input_state.flags = 0; + vertex_input_state.vertexBindingDescriptionCount = 1; + 
vertex_input_state.pVertexBindingDescriptions = &vertex_input_binding; + vertex_input_state.vertexAttributeDescriptionCount = 1; + vertex_input_state.pVertexAttributeDescriptions = &vertex_input_attribute; + + VkPipelineInputAssemblyStateCreateInfo input_assembly_state; + input_assembly_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + input_assembly_state.pNext = nullptr; + input_assembly_state.flags = 0; + input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + input_assembly_state.primitiveRestartEnable = VK_FALSE; + + // Dynamic, to stay within maxViewportDimensions while preferring a + // power-of-two factor for converting from pixel coordinates to NDC for exact + // precision. + VkPipelineViewportStateCreateInfo viewport_state; + viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewport_state.pNext = nullptr; + viewport_state.flags = 0; + viewport_state.viewportCount = 1; + viewport_state.pViewports = nullptr; + viewport_state.scissorCount = 1; + viewport_state.pScissors = nullptr; + + VkPipelineRasterizationStateCreateInfo rasterization_state = {}; + rasterization_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterization_state.polygonMode = VK_POLYGON_MODE_FILL; + rasterization_state.cullMode = VK_CULL_MODE_NONE; + rasterization_state.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rasterization_state.lineWidth = 1.0f; + + // For samples other than the first, will be changed for the pipelines for + // other samples. + VkSampleMask sample_mask = UINT32_MAX; + VkPipelineMultisampleStateCreateInfo multisample_state = {}; + multisample_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + multisample_state.rasterizationSamples = + (dest_sample_count == 2 && !msaa_2x_attachments_supported_) + ? 
VK_SAMPLE_COUNT_4_BIT + : VkSampleCountFlagBits(dest_sample_count); + if (dest_sample_count > 1) { + if (device_features.sampleRateShading) { + multisample_state.sampleShadingEnable = VK_TRUE; + multisample_state.minSampleShading = 1.0f; + if (dest_sample_count == 2 && !msaa_2x_attachments_supported_) { + // Emulating 2x MSAA as samples 0 and 3 of 4x MSAA when 2x is not + // supported. + sample_mask = 0b1001; + } + } else { + sample_mask = 0b1; + } + if (sample_mask != UINT32_MAX) { + multisample_state.pSampleMask = &sample_mask; + } + } + + // Whether the depth / stencil state is used depends on the presence of a + // depth attachment in the render pass - but not making assumptions about + // whether the render pass contains any specific attachments, so setting up + // valid depth / stencil state unconditionally. + VkPipelineDepthStencilStateCreateInfo depth_stencil_state = {}; + depth_stencil_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + if (mode.output == TransferOutput::kDepth) { + depth_stencil_state.depthTestEnable = VK_TRUE; + depth_stencil_state.depthWriteEnable = VK_TRUE; + depth_stencil_state.depthCompareOp = cvars::depth_transfer_not_equal_test + ? VK_COMPARE_OP_NOT_EQUAL + : VK_COMPARE_OP_ALWAYS; + } + if ((mode.output == TransferOutput::kDepth && + provider.device_extensions().ext_shader_stencil_export) || + mode.output == TransferOutput::kStencilBit) { + depth_stencil_state.stencilTestEnable = VK_TRUE; + depth_stencil_state.front.failOp = VK_STENCIL_OP_KEEP; + depth_stencil_state.front.passOp = VK_STENCIL_OP_REPLACE; + depth_stencil_state.front.depthFailOp = VK_STENCIL_OP_REPLACE; + // Using ALWAYS, not NOT_EQUAL, so depth writing is unaffected by stencil + // being different. + depth_stencil_state.front.compareOp = VK_COMPARE_OP_ALWAYS; + // Will be dynamic for stencil bit output. 
+ depth_stencil_state.front.writeMask = UINT8_MAX; + depth_stencil_state.front.reference = UINT8_MAX; + depth_stencil_state.back = depth_stencil_state.front; + } + + // Whether the color blend state is used depends on the presence of color + // attachments in the render pass - but not making assumptions about whether + // the render pass contains any specific attachments, so setting up valid + // color blend state unconditionally. + VkPipelineColorBlendAttachmentState + color_blend_attachments[xenos::kMaxColorRenderTargets] = {}; + VkPipelineColorBlendStateCreateInfo color_blend_state = {}; + color_blend_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + color_blend_state.attachmentCount = + 32 - xe::lzcnt(key.render_pass_key.depth_and_color_used >> 1); + color_blend_state.pAttachments = color_blend_attachments; + if (mode.output == TransferOutput::kColor) { + if (device_features.independentBlend) { + // State the intention more explicitly. + color_blend_attachments[key.shader_key.dest_color_rt_index] + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + } else { + // The blend state for all attachments must be identical, but other render + // targets are not written to by the shader. 
+ for (uint32_t i = 0; i < color_blend_state.attachmentCount; ++i) { + color_blend_attachments[i].colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + } + } + } + + std::array dynamic_states; + VkPipelineDynamicStateCreateInfo dynamic_state; + dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state.pNext = nullptr; + dynamic_state.flags = 0; + dynamic_state.dynamicStateCount = 0; + dynamic_state.pDynamicStates = dynamic_states.data(); + dynamic_states[dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT; + dynamic_states[dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR; + if (mode.output == TransferOutput::kStencilBit) { + dynamic_states[dynamic_state.dynamicStateCount++] = + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK; + } + + std::array pipelines{}; + VkGraphicsPipelineCreateInfo pipeline_create_info; + pipeline_create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pipeline_create_info.pNext = nullptr; + pipeline_create_info.flags = 0; + if (dest_is_masked_sample) { + pipeline_create_info.flags |= VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT; + } + pipeline_create_info.stageCount = uint32_t(xe::countof(shader_stages)); + pipeline_create_info.pStages = shader_stages; + pipeline_create_info.pVertexInputState = &vertex_input_state; + pipeline_create_info.pInputAssemblyState = &input_assembly_state; + pipeline_create_info.pTessellationState = nullptr; + pipeline_create_info.pViewportState = &viewport_state; + pipeline_create_info.pRasterizationState = &rasterization_state; + pipeline_create_info.pMultisampleState = &multisample_state; + pipeline_create_info.pDepthStencilState = &depth_stencil_state; + pipeline_create_info.pColorBlendState = &color_blend_state; + pipeline_create_info.pDynamicState = &dynamic_state; + pipeline_create_info.layout = + transfer_pipeline_layouts_[size_t(mode.pipeline_layout)]; + 
pipeline_create_info.renderPass = render_pass; + pipeline_create_info.subpass = 0; + pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE; + pipeline_create_info.basePipelineIndex = -1; + if (dfn.vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, + &pipeline_create_info, nullptr, + &pipelines[0]) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the render target ownership " + "transfer pipeline for render pass 0x{:08X}, shader 0x{:08X}", + key.render_pass_key.key, key.shader_key.key); + transfer_pipelines_.emplace(key, std::array{}); + return nullptr; + } + if (dest_is_masked_sample) { + assert_true(multisample_state.pSampleMask == &sample_mask); + pipeline_create_info.flags = (pipeline_create_info.flags & + ~VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT) | + VK_PIPELINE_CREATE_DERIVATIVE_BIT; + pipeline_create_info.basePipelineHandle = pipelines[0]; + for (uint32_t i = 1; i < dest_sample_count; ++i) { + // Emulating 2x MSAA as samples 0 and 3 of 4x MSAA when 2x is not + // supported. + uint32_t host_sample_index = + (dest_sample_count == 2 && !msaa_2x_attachments_supported_ && i == 1) + ? 
3 + : i; + sample_id_specialization_constant = host_sample_index; + sample_mask = uint32_t(1) << host_sample_index; + if (dfn.vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, + &pipeline_create_info, nullptr, + &pipelines[i]) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the render target " + "ownership transfer pipeline for render pass 0x{:08X}, shader " + "0x{:08X}, sample {}", + key.render_pass_key.key, key.shader_key.key, i); + for (uint32_t j = 0; j < i; ++j) { + dfn.vkDestroyPipeline(device, pipelines[j], nullptr); + } + transfer_pipelines_.emplace(key, std::array{}); + return nullptr; + } + } + } + return transfer_pipelines_.emplace(key, pipelines).first->second.data(); +} + +void VulkanRenderTargetCache::PerformTransfersAndResolveClears( + uint32_t render_target_count, RenderTarget* const* render_targets, + const std::vector* render_target_transfers, + const uint64_t* render_target_resolve_clear_values, + const Transfer::Rectangle* resolve_clear_rectangle) { + assert_true(GetPath() == Path::kHostRenderTargets); + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDeviceLimits& device_limits = + provider.device_properties().limits; + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + bool shader_stencil_export = + provider.device_extensions().ext_shader_stencil_export; + uint64_t current_submission = command_processor_.GetCurrentSubmission(); + DeferredCommandBuffer& command_buffer = + command_processor_.deferred_command_buffer(); + + bool resolve_clear_needed = + render_target_resolve_clear_values && resolve_clear_rectangle; + VkClearRect resolve_clear_rect; + if (resolve_clear_needed) { + // Assuming the rectangle is already clamped by the setup function from the + // common render target cache. 
+ resolve_clear_rect.rect.offset.x = + int32_t(resolve_clear_rectangle->x_pixels * resolution_scale_x_); + resolve_clear_rect.rect.offset.y = + int32_t(resolve_clear_rectangle->y_pixels * resolution_scale_y_); + resolve_clear_rect.rect.extent.width = + resolve_clear_rectangle->width_pixels * resolution_scale_x_; + resolve_clear_rect.rect.extent.height = + resolve_clear_rectangle->height_pixels * resolution_scale_y_; + resolve_clear_rect.baseArrayLayer = 0; + resolve_clear_rect.layerCount = 1; + } + + // Do host depth storing for the depth destination (assuming there can be only + // one depth destination) where depth destination == host depth source. + bool host_depth_store_set_up = false; + for (uint32_t i = 0; i < render_target_count; ++i) { + RenderTarget* dest_rt = render_targets[i]; + if (!dest_rt) { + continue; + } + auto& dest_vulkan_rt = *static_cast(dest_rt); + RenderTargetKey dest_rt_key = dest_vulkan_rt.key(); + if (!dest_rt_key.is_depth) { + continue; + } + const std::vector& depth_transfers = render_target_transfers[i]; + for (const Transfer& transfer : depth_transfers) { + if (transfer.host_depth_source != dest_rt) { + continue; + } + if (!host_depth_store_set_up) { + // Pipeline. + command_processor_.BindExternalComputePipeline( + host_depth_store_pipelines_[size_t(dest_rt_key.msaa_samples)]); + // Descriptor set bindings. + VkDescriptorSet host_depth_store_descriptor_sets[] = { + edram_storage_buffer_descriptor_set_, + dest_vulkan_rt.GetDescriptorSetTransferSource(), + }; + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_COMPUTE, host_depth_store_pipeline_layout_, + 0, uint32_t(xe::countof(host_depth_store_descriptor_sets)), + host_depth_store_descriptor_sets, 0, nullptr); + // Render target constant. 
+ HostDepthStoreRenderTargetConstant + host_depth_store_render_target_constant = + GetHostDepthStoreRenderTargetConstant( + dest_rt_key.pitch_tiles_at_32bpp, + msaa_2x_attachments_supported_); + command_buffer.CmdVkPushConstants( + host_depth_store_pipeline_layout_, VK_SHADER_STAGE_COMPUTE_BIT, + uint32_t(offsetof(HostDepthStoreConstants, render_target)), + sizeof(host_depth_store_render_target_constant), + &host_depth_store_render_target_constant); + // Barriers - don't need to try to combine them with the rest of + // render target transfer barriers now - if this happens, after host + // depth storing, SHADER_READ -> DEPTH_STENCIL_ATTACHMENT_WRITE will be + // done anyway even in the best case, so it's not possible to have all + // the barriers in one place here. + UseEdramBuffer(EdramBufferUsage::kComputeWrite); + // Always transitioning both depth and stencil, not storing separate + // usage flags for depth and stencil. + command_processor_.PushImageMemoryBarrier( + dest_vulkan_rt.image(), + ui::vulkan::util::InitializeSubresourceRange( + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT), + dest_vulkan_rt.current_stage_mask(), + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + dest_vulkan_rt.current_access_mask(), VK_ACCESS_SHADER_READ_BIT, + dest_vulkan_rt.current_layout(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + dest_vulkan_rt.SetUsage(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + host_depth_store_set_up = true; + } + Transfer::Rectangle + transfer_rectangles[Transfer::kMaxRectanglesWithCutout]; + uint32_t transfer_rectangle_count = transfer.GetRectangles( + dest_rt_key.base_tiles, dest_rt_key.pitch_tiles_at_32bpp, + dest_rt_key.msaa_samples, false, transfer_rectangles, + resolve_clear_rectangle); + assert_not_zero(transfer_rectangle_count); + HostDepthStoreRectangleConstant host_depth_store_rectangle_constant; + for (uint32_t j = 0; j < transfer_rectangle_count; ++j) { + uint32_t 
group_count_x, group_count_y; + GetHostDepthStoreRectangleInfo( + transfer_rectangles[j], dest_rt_key.msaa_samples, + host_depth_store_rectangle_constant, group_count_x, group_count_y); + command_buffer.CmdVkPushConstants( + host_depth_store_pipeline_layout_, VK_SHADER_STAGE_COMPUTE_BIT, + uint32_t(offsetof(HostDepthStoreConstants, rectangle)), + sizeof(host_depth_store_rectangle_constant), + &host_depth_store_rectangle_constant); + command_processor_.SubmitBarriers(true); + command_buffer.CmdVkDispatch(group_count_x, group_count_y, 1); + MarkEdramBufferModified(); + } + } + break; + } + + constexpr VkPipelineStageFlags kSourceStageMask = + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + constexpr VkAccessFlags kSourceAccessMask = VK_ACCESS_SHADER_READ_BIT; + constexpr VkImageLayout kSourceLayout = + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + // Try to insert as many barriers as possible in one place, hoping that in the + // best case (no cross-copying between current render targets), barriers will + // need to be only inserted here, not between transfers. In case of + // cross-copying, if the destination use is going to happen before the source + // use, choose the destination state, otherwise the source state - to match + // the order in which transfers will actually happen (otherwise there will be + // just a useless switch back and forth). + for (uint32_t i = 0; i < render_target_count; ++i) { + RenderTarget* dest_rt = render_targets[i]; + if (!dest_rt) { + continue; + } + const std::vector& dest_transfers = render_target_transfers[i]; + if (!resolve_clear_needed && dest_transfers.empty()) { + continue; + } + // Transition the destination, only if not going to be used as a source + // earlier. 
+ bool dest_used_previously_as_source = false; + for (uint32_t j = 0; j < i; ++j) { + for (const Transfer& previous_transfer : render_target_transfers[j]) { + if (previous_transfer.source == dest_rt || + previous_transfer.host_depth_source == dest_rt) { + dest_used_previously_as_source = true; + break; + } + } + } + if (!dest_used_previously_as_source) { + auto& dest_vulkan_rt = *static_cast(dest_rt); + VkPipelineStageFlags dest_dst_stage_mask; + VkAccessFlags dest_dst_access_mask; + VkImageLayout dest_new_layout; + dest_vulkan_rt.GetDrawUsage(&dest_dst_stage_mask, &dest_dst_access_mask, + &dest_new_layout); + command_processor_.PushImageMemoryBarrier( + dest_vulkan_rt.image(), + ui::vulkan::util::InitializeSubresourceRange( + dest_vulkan_rt.key().is_depth + ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) + : VK_IMAGE_ASPECT_COLOR_BIT), + dest_vulkan_rt.current_stage_mask(), dest_dst_stage_mask, + dest_vulkan_rt.current_access_mask(), dest_dst_access_mask, + dest_vulkan_rt.current_layout(), dest_new_layout); + dest_vulkan_rt.SetUsage(dest_dst_stage_mask, dest_dst_access_mask, + dest_new_layout); + } + // Transition the sources, only if not going to be used as destinations + // earlier. + for (const Transfer& transfer : dest_transfers) { + bool source_previously_used_as_dest = false; + bool host_depth_source_previously_used_as_dest = false; + for (uint32_t j = 0; j < i; ++j) { + if (render_target_transfers[j].empty()) { + continue; + } + const RenderTarget* previous_rt = render_targets[j]; + if (transfer.source == previous_rt) { + source_previously_used_as_dest = true; + } + if (transfer.host_depth_source == previous_rt) { + host_depth_source_previously_used_as_dest = true; + } + } + if (!source_previously_used_as_dest) { + auto& source_vulkan_rt = + *static_cast(transfer.source); + command_processor_.PushImageMemoryBarrier( + source_vulkan_rt.image(), + ui::vulkan::util::InitializeSubresourceRange( + source_vulkan_rt.key().is_depth + ? 
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) + : VK_IMAGE_ASPECT_COLOR_BIT), + source_vulkan_rt.current_stage_mask(), kSourceStageMask, + source_vulkan_rt.current_access_mask(), kSourceAccessMask, + source_vulkan_rt.current_layout(), kSourceLayout); + source_vulkan_rt.SetUsage(kSourceStageMask, kSourceAccessMask, + kSourceLayout); + } + // transfer.host_depth_source == dest_rt means the EDRAM buffer will be + // used instead, no need to transition. + if (transfer.host_depth_source && transfer.host_depth_source != dest_rt && + !host_depth_source_previously_used_as_dest) { + auto& host_depth_source_vulkan_rt = + *static_cast(transfer.host_depth_source); + command_processor_.PushImageMemoryBarrier( + host_depth_source_vulkan_rt.image(), + ui::vulkan::util::InitializeSubresourceRange( + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT), + host_depth_source_vulkan_rt.current_stage_mask(), kSourceStageMask, + host_depth_source_vulkan_rt.current_access_mask(), + kSourceAccessMask, host_depth_source_vulkan_rt.current_layout(), + kSourceLayout); + host_depth_source_vulkan_rt.SetUsage(kSourceStageMask, + kSourceAccessMask, kSourceLayout); + } + } + } + if (host_depth_store_set_up) { + // Will be reading copied host depth from the EDRAM buffer. + UseEdramBuffer(EdramBufferUsage::kFragmentRead); + } + + // Perform the transfers and clears. 
+ + TransferPipelineLayoutIndex last_transfer_pipeline_layout_index = + TransferPipelineLayoutIndex::kCount; + uint32_t transfer_descriptor_sets_bound = 0; + uint32_t transfer_push_constants_set = 0; + VkDescriptorSet last_descriptor_set_host_depth_stencil_textures = + VK_NULL_HANDLE; + VkDescriptorSet last_descriptor_set_depth_stencil_textures = VK_NULL_HANDLE; + VkDescriptorSet last_descriptor_set_color_texture = VK_NULL_HANDLE; + TransferAddressConstant last_host_depth_address_constant; + TransferAddressConstant last_address_constant; + + for (uint32_t i = 0; i < render_target_count; ++i) { + RenderTarget* dest_rt = render_targets[i]; + if (!dest_rt) { + continue; + } + + const std::vector& current_transfers = render_target_transfers[i]; + if (current_transfers.empty() && !resolve_clear_needed) { + continue; + } + + auto& dest_vulkan_rt = *static_cast(dest_rt); + RenderTargetKey dest_rt_key = dest_vulkan_rt.key(); + + // Late barriers in case there was cross-copying that prevented merging of + // barriers. + { + VkPipelineStageFlags dest_dst_stage_mask; + VkAccessFlags dest_dst_access_mask; + VkImageLayout dest_new_layout; + dest_vulkan_rt.GetDrawUsage(&dest_dst_stage_mask, &dest_dst_access_mask, + &dest_new_layout); + command_processor_.PushImageMemoryBarrier( + dest_vulkan_rt.image(), + ui::vulkan::util::InitializeSubresourceRange( + dest_rt_key.is_depth + ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) + : VK_IMAGE_ASPECT_COLOR_BIT), + dest_vulkan_rt.current_stage_mask(), dest_dst_stage_mask, + dest_vulkan_rt.current_access_mask(), dest_dst_access_mask, + dest_vulkan_rt.current_layout(), dest_new_layout); + dest_vulkan_rt.SetUsage(dest_dst_stage_mask, dest_dst_access_mask, + dest_new_layout); + } + + // Get the objects needed for transfers to the destination. 
+ // TODO(Triang3l): Reuse the guest render pass for transfers where possible + // (if the Vulkan format used for drawing is also usable for transfers - for + // instance, R8G8B8A8_UNORM can be used for both, so the guest pass can be + // reused, but R16G16B16A16_SFLOAT render targets use R16G16B16A16_UINT for + // transfers, so the transfer pass has to be separate) to avoid stores and + // loads on tile-based devices to make this actually applicable. Also + // overall perform all non-cross-copying transfers for the current + // framebuffer configuration in a single pass, to load / store only once. + RenderPassKey transfer_render_pass_key; + transfer_render_pass_key.msaa_samples = dest_rt_key.msaa_samples; + if (dest_rt_key.is_depth) { + transfer_render_pass_key.depth_and_color_used = 0b1; + transfer_render_pass_key.depth_format = dest_rt_key.GetDepthFormat(); + } else { + transfer_render_pass_key.depth_and_color_used = 0b1 << 1; + transfer_render_pass_key.color_0_view_format = + dest_rt_key.GetColorFormat(); + transfer_render_pass_key.color_rts_use_transfer_formats = 1; + } + VkRenderPass transfer_render_pass = GetRenderPass(transfer_render_pass_key); + if (transfer_render_pass == VK_NULL_HANDLE) { + continue; + } + const RenderTarget* + transfer_framebuffer_render_targets[1 + xenos::kMaxColorRenderTargets] = + {}; + transfer_framebuffer_render_targets[dest_rt_key.is_depth ? 0 : 1] = dest_rt; + const Framebuffer* transfer_framebuffer = GetFramebuffer( + transfer_render_pass_key, dest_rt_key.pitch_tiles_at_32bpp, + transfer_framebuffer_render_targets); + if (!transfer_framebuffer) { + continue; + } + // Don't enter the render pass immediately - may still insert source + // barriers later. + + if (!current_transfers.empty()) { + uint32_t dest_pitch_tiles = dest_rt_key.GetPitchTiles(); + bool dest_is_64bpp = dest_rt_key.Is64bpp(); + + // Gather shader keys and sort to reduce pipeline state and binding + // switches. Also gather stencil rectangles to clear if needed. 
+ bool need_stencil_bit_draws = + dest_rt_key.is_depth && !shader_stencil_export; + current_transfer_invocations_.clear(); + current_transfer_invocations_.reserve( + current_transfers.size() << uint32_t(need_stencil_bit_draws)); + uint32_t rt_sort_index = 0; + TransferShaderKey new_transfer_shader_key; + new_transfer_shader_key.dest_msaa_samples = dest_rt_key.msaa_samples; + new_transfer_shader_key.dest_resource_format = + dest_rt_key.resource_format; + uint32_t stencil_clear_rectangle_count = 0; + for (uint32_t j = 0; j <= uint32_t(need_stencil_bit_draws); ++j) { + // j == 0 - color or depth. + // j == 1 - stencil bits. + // Stencil bit writing always requires a different root signature, + // handle these separately. Stencil never has a host depth source. + // Clear previously set sort indices. + for (const Transfer& transfer : current_transfers) { + auto host_depth_source_vulkan_rt = + static_cast(transfer.host_depth_source); + if (host_depth_source_vulkan_rt) { + host_depth_source_vulkan_rt->SetTemporarySortIndex(UINT32_MAX); + } + assert_not_null(transfer.source); + auto& source_vulkan_rt = + *static_cast(transfer.source); + source_vulkan_rt.SetTemporarySortIndex(UINT32_MAX); + } + for (const Transfer& transfer : current_transfers) { + assert_not_null(transfer.source); + auto& source_vulkan_rt = + *static_cast(transfer.source); + VulkanRenderTarget* host_depth_source_vulkan_rt = + j ? 
nullptr + : static_cast(transfer.host_depth_source); + if (host_depth_source_vulkan_rt && + host_depth_source_vulkan_rt->temporary_sort_index() == + UINT32_MAX) { + host_depth_source_vulkan_rt->SetTemporarySortIndex(rt_sort_index++); + } + if (source_vulkan_rt.temporary_sort_index() == UINT32_MAX) { + source_vulkan_rt.SetTemporarySortIndex(rt_sort_index++); + } + RenderTargetKey source_rt_key = source_vulkan_rt.key(); + new_transfer_shader_key.source_msaa_samples = + source_rt_key.msaa_samples; + new_transfer_shader_key.source_resource_format = + source_rt_key.resource_format; + bool host_depth_source_is_copy = + host_depth_source_vulkan_rt == &dest_vulkan_rt; + // The host depth copy buffer has only raw samples. + new_transfer_shader_key.host_depth_source_msaa_samples = + (host_depth_source_vulkan_rt && !host_depth_source_is_copy) + ? host_depth_source_vulkan_rt->key().msaa_samples + : xenos::MsaaSamples::k1X; + if (j) { + new_transfer_shader_key.mode = + source_rt_key.is_depth ? TransferMode::kDepthToStencilBit + : TransferMode::kColorToStencilBit; + stencil_clear_rectangle_count += + transfer.GetRectangles(dest_rt_key.base_tiles, dest_pitch_tiles, + dest_rt_key.msaa_samples, dest_is_64bpp, + nullptr, resolve_clear_rectangle); + } else { + if (dest_rt_key.is_depth) { + if (host_depth_source_vulkan_rt) { + if (host_depth_source_is_copy) { + new_transfer_shader_key.mode = + source_rt_key.is_depth + ? TransferMode::kDepthAndHostDepthCopyToDepth + : TransferMode::kColorAndHostDepthCopyToDepth; + } else { + new_transfer_shader_key.mode = + source_rt_key.is_depth + ? TransferMode::kDepthAndHostDepthToDepth + : TransferMode::kColorAndHostDepthToDepth; + } + } else { + new_transfer_shader_key.mode = + source_rt_key.is_depth ? TransferMode::kDepthToDepth + : TransferMode::kColorToDepth; + } + } else { + new_transfer_shader_key.mode = source_rt_key.is_depth + ? 
TransferMode::kDepthToColor + : TransferMode::kColorToColor; + } + } + current_transfer_invocations_.emplace_back(transfer, + new_transfer_shader_key); + if (j) { + current_transfer_invocations_.back().transfer.host_depth_source = + nullptr; + } + } + } + std::sort(current_transfer_invocations_.begin(), + current_transfer_invocations_.end()); + + for (auto it = current_transfer_invocations_.cbegin(); + it != current_transfer_invocations_.cend(); ++it) { + assert_not_null(it->transfer.source); + auto& source_vulkan_rt = + *static_cast(it->transfer.source); + command_processor_.PushImageMemoryBarrier( + source_vulkan_rt.image(), + ui::vulkan::util::InitializeSubresourceRange( + source_vulkan_rt.key().is_depth + ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) + : VK_IMAGE_ASPECT_COLOR_BIT), + source_vulkan_rt.current_stage_mask(), kSourceStageMask, + source_vulkan_rt.current_access_mask(), kSourceAccessMask, + source_vulkan_rt.current_layout(), kSourceLayout); + source_vulkan_rt.SetUsage(kSourceStageMask, kSourceAccessMask, + kSourceLayout); + auto host_depth_source_vulkan_rt = + static_cast(it->transfer.host_depth_source); + if (host_depth_source_vulkan_rt) { + TransferShaderKey transfer_shader_key = it->shader_key; + if (transfer_shader_key.mode == + TransferMode::kDepthAndHostDepthCopyToDepth || + transfer_shader_key.mode == + TransferMode::kColorAndHostDepthCopyToDepth) { + // Reading copied host depth from the EDRAM buffer. + UseEdramBuffer(EdramBufferUsage::kFragmentRead); + } else { + // Reading host depth from the texture. 
+ command_processor_.PushImageMemoryBarrier( + host_depth_source_vulkan_rt->image(), + ui::vulkan::util::InitializeSubresourceRange( + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT), + host_depth_source_vulkan_rt->current_stage_mask(), + kSourceStageMask, + host_depth_source_vulkan_rt->current_access_mask(), + kSourceAccessMask, + host_depth_source_vulkan_rt->current_layout(), kSourceLayout); + host_depth_source_vulkan_rt->SetUsage( + kSourceStageMask, kSourceAccessMask, kSourceLayout); + } + } + } + + // Perform the transfers for the render target. + + command_processor_.SubmitBarriersAndEnterRenderTargetCacheRenderPass( + transfer_render_pass, transfer_framebuffer); + + if (stencil_clear_rectangle_count) { + VkClearAttachment* stencil_clear_attachment; + VkClearRect* stencil_clear_rect_write_ptr; + command_buffer.CmdClearAttachmentsEmplace(1, stencil_clear_attachment, + stencil_clear_rectangle_count, + stencil_clear_rect_write_ptr); + stencil_clear_attachment->aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; + stencil_clear_attachment->colorAttachment = 0; + stencil_clear_attachment->clearValue.depthStencil.depth = 0.0f; + stencil_clear_attachment->clearValue.depthStencil.stencil = 0; + for (const Transfer& transfer : current_transfers) { + Transfer::Rectangle transfer_stencil_clear_rectangles + [Transfer::kMaxRectanglesWithCutout]; + uint32_t transfer_stencil_clear_rectangle_count = + transfer.GetRectangles(dest_rt_key.base_tiles, dest_pitch_tiles, + dest_rt_key.msaa_samples, dest_is_64bpp, + transfer_stencil_clear_rectangles, + resolve_clear_rectangle); + for (uint32_t j = 0; j < transfer_stencil_clear_rectangle_count; + ++j) { + const Transfer::Rectangle& stencil_clear_rectangle = + transfer_stencil_clear_rectangles[j]; + stencil_clear_rect_write_ptr->rect.offset.x = + int32_t(stencil_clear_rectangle.x_pixels * resolution_scale_x_); + stencil_clear_rect_write_ptr->rect.offset.y = + int32_t(stencil_clear_rectangle.y_pixels * resolution_scale_y_); + 
stencil_clear_rect_write_ptr->rect.extent.width = + stencil_clear_rectangle.width_pixels * resolution_scale_x_; + stencil_clear_rect_write_ptr->rect.extent.height = + stencil_clear_rectangle.height_pixels * resolution_scale_y_; + stencil_clear_rect_write_ptr->baseArrayLayer = 0; + stencil_clear_rect_write_ptr->layerCount = 1; + ++stencil_clear_rect_write_ptr; + } + } + } + + // Prefer power of two viewports for exact division by simply biasing the + // exponent. + VkViewport transfer_viewport; + transfer_viewport.x = 0.0f; + transfer_viewport.y = 0.0f; + transfer_viewport.width = + float(std::min(xe::next_pow2(transfer_framebuffer->host_extent.width), + device_limits.maxViewportDimensions[0])); + transfer_viewport.height = float( + std::min(xe::next_pow2(transfer_framebuffer->host_extent.height), + device_limits.maxViewportDimensions[1])); + transfer_viewport.minDepth = 0.0f; + transfer_viewport.maxDepth = 1.0f; + command_processor_.SetViewport(transfer_viewport); + float pixels_to_ndc_x = 2.0f / transfer_viewport.width; + float pixels_to_ndc_y = 2.0f / transfer_viewport.height; + VkRect2D transfer_scissor; + transfer_scissor.offset.x = 0; + transfer_scissor.offset.y = 0; + transfer_scissor.extent = transfer_framebuffer->host_extent; + command_processor_.SetScissor(transfer_scissor); + + for (auto it = current_transfer_invocations_.cbegin(); + it != current_transfer_invocations_.cend(); ++it) { + const TransferInvocation& transfer_invocation_first = *it; + // Will be merging transfers from the same source into one mesh. 
+ auto it_merged_first = it, it_merged_last = it; + uint32_t transfer_rectangle_count = + transfer_invocation_first.transfer.GetRectangles( + dest_rt_key.base_tiles, dest_pitch_tiles, + dest_rt_key.msaa_samples, dest_is_64bpp, nullptr, + resolve_clear_rectangle); + for (auto it_merge = std::next(it_merged_first); + it_merge != current_transfer_invocations_.cend(); ++it_merge) { + if (!transfer_invocation_first.CanBeMergedIntoOneDraw(*it_merge)) { + break; + } + transfer_rectangle_count += it_merge->transfer.GetRectangles( + dest_rt_key.base_tiles, dest_pitch_tiles, + dest_rt_key.msaa_samples, dest_is_64bpp, nullptr, + resolve_clear_rectangle); + it_merged_last = it_merge; + } + assert_not_zero(transfer_rectangle_count); + // Skip the merged transfers in the subsequent iterations. + it = it_merged_last; + + assert_not_null(it->transfer.source); + auto& source_vulkan_rt = + *static_cast(it->transfer.source); + auto host_depth_source_vulkan_rt = + static_cast(it->transfer.host_depth_source); + TransferShaderKey transfer_shader_key = it->shader_key; + const TransferModeInfo& transfer_mode_info = + kTransferModes[size_t(transfer_shader_key.mode)]; + TransferPipelineLayoutIndex transfer_pipeline_layout_index = + transfer_mode_info.pipeline_layout; + const TransferPipelineLayoutInfo& transfer_pipeline_layout_info = + kTransferPipelineLayoutInfos[size_t( + transfer_pipeline_layout_index)]; + uint32_t transfer_sample_pipeline_count = + device_features.sampleRateShading + ? 
1 + : uint32_t(1) << uint32_t(dest_rt_key.msaa_samples); + bool transfer_is_stencil_bit = + (transfer_pipeline_layout_info.used_push_constant_dwords & + kTransferUsedPushConstantDwordStencilMaskBit) != 0; + + uint32_t transfer_vertex_count = 6 * transfer_rectangle_count; + VkBuffer transfer_vertex_buffer; + VkDeviceSize transfer_vertex_buffer_offset; + float* transfer_rectangle_write_ptr = + reinterpret_cast(transfer_vertex_buffer_pool_->Request( + current_submission, sizeof(float) * 2 * transfer_vertex_count, + sizeof(float), transfer_vertex_buffer, + transfer_vertex_buffer_offset)); + if (!transfer_rectangle_write_ptr) { + continue; + } + for (auto it_merged = it_merged_first; it_merged <= it_merged_last; + ++it_merged) { + Transfer::Rectangle transfer_invocation_rectangles + [Transfer::kMaxRectanglesWithCutout]; + uint32_t transfer_invocation_rectangle_count = + it_merged->transfer.GetRectangles( + dest_rt_key.base_tiles, dest_pitch_tiles, + dest_rt_key.msaa_samples, dest_is_64bpp, + transfer_invocation_rectangles, resolve_clear_rectangle); + assert_not_zero(transfer_invocation_rectangle_count); + for (uint32_t j = 0; j < transfer_invocation_rectangle_count; ++j) { + const Transfer::Rectangle& transfer_rectangle = + transfer_invocation_rectangles[j]; + float transfer_rectangle_x0 = + -1.0f + transfer_rectangle.x_pixels * pixels_to_ndc_x; + float transfer_rectangle_y0 = + -1.0f + transfer_rectangle.y_pixels * pixels_to_ndc_y; + float transfer_rectangle_x1 = + transfer_rectangle_x0 + + transfer_rectangle.width_pixels * pixels_to_ndc_x; + float transfer_rectangle_y1 = + transfer_rectangle_y0 + + transfer_rectangle.height_pixels * pixels_to_ndc_y; + // O-* + // |/ + // * + *(transfer_rectangle_write_ptr++) = transfer_rectangle_x0; + *(transfer_rectangle_write_ptr++) = transfer_rectangle_y0; + // *-* + // |/ + // O + *(transfer_rectangle_write_ptr++) = transfer_rectangle_x0; + *(transfer_rectangle_write_ptr++) = transfer_rectangle_y1; + // *-O + // |/ + // * + 
*(transfer_rectangle_write_ptr++) = transfer_rectangle_x1; + *(transfer_rectangle_write_ptr++) = transfer_rectangle_y0; + // O + // /| + // *-* + *(transfer_rectangle_write_ptr++) = transfer_rectangle_x1; + *(transfer_rectangle_write_ptr++) = transfer_rectangle_y0; + // * + // /| + // O-* + *(transfer_rectangle_write_ptr++) = transfer_rectangle_x0; + *(transfer_rectangle_write_ptr++) = transfer_rectangle_y1; + // * + // /| + // *-O + *(transfer_rectangle_write_ptr++) = transfer_rectangle_x1; + *(transfer_rectangle_write_ptr++) = transfer_rectangle_y1; + } + } + command_buffer.CmdVkBindVertexBuffers(0, 1, &transfer_vertex_buffer, + &transfer_vertex_buffer_offset); + + const VkPipeline* transfer_pipelines = GetTransferPipelines( + TransferPipelineKey(transfer_render_pass_key, transfer_shader_key)); + if (!transfer_pipelines) { + continue; + } + command_processor_.BindExternalGraphicsPipeline(transfer_pipelines[0]); + if (last_transfer_pipeline_layout_index != + transfer_pipeline_layout_index) { + last_transfer_pipeline_layout_index = transfer_pipeline_layout_index; + transfer_descriptor_sets_bound = 0; + transfer_push_constants_set = 0; + } + + // Invalidate outdated bindings. 
+ if (transfer_pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetHostDepthStencilTexturesBit) { + assert_not_null(host_depth_source_vulkan_rt); + VkDescriptorSet descriptor_set_host_depth_stencil_textures = + host_depth_source_vulkan_rt->GetDescriptorSetTransferSource(); + if (last_descriptor_set_host_depth_stencil_textures != + descriptor_set_host_depth_stencil_textures) { + last_descriptor_set_host_depth_stencil_textures = + descriptor_set_host_depth_stencil_textures; + transfer_descriptor_sets_bound &= + ~kTransferUsedDescriptorSetHostDepthStencilTexturesBit; + } + } + if (transfer_pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetDepthStencilTexturesBit) { + VkDescriptorSet descriptor_set_depth_stencil_textures = + source_vulkan_rt.GetDescriptorSetTransferSource(); + if (last_descriptor_set_depth_stencil_textures != + descriptor_set_depth_stencil_textures) { + last_descriptor_set_depth_stencil_textures = + descriptor_set_depth_stencil_textures; + transfer_descriptor_sets_bound &= + ~kTransferUsedDescriptorSetDepthStencilTexturesBit; + } + } + if (transfer_pipeline_layout_info.used_descriptor_sets & + kTransferUsedDescriptorSetColorTextureBit) { + VkDescriptorSet descriptor_set_color_texture = + source_vulkan_rt.GetDescriptorSetTransferSource(); + if (last_descriptor_set_color_texture != + descriptor_set_color_texture) { + last_descriptor_set_color_texture = descriptor_set_color_texture; + transfer_descriptor_sets_bound &= + ~kTransferUsedDescriptorSetColorTextureBit; + } + } + if (transfer_pipeline_layout_info.used_push_constant_dwords & + kTransferUsedPushConstantDwordHostDepthAddressBit) { + assert_not_null(host_depth_source_vulkan_rt); + RenderTargetKey host_depth_source_rt_key = + host_depth_source_vulkan_rt->key(); + TransferAddressConstant host_depth_address_constant; + host_depth_address_constant.dest_pitch = dest_pitch_tiles; + host_depth_address_constant.source_pitch = + 
host_depth_source_rt_key.GetPitchTiles(); + host_depth_address_constant.source_to_dest = + int32_t(dest_rt_key.base_tiles) - + int32_t(host_depth_source_rt_key.base_tiles); + if (last_host_depth_address_constant != host_depth_address_constant) { + last_host_depth_address_constant = host_depth_address_constant; + transfer_push_constants_set &= + ~kTransferUsedPushConstantDwordHostDepthAddressBit; + } + } + if (transfer_pipeline_layout_info.used_push_constant_dwords & + kTransferUsedPushConstantDwordAddressBit) { + RenderTargetKey source_rt_key = source_vulkan_rt.key(); + TransferAddressConstant address_constant; + address_constant.dest_pitch = dest_pitch_tiles; + address_constant.source_pitch = source_rt_key.GetPitchTiles(); + address_constant.source_to_dest = int32_t(dest_rt_key.base_tiles) - + int32_t(source_rt_key.base_tiles); + if (last_address_constant != address_constant) { + last_address_constant = address_constant; + transfer_push_constants_set &= + ~kTransferUsedPushConstantDwordAddressBit; + } + } + + // Apply the new bindings. + // TODO(Triang3l): Merge binding updates into spans. 
+ VkPipelineLayout transfer_pipeline_layout = + transfer_pipeline_layouts_[size_t(transfer_pipeline_layout_index)]; + uint32_t transfer_descriptor_sets_unbound = + transfer_pipeline_layout_info.used_descriptor_sets & + ~transfer_descriptor_sets_bound; + if (transfer_descriptor_sets_unbound & + kTransferUsedDescriptorSetHostDepthBufferBit) { + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_GRAPHICS, transfer_pipeline_layout, + xe::bit_count(transfer_pipeline_layout_info.used_descriptor_sets & + (kTransferUsedDescriptorSetHostDepthBufferBit - 1)), + 1, &edram_storage_buffer_descriptor_set_, 0, nullptr); + transfer_descriptor_sets_bound |= + kTransferUsedDescriptorSetHostDepthBufferBit; + } + if (transfer_descriptor_sets_unbound & + kTransferUsedDescriptorSetHostDepthStencilTexturesBit) { + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_GRAPHICS, transfer_pipeline_layout, + xe::bit_count( + transfer_pipeline_layout_info.used_descriptor_sets & + (kTransferUsedDescriptorSetHostDepthStencilTexturesBit - 1)), + 1, &last_descriptor_set_host_depth_stencil_textures, 0, nullptr); + transfer_descriptor_sets_bound |= + kTransferUsedDescriptorSetHostDepthStencilTexturesBit; + } + if (transfer_descriptor_sets_unbound & + kTransferUsedDescriptorSetDepthStencilTexturesBit) { + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_GRAPHICS, transfer_pipeline_layout, + xe::bit_count( + transfer_pipeline_layout_info.used_descriptor_sets & + (kTransferUsedDescriptorSetDepthStencilTexturesBit - 1)), + 1, &last_descriptor_set_depth_stencil_textures, 0, nullptr); + transfer_descriptor_sets_bound |= + kTransferUsedDescriptorSetDepthStencilTexturesBit; + } + if (transfer_descriptor_sets_unbound & + kTransferUsedDescriptorSetColorTextureBit) { + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_GRAPHICS, transfer_pipeline_layout, + xe::bit_count(transfer_pipeline_layout_info.used_descriptor_sets & + 
(kTransferUsedDescriptorSetColorTextureBit - 1)), + 1, &last_descriptor_set_color_texture, 0, nullptr); + transfer_descriptor_sets_bound |= + kTransferUsedDescriptorSetColorTextureBit; + } + uint32_t transfer_push_constants_unset = + transfer_pipeline_layout_info.used_push_constant_dwords & + ~transfer_push_constants_set; + if (transfer_push_constants_unset & + kTransferUsedPushConstantDwordHostDepthAddressBit) { + command_buffer.CmdVkPushConstants( + transfer_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, + sizeof(uint32_t) * + xe::bit_count( + transfer_pipeline_layout_info.used_push_constant_dwords & + (kTransferUsedPushConstantDwordHostDepthAddressBit - 1)), + sizeof(uint32_t), &last_host_depth_address_constant); + transfer_push_constants_set |= + kTransferUsedPushConstantDwordHostDepthAddressBit; + } + if (transfer_push_constants_unset & + kTransferUsedPushConstantDwordAddressBit) { + command_buffer.CmdVkPushConstants( + transfer_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, + sizeof(uint32_t) * + xe::bit_count( + transfer_pipeline_layout_info.used_push_constant_dwords & + (kTransferUsedPushConstantDwordAddressBit - 1)), + sizeof(uint32_t), &last_address_constant); + transfer_push_constants_set |= + kTransferUsedPushConstantDwordAddressBit; + } + + for (uint32_t j = 0; j < transfer_sample_pipeline_count; ++j) { + if (j) { + command_processor_.BindExternalGraphicsPipeline( + transfer_pipelines[j]); + } + for (uint32_t k = 0; k < uint32_t(transfer_is_stencil_bit ? 
8 : 1); + ++k) { + if (transfer_is_stencil_bit) { + uint32_t transfer_stencil_bit = uint32_t(1) << k; + command_buffer.CmdVkPushConstants( + transfer_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, + sizeof(uint32_t) * + xe::bit_count( + transfer_pipeline_layout_info + .used_push_constant_dwords & + (kTransferUsedPushConstantDwordStencilMaskBit - 1)), + sizeof(uint32_t), &transfer_stencil_bit); + command_buffer.CmdVkSetStencilWriteMask( + VK_STENCIL_FACE_FRONT_AND_BACK, transfer_stencil_bit); + } + command_buffer.CmdVkDraw(transfer_vertex_count, 1, 0, 0); + } + } + } + } + + // Perform the clear. + if (resolve_clear_needed) { + command_processor_.SubmitBarriersAndEnterRenderTargetCacheRenderPass( + transfer_render_pass, transfer_framebuffer); + VkClearAttachment resolve_clear_attachment; + resolve_clear_attachment.colorAttachment = 0; + std::memset(&resolve_clear_attachment.clearValue, 0, + sizeof(resolve_clear_attachment.clearValue)); + uint64_t clear_value = render_target_resolve_clear_values[i]; + if (dest_rt_key.is_depth) { + resolve_clear_attachment.aspectMask = + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + uint32_t depth_guest_clear_value = + (uint32_t(clear_value) >> 8) & 0xFFFFFF; + switch (dest_rt_key.GetDepthFormat()) { + case xenos::DepthRenderTargetFormat::kD24S8: + resolve_clear_attachment.clearValue.depthStencil.depth = + xenos::UNorm24To32(depth_guest_clear_value); + break; + case xenos::DepthRenderTargetFormat::kD24FS8: + // Taking [0, 2) -> [0, 1) remapping into account. 
+ resolve_clear_attachment.clearValue.depthStencil.depth = + xenos::Float20e4To32(depth_guest_clear_value) * 0.5f; + break; + } + resolve_clear_attachment.clearValue.depthStencil.stencil = + uint32_t(clear_value) & 0xFF; + } else { + resolve_clear_attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + switch (dest_rt_key.GetColorFormat()) { + case xenos::ColorRenderTargetFormat::k_8_8_8_8: + case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: { + for (uint32_t j = 0; j < 4; ++j) { + resolve_clear_attachment.clearValue.color.float32[j] = + ((clear_value >> (j * 8)) & 0xFF) * (1.0f / 0xFF); + } + } break; + case xenos::ColorRenderTargetFormat::k_2_10_10_10: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: { + for (uint32_t j = 0; j < 3; ++j) { + resolve_clear_attachment.clearValue.color.float32[j] = + ((clear_value >> (j * 10)) & 0x3FF) * (1.0f / 0x3FF); + } + resolve_clear_attachment.clearValue.color.float32[3] = + ((clear_value >> 30) & 0x3) * (1.0f / 0x3); + } break; + case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT: + case xenos::ColorRenderTargetFormat:: + k_2_10_10_10_FLOAT_AS_16_16_16_16: { + for (uint32_t j = 0; j < 3; ++j) { + resolve_clear_attachment.clearValue.color.float32[j] = + xenos::Float7e3To32((clear_value >> (j * 10)) & 0x3FF); + } + resolve_clear_attachment.clearValue.color.float32[3] = + ((clear_value >> 30) & 0x3) * (1.0f / 0x3); + } break; + case xenos::ColorRenderTargetFormat::k_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: { + // Using uint for transfers and clears of both. Disregarding the + // current -32...32 vs. -1...1 settings for consistency with color + // clear via depth aliasing. + // TODO(Triang3l): Handle cases of unsupported multisampled 16_UINT + // and completely unsupported 16_UNORM. 
+ for (uint32_t j = 0; j < 2; ++j) { + resolve_clear_attachment.clearValue.color.uint32[j] = + uint32_t(clear_value >> (j * 16)) & 0xFFFF; + } + } break; + case xenos::ColorRenderTargetFormat::k_16_16_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: { + // Using uint for transfers and clears of both. Disregarding the + // current -32...32 vs. -1...1 settings for consistency with color + // clear via depth aliasing. + // TODO(Triang3l): Handle cases of unsupported multisampled 16_UINT + // and completely unsupported 16_UNORM. + for (uint32_t j = 0; j < 4; ++j) { + resolve_clear_attachment.clearValue.color.uint32[j] = + uint32_t(clear_value >> (j * 16)) & 0xFFFF; + } + } break; + case xenos::ColorRenderTargetFormat::k_32_FLOAT: { + // Using uint for proper denormal and NaN handling. + resolve_clear_attachment.clearValue.color.uint32[0] = + uint32_t(clear_value); + } break; + case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: { + // Using uint for proper denormal and NaN handling. 
+ resolve_clear_attachment.clearValue.color.uint32[0] = + uint32_t(clear_value); + resolve_clear_attachment.clearValue.color.uint32[1] = + uint32_t(clear_value >> 32); + } break; + } + } + command_buffer.CmdVkClearAttachments(1, &resolve_clear_attachment, 1, + &resolve_clear_rect); + } + } +} + } // namespace vulkan } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h index 97bb690af..c98da4974 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h @@ -10,13 +10,20 @@ #ifndef XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_ #define XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_ +#include #include #include +#include +#include #include #include "xenia/base/hash.h" +#include "xenia/base/xxhash.h" #include "xenia/gpu/render_target_cache.h" +#include "xenia/gpu/xenos.h" +#include "xenia/ui/vulkan/single_layout_descriptor_set_pool.h" #include "xenia/ui/vulkan/vulkan_provider.h" +#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h" namespace xe { namespace gpu { @@ -28,8 +35,12 @@ class VulkanRenderTargetCache final : public RenderTargetCache { public: union RenderPassKey { struct { - // If emulating 2x as 4x, set this to 4x for 2x not to create unnecessary - // render pass objects. + // If emulating 2x as 4x, this is still 2x for simplicity of using this + // field to make guest-related decisions. Render pass objects are not very + // expensive, and their dependencies can't be shared between 2x-as-4x and + // true 4x MSAA passes (framebuffers because render target cache render + // targets are different for 2x and 4x guest MSAA, pipelines because the + // sample mask will have 2 samples excluded for 2x-as-4x). xenos::MsaaSamples msaa_samples : xenos::kMsaaSamplesBits; // 2 // << 0 is depth, << 1...4 is color. 
uint32_t depth_and_color_used : 1 + xenos::kMaxColorRenderTargets; // 7 @@ -46,7 +57,8 @@ class VulkanRenderTargetCache final : public RenderTargetCache { xenos::ColorRenderTargetFormat color_2_view_format : xenos::kColorRenderTargetFormatBits; // 20 xenos::ColorRenderTargetFormat color_3_view_format - : xenos::kColorRenderTargetFormatBits; // 24 + : xenos::kColorRenderTargetFormatBits; // 24 + uint32_t color_rts_use_transfer_formats : 1; // 25 }; uint32_t key = 0; struct Hasher { @@ -60,6 +72,9 @@ class VulkanRenderTargetCache final : public RenderTargetCache { bool operator!=(const RenderPassKey& other_key) const { return !(*this == other_key); } + bool operator<(const RenderPassKey& other_key) const { + return key < other_key.key; + } }; static_assert_size(RenderPassKey, sizeof(uint32_t)); @@ -78,12 +93,14 @@ class VulkanRenderTargetCache final : public RenderTargetCache { void Shutdown(bool from_destructor = false); void ClearCache() override; - // TOOD(Triang3l): Fragment shader interlock. + void CompletedSubmissionUpdated(); + void EndSubmission(); + + // TODO(Triang3l): Fragment shader interlock. Path GetPath() const override { return Path::kHostRenderTargets; } - // TODO(Triang3l): Resolution scaling. 
- uint32_t GetResolutionScaleX() const override { return 1; } - uint32_t GetResolutionScaleY() const override { return 1; } + uint32_t GetResolutionScaleX() const override { return resolution_scale_x_; } + uint32_t GetResolutionScaleY() const override { return resolution_scale_y_; } bool Update(bool is_rasterization_done, uint32_t shader_writes_color_targets) override; @@ -98,6 +115,17 @@ class VulkanRenderTargetCache final : public RenderTargetCache { return last_update_framebuffer_; } + bool msaa_2x_attachments_supported() const { + return msaa_2x_attachments_supported_; + } + bool msaa_2x_no_attachments_supported() const { + return msaa_2x_no_attachments_supported_; + } + bool IsMsaa2xSupported(bool subpass_has_attachments) const { + return subpass_has_attachments ? msaa_2x_attachments_supported_ + : msaa_2x_no_attachments_supported_; + } + // Returns the render pass object, or VK_NULL_HANDLE if failed to create. // A render pass managed by the render target cache may be ended and resumed // at any time (to allow for things like copying and texture loading). @@ -110,6 +138,99 @@ class VulkanRenderTargetCache final : public RenderTargetCache { bool* is_integer_out = nullptr) const; protected: + uint32_t GetMaxRenderTargetWidth() const override; + uint32_t GetMaxRenderTargetHeight() const override; + + RenderTarget* CreateRenderTarget(RenderTargetKey key) override; + + // TODO(Triang3l): Check actual unorm24 support. + bool IsHostDepthEncodingDifferent( + xenos::DepthRenderTargetFormat format) const override { + return true; + } + + private: + enum class EdramBufferUsage { + // There's no need for combined fragment and compute usages. + // With host render targets, the usual usage sequence is as follows: + // - Optionally compute writes - host depth copy storing for EDRAM range + // ownership transfers. + // - Optionally fragment reads - host depth copy storing for EDRAM range + // ownership transfers. 
+ // - Compute writes - copying from host render targets during resolving. + // - Compute reads - writing to the shared memory during resolving. + // With the render backend implementation based on fragment shader + // interlocks, it's: + // - Fragment reads and writes - depth / stencil and color operations. + // - Compute reads - writing to the shared memory during resolving. + // So, fragment reads and compute reads normally don't follow each other, + // and there's no need to amortize the cost of a read > read barrier in an + // exceptional situation by using a wider barrier in the normal scenario. + + // Host depth copy storing. + kFragmentRead, + // Fragment shader interlock depth / stencil and color operations. + kFragmentReadWrite, + // Resolve - copying to the shared memory. + kComputeRead, + // Resolve - copying from host render targets. + kComputeWrite, + // Trace recording. + kTransferRead, + // Trace playback. + kTransferWrite, + }; + enum class EdramBufferModificationStatus { + // The values are ordered by how strong the barrier conditions are. + // No uncommitted shader writes. + kUnmodified, + // Need to commit before the next fragment shader interlock usage with + // overlap. + kViaFragmentShaderInterlock, + // Need to commit before any next fragment shader interlock usage. + kViaUnordered, + }; + static void GetEdramBufferUsageMasks(EdramBufferUsage usage, + VkPipelineStageFlags& stage_mask_out, + VkAccessFlags& access_mask_out); + void UseEdramBuffer(EdramBufferUsage new_usage); + void MarkEdramBufferModified( + EdramBufferModificationStatus modification_status = + EdramBufferModificationStatus::kViaUnordered); + void CommitEdramBufferShaderWrites( + EdramBufferModificationStatus commit_status = + EdramBufferModificationStatus::kViaFragmentShaderInterlock); + + VulkanCommandProcessor& command_processor_; + + uint32_t resolution_scale_x_ = 1; + uint32_t resolution_scale_y_ = 1; + + // Accessible in fragment and compute shaders. 
+ VkDescriptorSetLayout descriptor_set_layout_storage_buffer_ = VK_NULL_HANDLE; + VkDescriptorSetLayout descriptor_set_layout_sampled_image_ = VK_NULL_HANDLE; + VkDescriptorSetLayout descriptor_set_layout_sampled_image_x2_ = + VK_NULL_HANDLE; + + std::unique_ptr + descriptor_set_pool_sampled_image_; + std::unique_ptr + descriptor_set_pool_sampled_image_x2_; + + VkDeviceMemory edram_buffer_memory_ = VK_NULL_HANDLE; + VkBuffer edram_buffer_ = VK_NULL_HANDLE; + EdramBufferUsage edram_buffer_usage_; + EdramBufferModificationStatus edram_buffer_modification_status_ = + EdramBufferModificationStatus::kUnmodified; + VkDescriptorPool edram_storage_buffer_descriptor_pool_ = VK_NULL_HANDLE; + VkDescriptorSet edram_storage_buffer_descriptor_set_; + + // RenderPassKey::key -> VkRenderPass. + // VK_NULL_HANDLE if failed to create. + std::unordered_map render_passes_; + + // For host render targets. + // Can only be destroyed when framebuffers referencing it are destroyed! class VulkanRenderTarget final : public RenderTarget { public: @@ -131,27 +252,45 @@ class VulkanRenderTargetCache final : public RenderTargetCache { // Takes ownership of the Vulkan objects passed to the constructor. 
VulkanRenderTarget(RenderTargetKey key, - const ui::vulkan::VulkanProvider& provider, + VulkanRenderTargetCache& render_target_cache, VkImage image, VkDeviceMemory memory, VkImageView view_depth_color, VkImageView view_depth_stencil, VkImageView view_stencil, VkImageView view_srgb, - VkImageView view_color_transfer_separate) + VkImageView view_color_transfer_separate, + size_t descriptor_set_index_transfer_source) : RenderTarget(key), - provider_(provider), + render_target_cache_(render_target_cache), image_(image), memory_(memory), view_depth_color_(view_depth_color), view_depth_stencil_(view_depth_stencil), view_stencil_(view_stencil), view_srgb_(view_srgb), - view_color_transfer_separate_(view_color_transfer_separate) {} + view_color_transfer_separate_(view_color_transfer_separate), + descriptor_set_index_transfer_source_( + descriptor_set_index_transfer_source) {} ~VulkanRenderTarget(); VkImage image() const { return image_; } VkImageView view_depth_color() const { return view_depth_color_; } VkImageView view_depth_stencil() const { return view_depth_stencil_; } + VkImageView view_color_transfer_separate() const { + return view_color_transfer_separate_; + } + VkImageView view_color_transfer() const { + return view_color_transfer_separate_ != VK_NULL_HANDLE + ? view_color_transfer_separate_ + : view_depth_color_; + } + VkDescriptorSet GetDescriptorSetTransferSource() const { + ui::vulkan::SingleLayoutDescriptorSetPool& descriptor_set_pool = + key().is_depth + ? 
*render_target_cache_.descriptor_set_pool_sampled_image_x2_ + : *render_target_cache_.descriptor_set_pool_sampled_image_; + return descriptor_set_pool.Get(descriptor_set_index_transfer_source_); + } static void GetDrawUsage(bool is_depth, VkPipelineStageFlags* stage_mask_out, @@ -185,8 +324,13 @@ class VulkanRenderTargetCache final : public RenderTargetCache { current_layout_ = layout; } + uint32_t temporary_sort_index() const { return temporary_sort_index_; } + void SetTemporarySortIndex(uint32_t index) { + temporary_sort_index_ = index; + } + private: - const ui::vulkan::VulkanProvider& provider_; + VulkanRenderTargetCache& render_target_cache_; VkImage image_; VkDeviceMemory memory_; @@ -200,30 +344,17 @@ class VulkanRenderTargetCache final : public RenderTargetCache { VkImageView view_srgb_; VkImageView view_color_transfer_separate_; + // 2 sampled images for depth / stencil, 1 sampled image for color. + size_t descriptor_set_index_transfer_source_; + VkPipelineStageFlags current_stage_mask_ = 0; VkAccessFlags current_access_mask_ = 0; VkImageLayout current_layout_ = VK_IMAGE_LAYOUT_UNDEFINED; + + // Temporary storage for indices in operations like transfers and dumps. + uint32_t temporary_sort_index_ = 0; }; - uint32_t GetMaxRenderTargetWidth() const override; - uint32_t GetMaxRenderTargetHeight() const override; - - RenderTarget* CreateRenderTarget(RenderTargetKey key) override; - - // TODO(Triang3l): Check actual unorm24 support. - bool IsHostDepthEncodingDifferent( - xenos::DepthRenderTargetFormat format) const override { - return true; - } - - private: - VulkanCommandProcessor& command_processor_; - - // RenderPassKey::key -> VkRenderPass. - std::unordered_map render_passes_; - - // For host render targets. 
- struct FramebufferKey { RenderPassKey render_pass_key; @@ -254,13 +385,276 @@ class VulkanRenderTargetCache final : public RenderTargetCache { void Reset() { std::memset(this, 0, sizeof(*this)); } }; + enum TransferUsedDescriptorSet : uint32_t { + // Ordered from the least to the most frequently changed. + kTransferUsedDescriptorSetHostDepthBuffer, + kTransferUsedDescriptorSetHostDepthStencilTextures, + kTransferUsedDescriptorSetDepthStencilTextures, + // Mutually exclusive with kTransferUsedDescriptorSetDepthStencilTextures. + kTransferUsedDescriptorSetColorTexture, + + kTransferUsedDescriptorSetCount, + + kTransferUsedDescriptorSetHostDepthBufferBit = + uint32_t(1) << kTransferUsedDescriptorSetHostDepthBuffer, + kTransferUsedDescriptorSetHostDepthStencilTexturesBit = + uint32_t(1) << kTransferUsedDescriptorSetHostDepthStencilTextures, + kTransferUsedDescriptorSetDepthStencilTexturesBit = + uint32_t(1) << kTransferUsedDescriptorSetDepthStencilTextures, + kTransferUsedDescriptorSetColorTextureBit = + uint32_t(1) << kTransferUsedDescriptorSetColorTexture, + }; + + // 32-bit push constants (for simplicity of size calculation and to avoid + // std140 packing issues). + enum TransferUsedPushConstantDword : uint32_t { + kTransferUsedPushConstantDwordHostDepthAddress, + kTransferUsedPushConstantDwordAddress, + // Changed 8 times per transfer. 
+ kTransferUsedPushConstantDwordStencilMask, + + kTransferUsedPushConstantDwordCount, + + kTransferUsedPushConstantDwordHostDepthAddressBit = + uint32_t(1) << kTransferUsedPushConstantDwordHostDepthAddress, + kTransferUsedPushConstantDwordAddressBit = + uint32_t(1) << kTransferUsedPushConstantDwordAddress, + kTransferUsedPushConstantDwordStencilMaskBit = + uint32_t(1) << kTransferUsedPushConstantDwordStencilMask, + }; + + enum class TransferPipelineLayoutIndex { + kColor, + kDepth, + kColorToStencilBit, + kDepthToStencilBit, + kColorAndHostDepthTexture, + kColorAndHostDepthBuffer, + kDepthAndHostDepthTexture, + kDepthAndHostDepthBuffer, + + kCount, + }; + + struct TransferPipelineLayoutInfo { + uint32_t used_descriptor_sets; + uint32_t used_push_constant_dwords; + }; + + static const TransferPipelineLayoutInfo + kTransferPipelineLayoutInfos[size_t(TransferPipelineLayoutIndex::kCount)]; + + enum class TransferMode : uint32_t { + kColorToDepth, + kColorToColor, + + kDepthToDepth, + kDepthToColor, + + kColorToStencilBit, + kDepthToStencilBit, + + // Two-source modes, using the host depth if it, when converted to the guest + // format, matches what's in the owner source (not modified, keep host + // precision), or the guest data otherwise (significantly modified, possibly + // cleared). Stencil for FragStencilRef is always taken from the guest + // source. + + kColorAndHostDepthToDepth, + // When using different source and destination depth formats. + kDepthAndHostDepthToDepth, + + // If host depth is fetched, but it's the same image as the destination, + // it's copied to the EDRAM buffer (but since it's just a scratch buffer, + // with tiles laid out linearly with the same pitch as in the original + // render target; also no swapping of 40-sample columns as opposed to the + // host render target - this is done only for the color source) and fetched + // from there instead of the host depth texture. 
+ kColorAndHostDepthCopyToDepth, + kDepthAndHostDepthCopyToDepth, + + kCount, + }; + + enum class TransferOutput { + kColor, + kDepth, + kStencilBit, + }; + + struct TransferModeInfo { + TransferOutput output; + TransferPipelineLayoutIndex pipeline_layout; + }; + + static const TransferModeInfo kTransferModes[size_t(TransferMode::kCount)]; + + union TransferShaderKey { + uint32_t key; + struct { + xenos::MsaaSamples dest_msaa_samples : xenos::kMsaaSamplesBits; + uint32_t dest_color_rt_index : xenos::kColorRenderTargetIndexBits; + uint32_t dest_resource_format : xenos::kRenderTargetFormatBits; + xenos::MsaaSamples source_msaa_samples : xenos::kMsaaSamplesBits; + // Always 1x when the host depth is a copy from a buffer rather than an + // image, not to create the same pipeline for different MSAA sample counts + // as it doesn't matter in this case. + xenos::MsaaSamples host_depth_source_msaa_samples + : xenos::kMsaaSamplesBits; + uint32_t source_resource_format : xenos::kRenderTargetFormatBits; + + // Last bits because this affects the pipeline layout - after sorting, + // only change it as fewer times as possible. Depth buffers have an + // additional stencil texture. 
+ static_assert(size_t(TransferMode::kCount) <= (size_t(1) << 4)); + TransferMode mode : 4; + }; + + TransferShaderKey() : key(0) { static_assert_size(*this, sizeof(key)); } + + struct Hasher { + size_t operator()(const TransferShaderKey& key) const { + return std::hash{}(key.key); + } + }; + bool operator==(const TransferShaderKey& other_key) const { + return key == other_key.key; + } + bool operator!=(const TransferShaderKey& other_key) const { + return !(*this == other_key); + } + bool operator<(const TransferShaderKey& other_key) const { + return key < other_key.key; + } + }; + + struct TransferPipelineKey { + RenderPassKey render_pass_key; + TransferShaderKey shader_key; + + TransferPipelineKey(RenderPassKey render_pass_key, + TransferShaderKey shader_key) + : render_pass_key(render_pass_key), shader_key(shader_key) {} + + struct Hasher { + size_t operator()(const TransferPipelineKey& key) const { + XXH3_state_t hash_state; + XXH3_64bits_reset(&hash_state); + XXH3_64bits_update(&hash_state, &key.render_pass_key, + sizeof(key.render_pass_key)); + XXH3_64bits_update(&hash_state, &key.shader_key, + sizeof(key.shader_key)); + return static_cast(XXH3_64bits_digest(&hash_state)); + } + }; + bool operator==(const TransferPipelineKey& other_key) const { + return render_pass_key == other_key.render_pass_key && + shader_key == other_key.shader_key; + } + bool operator!=(const TransferPipelineKey& other_key) const { + return !(*this == other_key); + } + bool operator<(const TransferPipelineKey& other_key) const { + if (render_pass_key != other_key.render_pass_key) { + return render_pass_key < other_key.render_pass_key; + } + return shader_key < other_key.shader_key; + } + }; + + union TransferAddressConstant { + uint32_t constant; + struct { + // All in tiles. 
+ uint32_t dest_pitch : xenos::kEdramPitchTilesBits; + uint32_t source_pitch : xenos::kEdramPitchTilesBits; + // Safe to use 12 bits for signed difference - no ownership transfer can + // ever occur between render targets with EDRAM base >= 2048 as this would + // result in 0-length spans. 10 + 10 + 12 is exactly 32, any more bits, + // and more root 32-bit constants will be used. + // Destination base in tiles minus source base in tiles (not vice versa + // because this is a transform of the coordinate system, not addresses + // themselves). + // 0 for host_depth_source_is_copy (ignored in this case anyway as + // destination == source anyway). + int32_t source_to_dest : xenos::kEdramBaseTilesBits; + }; + TransferAddressConstant() : constant(0) { + static_assert_size(*this, sizeof(constant)); + } + bool operator==(const TransferAddressConstant& other_constant) const { + return constant == other_constant.constant; + } + bool operator!=(const TransferAddressConstant& other_constant) const { + return !(*this == other_constant); + } + }; + + struct TransferInvocation { + Transfer transfer; + TransferShaderKey shader_key; + TransferInvocation(const Transfer& transfer, + const TransferShaderKey& shader_key) + : transfer(transfer), shader_key(shader_key) {} + bool operator<(const TransferInvocation& other_invocation) { + // TODO(Triang3l): See if it may be better to sort by the source in the + // first place, especially when reading the same data multiple times (like + // to write the stencil bits after depth) for better read locality. + // Sort by the shader key primarily to reduce pipeline state (context) + // switches. + if (shader_key != other_invocation.shader_key) { + return shader_key < other_invocation.shader_key; + } + // Host depth render targets are changed rarely if they exist, won't save + // many binding changes, ignore them for simplicity (their existence is + // caught by the shader key change). 
+ assert_not_null(transfer.source); + assert_not_null(other_invocation.transfer.source); + uint32_t source_index = + static_cast(transfer.source) + ->temporary_sort_index(); + uint32_t other_source_index = static_cast( + other_invocation.transfer.source) + ->temporary_sort_index(); + if (source_index != other_source_index) { + return source_index < other_source_index; + } + return transfer.start_tiles < other_invocation.transfer.start_tiles; + } + bool CanBeMergedIntoOneDraw( + const TransferInvocation& other_invocation) const { + return shader_key == other_invocation.shader_key && + transfer.AreSourcesSame(other_invocation.transfer); + } + }; + // Returns the framebuffer object, or VK_NULL_HANDLE if failed to create. const Framebuffer* GetFramebuffer( RenderPassKey render_pass_key, uint32_t pitch_tiles_at_32bpp, const RenderTarget* const* depth_and_color_render_targets); + VkShaderModule GetTransferShader(TransferShaderKey key); + // With sample-rate shading, returns a pointer to one pipeline. Without + // sample-rate shading, returns a pointer to as many pipelines as there are + // samples. If there was a failure to create a pipeline, returns nullptr. + VkPipeline const* GetTransferPipelines(TransferPipelineKey key); + + // Do ownership transfers for render targets - each render target / vector may + // be null / empty in case there's nothing to do for them. + // resolve_clear_rectangle is expected to be provided by + // PrepareHostRenderTargetsResolveClear which should do all the needed size + // bound checks. 
+ void PerformTransfersAndResolveClears( + uint32_t render_target_count, RenderTarget* const* render_targets, + const std::vector* render_target_transfers, + const uint64_t* render_target_resolve_clear_values = nullptr, + const Transfer::Rectangle* resolve_clear_rectangle = nullptr); + bool gamma_render_target_as_srgb_ = false; + bool msaa_2x_attachments_supported_ = false; + bool msaa_2x_no_attachments_supported_ = false; + std::unordered_map framebuffers_; @@ -271,6 +665,32 @@ class VulkanRenderTargetCache final : public RenderTargetCache { last_update_framebuffer_attachments_[1 + xenos::kMaxColorRenderTargets] = {}; const Framebuffer* last_update_framebuffer_ = VK_NULL_HANDLE; + + // Set 0 - EDRAM storage buffer, set 1 - source depth sampled image (and + // unused stencil from the transfer descriptor set), HostDepthStoreConstants + // passed via push constants. + VkPipelineLayout host_depth_store_pipeline_layout_ = VK_NULL_HANDLE; + VkPipeline host_depth_store_pipelines_[size_t(xenos::MsaaSamples::k4X) + 1] = + {}; + + std::unique_ptr + transfer_vertex_buffer_pool_; + VkShaderModule transfer_passthrough_vertex_shader_ = VK_NULL_HANDLE; + VkPipelineLayout transfer_pipeline_layouts_[size_t( + TransferPipelineLayoutIndex::kCount)] = {}; + // VK_NULL_HANDLE if failed to create. + std::unordered_map + transfer_shaders_; + // With sample-rate shading, one pipeline per entry. Without sample-rate + // shading, one pipeline per sample per entry. VK_NULL_HANDLE if failed to + // create. + std::unordered_map, + TransferPipelineKey::Hasher> + transfer_pipelines_; + + // Temporary storage for PerformTransfersAndResolveClears. 
+ std::vector current_transfer_invocations_; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc index 0d95189da..788b8166a 100644 --- a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc @@ -177,6 +177,10 @@ bool VulkanSharedMemory::Initialize() { } } + // The first usage will likely be uploading. + last_usage_ = Usage::kTransferDestination; + last_written_range_ = std::make_pair(0, 0); + upload_buffer_pool_ = std::make_unique( provider, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, xe::align(ui::vulkan::VulkanUploadBufferPool::kDefaultPageSize, @@ -190,9 +194,6 @@ void VulkanSharedMemory::Shutdown(bool from_destructor) { upload_buffer_pool_.reset(); - last_written_range_ = std::make_pair(0, 0); - last_usage_ = Usage::kTransferDestination; - const ui::vulkan::VulkanProvider& provider = command_processor_.GetVulkanProvider(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); @@ -226,8 +227,8 @@ void VulkanSharedMemory::Use(Usage usage, if (last_usage_ != usage || last_written_range_.second) { VkPipelineStageFlags src_stage_mask, dst_stage_mask; VkAccessFlags src_access_mask, dst_access_mask; - GetBarrier(last_usage_, src_stage_mask, src_access_mask); - GetBarrier(usage, dst_stage_mask, dst_access_mask); + GetUsageMasks(last_usage_, src_stage_mask, src_access_mask); + GetUsageMasks(usage, dst_stage_mask, dst_access_mask); VkDeviceSize offset, size; if (last_usage_ == usage) { // Committing the previous write, while not changing the access mask @@ -447,9 +448,9 @@ bool VulkanSharedMemory::UploadRanges( return successful; } -void VulkanSharedMemory::GetBarrier(Usage usage, - VkPipelineStageFlags& stage_mask, - VkAccessFlags& access_mask) const { +void VulkanSharedMemory::GetUsageMasks(Usage usage, + VkPipelineStageFlags& stage_mask, + VkAccessFlags& access_mask) const { switch (usage) { case Usage::kComputeWrite: stage_mask = 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.h b/src/xenia/gpu/vulkan/vulkan_shared_memory.h index 0d8e90813..b37949ec8 100644 --- a/src/xenia/gpu/vulkan/vulkan_shared_memory.h +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.h @@ -47,8 +47,8 @@ class VulkanSharedMemory : public SharedMemory { kComputeWrite, kTransferDestination, }; - // Places pipeline barrier for the target usage, also ensuring writes of - // adjacent are ordered with writes of each other and reads. + // Inserts a pipeline barrier for the target usage, also ensuring consecutive + // read-write accesses are ordered with each other. void Use(Usage usage, std::pair written_range = {}); VkBuffer buffer() const { return buffer_; } @@ -65,8 +65,8 @@ class VulkanSharedMemory : public SharedMemory { upload_page_ranges) override; private: - void GetBarrier(Usage usage, VkPipelineStageFlags& stage_mask, - VkAccessFlags& access_mask) const; + void GetUsageMasks(Usage usage, VkPipelineStageFlags& stage_mask, + VkAccessFlags& access_mask) const; VulkanCommandProcessor& command_processor_; TraceWriter& trace_writer_; @@ -76,9 +76,8 @@ class VulkanSharedMemory : public SharedMemory { // Single for non-sparse, every allocation so far for sparse. std::vector buffer_memory_; - // First usage will likely be uploading. 
- Usage last_usage_ = Usage::kTransferDestination; - std::pair last_written_range_ = {}; + Usage last_usage_; + std::pair last_written_range_; std::unique_ptr upload_buffer_pool_; std::vector upload_regions_; diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index d2279a7b8..2f88bc74c 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -248,6 +248,7 @@ enum class MsaaSamples : uint32_t { constexpr uint32_t kMsaaSamplesBits = 2; +constexpr uint32_t kColorRenderTargetIndexBits = 2; constexpr uint32_t kMaxColorRenderTargets = 4; enum class ColorRenderTargetFormat : uint32_t { diff --git a/src/xenia/ui/vulkan/functions/device_1_0.inc b/src/xenia/ui/vulkan/functions/device_1_0.inc index 2a979f55f..148d6dd52 100644 --- a/src/xenia/ui/vulkan/functions/device_1_0.inc +++ b/src/xenia/ui/vulkan/functions/device_1_0.inc @@ -15,6 +15,7 @@ XE_UI_VULKAN_FUNCTION(vkCmdClearColorImage) XE_UI_VULKAN_FUNCTION(vkCmdCopyBuffer) XE_UI_VULKAN_FUNCTION(vkCmdCopyBufferToImage) XE_UI_VULKAN_FUNCTION(vkCmdCopyImageToBuffer) +XE_UI_VULKAN_FUNCTION(vkCmdDispatch) XE_UI_VULKAN_FUNCTION(vkCmdDraw) XE_UI_VULKAN_FUNCTION(vkCmdDrawIndexed) XE_UI_VULKAN_FUNCTION(vkCmdEndRenderPass) @@ -29,6 +30,7 @@ XE_UI_VULKAN_FUNCTION(vkCmdSetStencilWriteMask) XE_UI_VULKAN_FUNCTION(vkCmdSetViewport) XE_UI_VULKAN_FUNCTION(vkCreateBuffer) XE_UI_VULKAN_FUNCTION(vkCreateCommandPool) +XE_UI_VULKAN_FUNCTION(vkCreateComputePipelines) XE_UI_VULKAN_FUNCTION(vkCreateDescriptorPool) XE_UI_VULKAN_FUNCTION(vkCreateDescriptorSetLayout) XE_UI_VULKAN_FUNCTION(vkCreateFence) diff --git a/src/xenia/ui/vulkan/single_layout_descriptor_set_pool.cc b/src/xenia/ui/vulkan/single_layout_descriptor_set_pool.cc new file mode 100644 index 000000000..8dfff2a3f --- /dev/null +++ b/src/xenia/ui/vulkan/single_layout_descriptor_set_pool.cc @@ -0,0 +1,120 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + 
****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/ui/vulkan/single_layout_descriptor_set_pool.h" + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" + +namespace xe { +namespace ui { +namespace vulkan { + +SingleLayoutDescriptorSetPool::SingleLayoutDescriptorSetPool( + const VulkanProvider& provider, uint32_t pool_set_count, + uint32_t set_layout_descriptor_counts_count, + const VkDescriptorPoolSize* set_layout_descriptor_counts, + VkDescriptorSetLayout set_layout) + : provider_(provider), + pool_set_count_(pool_set_count), + set_layout_(set_layout) { + assert_not_zero(pool_set_count); + pool_descriptor_counts_.resize(set_layout_descriptor_counts_count); + for (uint32_t i = 0; i < set_layout_descriptor_counts_count; ++i) { + VkDescriptorPoolSize& pool_descriptor_type_count = + pool_descriptor_counts_[i]; + const VkDescriptorPoolSize& set_layout_descriptor_type_count = + set_layout_descriptor_counts[i]; + pool_descriptor_type_count.type = set_layout_descriptor_type_count.type; + pool_descriptor_type_count.descriptorCount = + set_layout_descriptor_type_count.descriptorCount * pool_set_count; + } +} + +SingleLayoutDescriptorSetPool::~SingleLayoutDescriptorSetPool() { + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + if (current_pool_ != VK_NULL_HANDLE) { + dfn.vkDestroyDescriptorPool(device, current_pool_, nullptr); + } + for (VkDescriptorPool pool : full_pools_) { + dfn.vkDestroyDescriptorPool(device, pool, nullptr); + } +} + +size_t SingleLayoutDescriptorSetPool::Allocate() { + if (!descriptor_sets_free_.empty()) { + size_t free_index = descriptor_sets_free_.back(); + descriptor_sets_free_.pop_back(); + return free_index; 
+ } + + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + + // Two iterations so if vkAllocateDescriptorSets fails even with a non-zero + // current_pool_sets_remaining_, another attempt will be made in a new pool. + for (uint32_t i = 0; i < 2; ++i) { + if (current_pool_ != VK_NULL_HANDLE && !current_pool_sets_remaining_) { + full_pools_.push_back(current_pool_); + current_pool_ = VK_NULL_HANDLE; + } + if (current_pool_ == VK_NULL_HANDLE) { + VkDescriptorPoolCreateInfo pool_create_info; + pool_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + pool_create_info.pNext = nullptr; + pool_create_info.flags = 0; + pool_create_info.maxSets = pool_set_count_; + pool_create_info.poolSizeCount = uint32_t(pool_descriptor_counts_.size()); + pool_create_info.pPoolSizes = pool_descriptor_counts_.data(); + if (dfn.vkCreateDescriptorPool(device, &pool_create_info, nullptr, + ¤t_pool_) != VK_SUCCESS) { + XELOGE( + "SingleLayoutDescriptorSetPool: Failed to create a descriptor " + "pool"); + return SIZE_MAX; + } + current_pool_sets_remaining_ = pool_set_count_; + } + + VkDescriptorSetAllocateInfo descriptor_set_allocate_info; + descriptor_set_allocate_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + descriptor_set_allocate_info.pNext = nullptr; + descriptor_set_allocate_info.descriptorPool = current_pool_; + descriptor_set_allocate_info.descriptorSetCount = 1; + descriptor_set_allocate_info.pSetLayouts = &set_layout_; + VkDescriptorSet descriptor_set; + if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, + &descriptor_set) != VK_SUCCESS) { + XELOGE( + "SingleLayoutDescriptorSetPool: Failed to allocate a descriptor " + "layout"); + if (current_pool_sets_remaining_ >= pool_set_count_) { + // Failed to allocate in a new pool - something completely wrong, don't + // store empty pools as full. 
+ dfn.vkDestroyDescriptorPool(device, current_pool_, nullptr); + current_pool_ = VK_NULL_HANDLE; + return SIZE_MAX; + } + full_pools_.push_back(current_pool_); + current_pool_ = VK_NULL_HANDLE; + } + --current_pool_sets_remaining_; + descriptor_sets_.push_back(descriptor_set); + return descriptor_sets_.size() - 1; + } + + // Both attempts have failed. + return SIZE_MAX; +} + +} // namespace vulkan +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/vulkan/single_layout_descriptor_set_pool.h b/src/xenia/ui/vulkan/single_layout_descriptor_set_pool.h new file mode 100644 index 000000000..c3f3eb080 --- /dev/null +++ b/src/xenia/ui/vulkan/single_layout_descriptor_set_pool.h @@ -0,0 +1,63 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_UI_VULKAN_SINGLE_DESCRIPTOR_SET_POOL_H_ +#define XENIA_UI_VULKAN_SINGLE_DESCRIPTOR_SET_POOL_H_ + +#include +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace ui { +namespace vulkan { + +class SingleLayoutDescriptorSetPool { + public: + // set_layout_descriptor_counts must contain the numbers of descriptors of + // each type in a single set with the layout (the multiplication by the pool + // set count will be done internally). The descriptor set layout must not be + // destroyed until this object is also destroyed. 
+ SingleLayoutDescriptorSetPool( + const VulkanProvider& provider, uint32_t pool_set_count, + uint32_t set_layout_descriptor_counts_count, + const VkDescriptorPoolSize* set_layout_descriptor_counts, + VkDescriptorSetLayout set_layout); + ~SingleLayoutDescriptorSetPool(); + + // Returns SIZE_MAX in case of a failure. + size_t Allocate(); + void Free(size_t index) { + assert_true(index < descriptor_sets_.size()); + descriptor_sets_free_.push_back(index); + } + VkDescriptorSet Get(size_t index) const { return descriptor_sets_[index]; } + + private: + const VulkanProvider& provider_; + uint32_t pool_set_count_; + std::vector pool_descriptor_counts_; + VkDescriptorSetLayout set_layout_; + + std::vector full_pools_; + VkDescriptorPool current_pool_ = VK_NULL_HANDLE; + uint32_t current_pool_sets_remaining_ = 0; + + std::vector descriptor_sets_; + std::vector descriptor_sets_free_; +}; + +} // namespace vulkan +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_VULKAN_SINGLE_DESCRIPTOR_SET_POOL_H_ diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index 2d93485ff..eb48cfa23 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -715,6 +715,8 @@ bool VulkanProvider::Initialize() { static const std::pair kUsedDeviceExtensions[] = { {"VK_EXT_fragment_shader_interlock", offsetof(DeviceExtensions, ext_fragment_shader_interlock)}, + {"VK_EXT_shader_stencil_export", + offsetof(DeviceExtensions, ext_shader_stencil_export)}, {"VK_KHR_dedicated_allocation", offsetof(DeviceExtensions, khr_dedicated_allocation)}, {"VK_KHR_image_format_list", @@ -946,6 +948,8 @@ bool VulkanProvider::Initialize() { XELOGVK("Vulkan device extensions:"); XELOGVK("* VK_EXT_fragment_shader_interlock: {}", device_extensions_.ext_fragment_shader_interlock ? "yes" : "no"); + XELOGVK("* VK_EXT_shader_stencil_export: {}", + device_extensions_.ext_shader_stencil_export ? 
"yes" : "no"); XELOGVK("* VK_KHR_dedicated_allocation: {}", device_extensions_.khr_dedicated_allocation ? "yes" : "no"); XELOGVK("* VK_KHR_image_format_list: {}", diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h index 0887b88ac..83f4d587f 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.h +++ b/src/xenia/ui/vulkan/vulkan_provider.h @@ -132,6 +132,7 @@ class VulkanProvider : public GraphicsProvider { } struct DeviceExtensions { bool ext_fragment_shader_interlock; + bool ext_shader_stencil_export; // Core since 1.1.0. bool khr_dedicated_allocation; // Core since 1.2.0. diff --git a/src/xenia/ui/vulkan/vulkan_util.cc b/src/xenia/ui/vulkan/vulkan_util.cc index f8dd5846e..b4eb02c3f 100644 --- a/src/xenia/ui/vulkan/vulkan_util.cc +++ b/src/xenia/ui/vulkan/vulkan_util.cc @@ -189,6 +189,53 @@ bool CreateDedicatedAllocationImage(const VulkanProvider& provider, return true; } +VkPipeline CreateComputePipeline( + const VulkanProvider& provider, VkPipelineLayout layout, + VkShaderModule shader, const VkSpecializationInfo* specialization_info, + const char* entry_point) { + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + VkComputePipelineCreateInfo pipeline_create_info; + pipeline_create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + pipeline_create_info.pNext = nullptr; + pipeline_create_info.flags = 0; + pipeline_create_info.stage.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + pipeline_create_info.stage.pNext = nullptr; + pipeline_create_info.stage.flags = 0; + pipeline_create_info.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT; + pipeline_create_info.stage.module = shader; + pipeline_create_info.stage.pName = entry_point; + pipeline_create_info.stage.pSpecializationInfo = specialization_info; + pipeline_create_info.layout = layout; + pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE; + pipeline_create_info.basePipelineIndex = -1; + 
VkPipeline pipeline; + if (dfn.vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, + &pipeline_create_info, nullptr, + &pipeline) != VK_SUCCESS) { + return VK_NULL_HANDLE; + } + return pipeline; +} + +VkPipeline CreateComputePipeline( + const VulkanProvider& provider, VkPipelineLayout layout, + const uint32_t* shader_code, size_t shader_code_size_bytes, + const VkSpecializationInfo* specialization_info, const char* entry_point) { + VkShaderModule shader = + CreateShaderModule(provider, shader_code, shader_code_size_bytes); + if (shader == VK_NULL_HANDLE) { + return VK_NULL_HANDLE; + } + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + VkPipeline pipeline = CreateComputePipeline(provider, layout, shader, + specialization_info, entry_point); + dfn.vkDestroyShaderModule(device, shader, nullptr); + return pipeline; +} + } // namespace util } // namespace vulkan } // namespace ui diff --git a/src/xenia/ui/vulkan/vulkan_util.h b/src/xenia/ui/vulkan/vulkan_util.h index fda575305..7af10f65f 100644 --- a/src/xenia/ui/vulkan/vulkan_util.h +++ b/src/xenia/ui/vulkan/vulkan_util.h @@ -164,6 +164,17 @@ inline VkShaderModule CreateShaderModule(const VulkanProvider& provider, : VK_NULL_HANDLE; } +VkPipeline CreateComputePipeline( + const VulkanProvider& provider, VkPipelineLayout layout, + VkShaderModule shader, + const VkSpecializationInfo* specialization_info = nullptr, + const char* entry_point = "main"); +VkPipeline CreateComputePipeline( + const VulkanProvider& provider, VkPipelineLayout layout, + const uint32_t* shader_code, size_t shader_code_size_bytes, + const VkSpecializationInfo* specialization_info = nullptr, + const char* entry_point = "main"); + } // namespace util } // namespace vulkan } // namespace ui From 73d574a046d9ae10c4a68eff3c1d4729a1cf17ae Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 10 May 2022 21:48:18 +0300 Subject: [PATCH 078/123] [Vulkan] Rectangle and quad list geometry shader 
generation --- .../primitive_rectangle_list_gs.h | 391 --------- .../shaders/primitive_rectangle_list.gs.glsl | 103 --- src/xenia/gpu/shaders/xenos_gs.glsli | 25 - src/xenia/gpu/spirv_shader_translator.cc | 56 +- src/xenia/gpu/spirv_shader_translator.h | 3 - src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 830 +++++++++++++++++- src/xenia/gpu/vulkan/vulkan_pipeline_cache.h | 41 +- .../gpu/vulkan/vulkan_primitive_processor.cc | 5 +- 8 files changed, 846 insertions(+), 608 deletions(-) delete mode 100644 src/xenia/gpu/shaders/bytecode/vulkan_spirv/primitive_rectangle_list_gs.h delete mode 100644 src/xenia/gpu/shaders/primitive_rectangle_list.gs.glsl delete mode 100644 src/xenia/gpu/shaders/xenos_gs.glsli diff --git a/src/xenia/gpu/shaders/bytecode/vulkan_spirv/primitive_rectangle_list_gs.h b/src/xenia/gpu/shaders/bytecode/vulkan_spirv/primitive_rectangle_list_gs.h deleted file mode 100644 index 86590f170..000000000 --- a/src/xenia/gpu/shaders/bytecode/vulkan_spirv/primitive_rectangle_list_gs.h +++ /dev/null @@ -1,391 +0,0 @@ -// Generated with `xb buildshaders`. 
-#if 0 -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 10 -; Bound: 24886 -; Schema: 0 - OpCapability Geometry - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Geometry %5663 "main" %5305 %3631 %3144 %4930 - OpExecutionMode %5663 Triangles - OpExecutionMode %5663 Invocations 1 - OpExecutionMode %5663 OutputTriangleStrip - OpExecutionMode %5663 OutputVertices 6 - OpMemberDecorate %_struct_1017 0 BuiltIn Position - OpDecorate %_struct_1017 Block - OpDecorate %3631 Location 0 - OpDecorate %3144 Location 0 - OpMemberDecorate %_struct_1018 0 BuiltIn Position - OpDecorate %_struct_1018 Block - OpDecorate %7509 NoContraction - OpDecorate %15269 NoContraction - OpDecorate %24885 NoContraction - OpDecorate %14166 NoContraction - OpDecorate %7062 NoContraction - %void = OpTypeVoid - %1282 = OpTypeFunction %void - %bool = OpTypeBool - %float = OpTypeFloat 32 - %v4float = OpTypeVector %float 4 -%_struct_1017 = OpTypeStruct %v4float - %uint = OpTypeInt 32 0 - %uint_3 = OpConstant %uint 3 -%_arr__struct_1017_uint_3 = OpTypeArray %_struct_1017 %uint_3 -%_ptr_Input__arr__struct_1017_uint_3 = OpTypePointer Input %_arr__struct_1017_uint_3 - %5305 = OpVariable %_ptr_Input__arr__struct_1017_uint_3 Input - %int = OpTypeInt 32 1 - %int_0 = OpConstant %int 0 -%_ptr_Input_v4float = OpTypePointer Input %v4float - %v4bool = OpTypeVector %bool 4 - %int_1 = OpConstant %int 1 - %int_2 = OpConstant %int 2 - %uint_0 = OpConstant %uint 0 - %uint_16 = OpConstant %uint 16 -%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16 -%_ptr_Output__arr_v4float_uint_16 = OpTypePointer Output %_arr_v4float_uint_16 - %3631 = OpVariable %_ptr_Output__arr_v4float_uint_16 Output -%_arr__arr_v4float_uint_16_uint_3 = OpTypeArray %_arr_v4float_uint_16 %uint_3 -%_ptr_Input__arr__arr_v4float_uint_16_uint_3 = OpTypePointer Input %_arr__arr_v4float_uint_16_uint_3 - %3144 = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_3 Input 
-%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16 -%_struct_1018 = OpTypeStruct %v4float -%_ptr_Output__struct_1018 = OpTypePointer Output %_struct_1018 - %4930 = OpVariable %_ptr_Output__struct_1018 Output -%_ptr_Output_v4float = OpTypePointer Output %v4float - %v3float = OpTypeVector %float 3 - %float_n1 = OpConstant %float -1 - %float_1 = OpConstant %float 1 - %266 = OpConstantComposite %v3float %float_n1 %float_1 %float_1 - %2582 = OpConstantComposite %v3float %float_1 %float_n1 %float_1 - %267 = OpConstantComposite %v3float %float_1 %float_1 %float_n1 - %v3bool = OpTypeVector %bool 3 - %5663 = OpFunction %void None %1282 - %15110 = OpLabel - OpSelectionMerge %23648 None - OpSwitch %uint_0 %11880 - %11880 = OpLabel - %23974 = OpAccessChain %_ptr_Input_v4float %5305 %int_0 %int_0 - %20722 = OpLoad %v4float %23974 - %16842 = OpIsNan %v4bool %20722 - %9783 = OpAny %bool %16842 - %11671 = OpLogicalNot %bool %9783 - OpSelectionMerge %7750 None - OpBranchConditional %11671 %12129 %7750 - %12129 = OpLabel - %19939 = OpAccessChain %_ptr_Input_v4float %5305 %int_1 %int_0 - %20723 = OpLoad %v4float %19939 - %18381 = OpIsNan %v4bool %20723 - %14860 = OpAny %bool %18381 - OpBranch %7750 - %7750 = OpLabel - %24534 = OpPhi %bool %9783 %11880 %14860 %12129 - %22068 = OpLogicalNot %bool %24534 - OpSelectionMerge %9251 None - OpBranchConditional %22068 %12130 %9251 - %12130 = OpLabel - %19940 = OpAccessChain %_ptr_Input_v4float %5305 %int_2 %int_0 - %20724 = OpLoad %v4float %19940 - %18382 = OpIsNan %v4bool %20724 - %14861 = OpAny %bool %18382 - OpBranch %9251 - %9251 = OpLabel - %10924 = OpPhi %bool %24534 %7750 %14861 %12130 - OpSelectionMerge %7205 None - OpBranchConditional %10924 %21992 %7205 - %21992 = OpLabel - OpBranch %23648 - %7205 = OpLabel - OpBranch %6529 - %6529 = OpLabel - %23131 = OpPhi %uint %uint_0 %7205 %11651 %14551 - %13910 = OpULessThan %bool %23131 %uint_3 - OpLoopMerge %8693 %14551 None - OpBranchConditional %13910 %14551 
%8693 - %14551 = OpLabel - %18153 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3144 %23131 - %16222 = OpLoad %_arr_v4float_uint_16 %18153 - OpStore %3631 %16222 - %16679 = OpAccessChain %_ptr_Input_v4float %5305 %23131 %int_0 - %7391 = OpLoad %v4float %16679 - %22888 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 - OpStore %22888 %7391 - OpEmitVertex - %11651 = OpIAdd %uint %23131 %int_1 - OpBranch %6529 - %8693 = OpLabel - OpEndPrimitive - %12070 = OpAccessChain %_ptr_Input_v4float %5305 %int_1 %int_0 - %6301 = OpLoad %v4float %12070 - %18018 = OpVectorShuffle %v3float %6301 %6301 0 1 2 - %12374 = OpVectorShuffle %v3float %20722 %20722 0 1 2 - %18845 = OpFSub %v3float %18018 %12374 - %18938 = OpAccessChain %_ptr_Input_v4float %5305 %int_2 %int_0 - %13501 = OpLoad %v4float %18938 - %9022 = OpVectorShuffle %v3float %13501 %13501 0 1 2 - %7477 = OpFSub %v3float %9022 %12374 - %11062 = OpFSub %v3float %9022 %18018 - %14931 = OpDot %float %18845 %18845 - %23734 = OpDot %float %7477 %7477 - %22344 = OpDot %float %11062 %11062 - %24721 = OpFOrdGreaterThan %bool %22344 %14931 - OpSelectionMerge %15688 None - OpBranchConditional %24721 %13839 %15688 - %13839 = OpLabel - %21187 = OpFOrdGreaterThan %bool %22344 %23734 - OpBranch %15688 - %15688 = OpLabel - %10925 = OpPhi %bool %24721 %8693 %21187 %13839 - OpSelectionMerge %11701 None - OpBranchConditional %10925 %12131 %13261 - %12131 = OpLabel - %18154 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3144 %int_2 - %16223 = OpLoad %_arr_v4float_uint_16 %18154 - OpStore %3631 %16223 - %19413 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 - OpStore %19413 %13501 - OpEmitVertex - %22812 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3144 %int_1 - %11341 = OpLoad %_arr_v4float_uint_16 %22812 - OpStore %3631 %11341 - OpStore %19413 %6301 - OpEmitVertex - OpBranch %11701 - %13261 = OpLabel - %23993 = OpFOrdGreaterThan %bool %23734 %14931 - OpSelectionMerge %15689 None - OpBranchConditional %23993 %13840 %15689 - 
%13840 = OpLabel - %21188 = OpFOrdGreaterThan %bool %23734 %22344 - OpBranch %15689 - %15689 = OpLabel - %10926 = OpPhi %bool %23993 %13261 %21188 %13840 - OpSelectionMerge %11046 None - OpBranchConditional %10926 %12132 %11589 - %12132 = OpLabel - %18155 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3144 %int_0 - %16224 = OpLoad %_arr_v4float_uint_16 %18155 - OpStore %3631 %16224 - %19414 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 - OpStore %19414 %20722 - OpEmitVertex - %22813 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3144 %int_2 - %11342 = OpLoad %_arr_v4float_uint_16 %22813 - OpStore %3631 %11342 - OpStore %19414 %13501 - OpEmitVertex - OpBranch %11046 - %11589 = OpLabel - %20575 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3144 %int_1 - %16225 = OpLoad %_arr_v4float_uint_16 %20575 - OpStore %3631 %16225 - %19415 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 - OpStore %19415 %6301 - OpEmitVertex - %22814 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3144 %int_0 - %11343 = OpLoad %_arr_v4float_uint_16 %22814 - OpStore %3631 %11343 - OpStore %19415 %20722 - OpEmitVertex - OpBranch %11046 - %11046 = OpLabel - %16046 = OpCompositeConstruct %v3bool %10926 %10926 %10926 - %20034 = OpSelect %v3float %16046 %2582 %267 - OpBranch %11701 - %11701 = OpLabel - %10540 = OpPhi %v3float %266 %12131 %20034 %11046 - OpBranch %19952 - %19952 = OpLabel - %23132 = OpPhi %uint %uint_0 %11701 %21301 %11859 - %13911 = OpULessThan %bool %23132 %uint_16 - OpLoopMerge %14959 %11859 None - OpBranchConditional %13911 %11859 %14959 - %11859 = OpLabel - %19851 = OpCompositeExtract %float %10540 0 - %12487 = OpAccessChain %_ptr_Input_v4float %3144 %int_0 %23132 - %12683 = OpLoad %v4float %12487 - %8719 = OpVectorTimesScalar %v4float %12683 %19851 - %15671 = OpCompositeExtract %float %10540 1 - %17096 = OpAccessChain %_ptr_Input_v4float %3144 %int_1 %23132 - %13595 = OpLoad %v4float %17096 - %19790 = OpVectorTimesScalar %v4float %13595 %15671 - %20206 = OpFAdd 
%v4float %8719 %19790 - %10579 = OpCompositeExtract %float %10540 2 - %16297 = OpAccessChain %_ptr_Input_v4float %3144 %int_2 %23132 - %13596 = OpLoad %v4float %16297 - %19486 = OpVectorTimesScalar %v4float %13596 %10579 - %22917 = OpFAdd %v4float %20206 %19486 - %16419 = OpAccessChain %_ptr_Output_v4float %3631 %23132 - OpStore %16419 %22917 - %21301 = OpIAdd %uint %23132 %int_1 - OpBranch %19952 - %14959 = OpLabel - %9332 = OpCompositeExtract %float %10540 0 - %7509 = OpVectorTimesScalar %v4float %20722 %9332 - %6858 = OpCompositeExtract %float %10540 1 - %15269 = OpVectorTimesScalar %v4float %6301 %6858 - %24885 = OpFAdd %v4float %7509 %15269 - %17621 = OpCompositeExtract %float %10540 2 - %14166 = OpVectorTimesScalar %v4float %13501 %17621 - %7062 = OpFAdd %v4float %24885 %14166 - %18129 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 - OpStore %18129 %7062 - OpEmitVertex - OpEndPrimitive - OpBranch %23648 - %23648 = OpLabel - OpReturn - OpFunctionEnd -#endif - -const uint32_t primitive_rectangle_list_gs[] = { - 0x07230203, 0x00010000, 0x0008000A, 0x00006136, 0x00000000, 0x00020011, - 0x00000002, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E, - 0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x0009000F, 0x00000003, - 0x0000161F, 0x6E69616D, 0x00000000, 0x000014B9, 0x00000E2F, 0x00000C48, - 0x00001342, 0x00030010, 0x0000161F, 0x00000016, 0x00040010, 0x0000161F, - 0x00000000, 0x00000001, 0x00030010, 0x0000161F, 0x0000001D, 0x00040010, - 0x0000161F, 0x0000001A, 0x00000006, 0x00050048, 0x000003F9, 0x00000000, - 0x0000000B, 0x00000000, 0x00030047, 0x000003F9, 0x00000002, 0x00040047, - 0x00000E2F, 0x0000001E, 0x00000000, 0x00040047, 0x00000C48, 0x0000001E, - 0x00000000, 0x00050048, 0x000003FA, 0x00000000, 0x0000000B, 0x00000000, - 0x00030047, 0x000003FA, 0x00000002, 0x00030047, 0x00001D55, 0x0000002A, - 0x00030047, 0x00003BA5, 0x0000002A, 0x00030047, 0x00006135, 0x0000002A, - 0x00030047, 0x00003756, 0x0000002A, 0x00030047, 0x00001B96, 0x0000002A, - 
0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008, 0x00020014, - 0x00000009, 0x00030016, 0x0000000D, 0x00000020, 0x00040017, 0x0000001D, - 0x0000000D, 0x00000004, 0x0003001E, 0x000003F9, 0x0000001D, 0x00040015, - 0x0000000B, 0x00000020, 0x00000000, 0x0004002B, 0x0000000B, 0x00000A13, - 0x00000003, 0x0004001C, 0x00000A0F, 0x000003F9, 0x00000A13, 0x00040020, - 0x000000C9, 0x00000001, 0x00000A0F, 0x0004003B, 0x000000C9, 0x000014B9, - 0x00000001, 0x00040015, 0x0000000C, 0x00000020, 0x00000001, 0x0004002B, - 0x0000000C, 0x00000A0B, 0x00000000, 0x00040020, 0x0000029A, 0x00000001, - 0x0000001D, 0x00040017, 0x00000011, 0x00000009, 0x00000004, 0x0004002B, - 0x0000000C, 0x00000A0E, 0x00000001, 0x0004002B, 0x0000000C, 0x00000A11, - 0x00000002, 0x0004002B, 0x0000000B, 0x00000A0A, 0x00000000, 0x0004002B, - 0x0000000B, 0x00000A3A, 0x00000010, 0x0004001C, 0x0000066B, 0x0000001D, - 0x00000A3A, 0x00040020, 0x000008E8, 0x00000003, 0x0000066B, 0x0004003B, - 0x000008E8, 0x00000E2F, 0x00000003, 0x0004001C, 0x000001AC, 0x0000066B, - 0x00000A13, 0x00040020, 0x00000429, 0x00000001, 0x000001AC, 0x0004003B, - 0x00000429, 0x00000C48, 0x00000001, 0x00040020, 0x000008E9, 0x00000001, - 0x0000066B, 0x0003001E, 0x000003FA, 0x0000001D, 0x00040020, 0x00000676, - 0x00000003, 0x000003FA, 0x0004003B, 0x00000676, 0x00001342, 0x00000003, - 0x00040020, 0x0000029B, 0x00000003, 0x0000001D, 0x00040017, 0x00000018, - 0x0000000D, 0x00000003, 0x0004002B, 0x0000000D, 0x00000341, 0xBF800000, - 0x0004002B, 0x0000000D, 0x0000008A, 0x3F800000, 0x0006002C, 0x00000018, - 0x0000010A, 0x00000341, 0x0000008A, 0x0000008A, 0x0006002C, 0x00000018, - 0x00000A16, 0x0000008A, 0x00000341, 0x0000008A, 0x0006002C, 0x00000018, - 0x0000010B, 0x0000008A, 0x0000008A, 0x00000341, 0x00040017, 0x00000010, - 0x00000009, 0x00000003, 0x00050036, 0x00000008, 0x0000161F, 0x00000000, - 0x00000502, 0x000200F8, 0x00003B06, 0x000300F7, 0x00005C60, 0x00000000, - 0x000300FB, 0x00000A0A, 0x00002E68, 0x000200F8, 0x00002E68, 0x00060041, - 
0x0000029A, 0x00005DA6, 0x000014B9, 0x00000A0B, 0x00000A0B, 0x0004003D, - 0x0000001D, 0x000050F2, 0x00005DA6, 0x0004009C, 0x00000011, 0x000041CA, - 0x000050F2, 0x0004009A, 0x00000009, 0x00002637, 0x000041CA, 0x000400A8, - 0x00000009, 0x00002D97, 0x00002637, 0x000300F7, 0x00001E46, 0x00000000, - 0x000400FA, 0x00002D97, 0x00002F61, 0x00001E46, 0x000200F8, 0x00002F61, - 0x00060041, 0x0000029A, 0x00004DE3, 0x000014B9, 0x00000A0E, 0x00000A0B, - 0x0004003D, 0x0000001D, 0x000050F3, 0x00004DE3, 0x0004009C, 0x00000011, - 0x000047CD, 0x000050F3, 0x0004009A, 0x00000009, 0x00003A0C, 0x000047CD, - 0x000200F9, 0x00001E46, 0x000200F8, 0x00001E46, 0x000700F5, 0x00000009, - 0x00005FD6, 0x00002637, 0x00002E68, 0x00003A0C, 0x00002F61, 0x000400A8, - 0x00000009, 0x00005634, 0x00005FD6, 0x000300F7, 0x00002423, 0x00000000, - 0x000400FA, 0x00005634, 0x00002F62, 0x00002423, 0x000200F8, 0x00002F62, - 0x00060041, 0x0000029A, 0x00004DE4, 0x000014B9, 0x00000A11, 0x00000A0B, - 0x0004003D, 0x0000001D, 0x000050F4, 0x00004DE4, 0x0004009C, 0x00000011, - 0x000047CE, 0x000050F4, 0x0004009A, 0x00000009, 0x00003A0D, 0x000047CE, - 0x000200F9, 0x00002423, 0x000200F8, 0x00002423, 0x000700F5, 0x00000009, - 0x00002AAC, 0x00005FD6, 0x00001E46, 0x00003A0D, 0x00002F62, 0x000300F7, - 0x00001C25, 0x00000000, 0x000400FA, 0x00002AAC, 0x000055E8, 0x00001C25, - 0x000200F8, 0x000055E8, 0x000200F9, 0x00005C60, 0x000200F8, 0x00001C25, - 0x000200F9, 0x00001981, 0x000200F8, 0x00001981, 0x000700F5, 0x0000000B, - 0x00005A5B, 0x00000A0A, 0x00001C25, 0x00002D83, 0x000038D7, 0x000500B0, - 0x00000009, 0x00003656, 0x00005A5B, 0x00000A13, 0x000400F6, 0x000021F5, - 0x000038D7, 0x00000000, 0x000400FA, 0x00003656, 0x000038D7, 0x000021F5, - 0x000200F8, 0x000038D7, 0x00050041, 0x000008E9, 0x000046E9, 0x00000C48, - 0x00005A5B, 0x0004003D, 0x0000066B, 0x00003F5E, 0x000046E9, 0x0003003E, - 0x00000E2F, 0x00003F5E, 0x00060041, 0x0000029A, 0x00004127, 0x000014B9, - 0x00005A5B, 0x00000A0B, 0x0004003D, 0x0000001D, 0x00001CDF, 0x00004127, - 
0x00050041, 0x0000029B, 0x00005968, 0x00001342, 0x00000A0B, 0x0003003E, - 0x00005968, 0x00001CDF, 0x000100DA, 0x00050080, 0x0000000B, 0x00002D83, - 0x00005A5B, 0x00000A0E, 0x000200F9, 0x00001981, 0x000200F8, 0x000021F5, - 0x000100DB, 0x00060041, 0x0000029A, 0x00002F26, 0x000014B9, 0x00000A0E, - 0x00000A0B, 0x0004003D, 0x0000001D, 0x0000189D, 0x00002F26, 0x0008004F, - 0x00000018, 0x00004662, 0x0000189D, 0x0000189D, 0x00000000, 0x00000001, - 0x00000002, 0x0008004F, 0x00000018, 0x00003056, 0x000050F2, 0x000050F2, - 0x00000000, 0x00000001, 0x00000002, 0x00050083, 0x00000018, 0x0000499D, - 0x00004662, 0x00003056, 0x00060041, 0x0000029A, 0x000049FA, 0x000014B9, - 0x00000A11, 0x00000A0B, 0x0004003D, 0x0000001D, 0x000034BD, 0x000049FA, - 0x0008004F, 0x00000018, 0x0000233E, 0x000034BD, 0x000034BD, 0x00000000, - 0x00000001, 0x00000002, 0x00050083, 0x00000018, 0x00001D35, 0x0000233E, - 0x00003056, 0x00050083, 0x00000018, 0x00002B36, 0x0000233E, 0x00004662, - 0x00050094, 0x0000000D, 0x00003A53, 0x0000499D, 0x0000499D, 0x00050094, - 0x0000000D, 0x00005CB6, 0x00001D35, 0x00001D35, 0x00050094, 0x0000000D, - 0x00005748, 0x00002B36, 0x00002B36, 0x000500BA, 0x00000009, 0x00006091, - 0x00005748, 0x00003A53, 0x000300F7, 0x00003D48, 0x00000000, 0x000400FA, - 0x00006091, 0x0000360F, 0x00003D48, 0x000200F8, 0x0000360F, 0x000500BA, - 0x00000009, 0x000052C3, 0x00005748, 0x00005CB6, 0x000200F9, 0x00003D48, - 0x000200F8, 0x00003D48, 0x000700F5, 0x00000009, 0x00002AAD, 0x00006091, - 0x000021F5, 0x000052C3, 0x0000360F, 0x000300F7, 0x00002DB5, 0x00000000, - 0x000400FA, 0x00002AAD, 0x00002F63, 0x000033CD, 0x000200F8, 0x00002F63, - 0x00050041, 0x000008E9, 0x000046EA, 0x00000C48, 0x00000A11, 0x0004003D, - 0x0000066B, 0x00003F5F, 0x000046EA, 0x0003003E, 0x00000E2F, 0x00003F5F, - 0x00050041, 0x0000029B, 0x00004BD5, 0x00001342, 0x00000A0B, 0x0003003E, - 0x00004BD5, 0x000034BD, 0x000100DA, 0x00050041, 0x000008E9, 0x0000591C, - 0x00000C48, 0x00000A0E, 0x0004003D, 0x0000066B, 0x00002C4D, 0x0000591C, - 
0x0003003E, 0x00000E2F, 0x00002C4D, 0x0003003E, 0x00004BD5, 0x0000189D, - 0x000100DA, 0x000200F9, 0x00002DB5, 0x000200F8, 0x000033CD, 0x000500BA, - 0x00000009, 0x00005DB9, 0x00005CB6, 0x00003A53, 0x000300F7, 0x00003D49, - 0x00000000, 0x000400FA, 0x00005DB9, 0x00003610, 0x00003D49, 0x000200F8, - 0x00003610, 0x000500BA, 0x00000009, 0x000052C4, 0x00005CB6, 0x00005748, - 0x000200F9, 0x00003D49, 0x000200F8, 0x00003D49, 0x000700F5, 0x00000009, - 0x00002AAE, 0x00005DB9, 0x000033CD, 0x000052C4, 0x00003610, 0x000300F7, - 0x00002B26, 0x00000000, 0x000400FA, 0x00002AAE, 0x00002F64, 0x00002D45, - 0x000200F8, 0x00002F64, 0x00050041, 0x000008E9, 0x000046EB, 0x00000C48, - 0x00000A0B, 0x0004003D, 0x0000066B, 0x00003F60, 0x000046EB, 0x0003003E, - 0x00000E2F, 0x00003F60, 0x00050041, 0x0000029B, 0x00004BD6, 0x00001342, - 0x00000A0B, 0x0003003E, 0x00004BD6, 0x000050F2, 0x000100DA, 0x00050041, - 0x000008E9, 0x0000591D, 0x00000C48, 0x00000A11, 0x0004003D, 0x0000066B, - 0x00002C4E, 0x0000591D, 0x0003003E, 0x00000E2F, 0x00002C4E, 0x0003003E, - 0x00004BD6, 0x000034BD, 0x000100DA, 0x000200F9, 0x00002B26, 0x000200F8, - 0x00002D45, 0x00050041, 0x000008E9, 0x0000505F, 0x00000C48, 0x00000A0E, - 0x0004003D, 0x0000066B, 0x00003F61, 0x0000505F, 0x0003003E, 0x00000E2F, - 0x00003F61, 0x00050041, 0x0000029B, 0x00004BD7, 0x00001342, 0x00000A0B, - 0x0003003E, 0x00004BD7, 0x0000189D, 0x000100DA, 0x00050041, 0x000008E9, - 0x0000591E, 0x00000C48, 0x00000A0B, 0x0004003D, 0x0000066B, 0x00002C4F, - 0x0000591E, 0x0003003E, 0x00000E2F, 0x00002C4F, 0x0003003E, 0x00004BD7, - 0x000050F2, 0x000100DA, 0x000200F9, 0x00002B26, 0x000200F8, 0x00002B26, - 0x00060050, 0x00000010, 0x00003EAE, 0x00002AAE, 0x00002AAE, 0x00002AAE, - 0x000600A9, 0x00000018, 0x00004E42, 0x00003EAE, 0x00000A16, 0x0000010B, - 0x000200F9, 0x00002DB5, 0x000200F8, 0x00002DB5, 0x000700F5, 0x00000018, - 0x0000292C, 0x0000010A, 0x00002F63, 0x00004E42, 0x00002B26, 0x000200F9, - 0x00004DF0, 0x000200F8, 0x00004DF0, 0x000700F5, 0x0000000B, 0x00005A5C, - 
0x00000A0A, 0x00002DB5, 0x00005335, 0x00002E53, 0x000500B0, 0x00000009, - 0x00003657, 0x00005A5C, 0x00000A3A, 0x000400F6, 0x00003A6F, 0x00002E53, - 0x00000000, 0x000400FA, 0x00003657, 0x00002E53, 0x00003A6F, 0x000200F8, - 0x00002E53, 0x00050051, 0x0000000D, 0x00004D8B, 0x0000292C, 0x00000000, - 0x00060041, 0x0000029A, 0x000030C7, 0x00000C48, 0x00000A0B, 0x00005A5C, - 0x0004003D, 0x0000001D, 0x0000318B, 0x000030C7, 0x0005008E, 0x0000001D, - 0x0000220F, 0x0000318B, 0x00004D8B, 0x00050051, 0x0000000D, 0x00003D37, - 0x0000292C, 0x00000001, 0x00060041, 0x0000029A, 0x000042C8, 0x00000C48, - 0x00000A0E, 0x00005A5C, 0x0004003D, 0x0000001D, 0x0000351B, 0x000042C8, - 0x0005008E, 0x0000001D, 0x00004D4E, 0x0000351B, 0x00003D37, 0x00050081, - 0x0000001D, 0x00004EEE, 0x0000220F, 0x00004D4E, 0x00050051, 0x0000000D, - 0x00002953, 0x0000292C, 0x00000002, 0x00060041, 0x0000029A, 0x00003FA9, - 0x00000C48, 0x00000A11, 0x00005A5C, 0x0004003D, 0x0000001D, 0x0000351C, - 0x00003FA9, 0x0005008E, 0x0000001D, 0x00004C1E, 0x0000351C, 0x00002953, - 0x00050081, 0x0000001D, 0x00005985, 0x00004EEE, 0x00004C1E, 0x00050041, - 0x0000029B, 0x00004023, 0x00000E2F, 0x00005A5C, 0x0003003E, 0x00004023, - 0x00005985, 0x00050080, 0x0000000B, 0x00005335, 0x00005A5C, 0x00000A0E, - 0x000200F9, 0x00004DF0, 0x000200F8, 0x00003A6F, 0x00050051, 0x0000000D, - 0x00002474, 0x0000292C, 0x00000000, 0x0005008E, 0x0000001D, 0x00001D55, - 0x000050F2, 0x00002474, 0x00050051, 0x0000000D, 0x00001ACA, 0x0000292C, - 0x00000001, 0x0005008E, 0x0000001D, 0x00003BA5, 0x0000189D, 0x00001ACA, - 0x00050081, 0x0000001D, 0x00006135, 0x00001D55, 0x00003BA5, 0x00050051, - 0x0000000D, 0x000044D5, 0x0000292C, 0x00000002, 0x0005008E, 0x0000001D, - 0x00003756, 0x000034BD, 0x000044D5, 0x00050081, 0x0000001D, 0x00001B96, - 0x00006135, 0x00003756, 0x00050041, 0x0000029B, 0x000046D1, 0x00001342, - 0x00000A0B, 0x0003003E, 0x000046D1, 0x00001B96, 0x000100DA, 0x000100DB, - 0x000200F9, 0x00005C60, 0x000200F8, 0x00005C60, 0x000100FD, 0x00010038, -}; 
diff --git a/src/xenia/gpu/shaders/primitive_rectangle_list.gs.glsl b/src/xenia/gpu/shaders/primitive_rectangle_list.gs.glsl deleted file mode 100644 index 1dd7f7edb..000000000 --- a/src/xenia/gpu/shaders/primitive_rectangle_list.gs.glsl +++ /dev/null @@ -1,103 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2022 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#version 460 -#extension GL_GOOGLE_include_directive : require -#include "xenos_gs.glsli" - -layout(triangles) in; -layout(triangle_strip, max_vertices=6) out; - -void main() { - if (any(isnan(gl_in[0].gl_Position)) || any(isnan(gl_in[1].gl_Position)) || - any(isnan(gl_in[2].gl_Position))) { - return; - } - - uint i; - - for (i = 0; i < 3u; ++i) { - xe_out_interpolators = xe_in_interpolators[i]; - gl_Position = gl_in[i].gl_Position; - EmitVertex(); - } - EndPrimitive(); - - // Find the diagonal (the edge that is longer than both the other two) and - // mirror the other vertex across it. - vec3 edge_01 = gl_in[1].gl_Position.xyz - gl_in[0].gl_Position.xyz; - vec3 edge_02 = gl_in[2].gl_Position.xyz - gl_in[0].gl_Position.xyz; - vec3 edge_12 = gl_in[2].gl_Position.xyz - gl_in[1].gl_Position.xyz; - vec3 edge_squares = vec3( - dot(edge_01, edge_01), dot(edge_02, edge_02), dot(edge_12, edge_12)); - vec3 v3_signs; - if (edge_squares.z > edge_squares.x && edge_squares.z > edge_squares.y) { - // 12 is the diagonal. Most games use this form. 
- // - // 0 ------ 1 0: -1,-1 - // | - | 1: 1,-1 - // | // | 2: -1, 1 - // | - | 3: [ 1, 1 ] - // 2 ----- [3] - // - // 0 ------ 2 0: -1,-1 - // | - | 1: -1, 1 - // | // | 2: 1,-1 - // | - | 3: [ 1, 1 ] - // 1 ------[3] - xe_out_interpolators = xe_in_interpolators[2]; - gl_Position = gl_in[2].gl_Position; - EmitVertex(); - xe_out_interpolators = xe_in_interpolators[1]; - gl_Position = gl_in[1].gl_Position; - EmitVertex(); - v3_signs = vec3(-1.0, 1.0, 1.0); - } else if (edge_squares.y > edge_squares.x && - edge_squares.y > edge_squares.z) { - // 02 is the diagonal. - // - // 0 ------ 1 0: -1,-1 - // | - | 1: 1,-1 - // | \\ | 2: 1, 1 - // | - | 3: [-1, 1 ] - // [3] ----- 2 - xe_out_interpolators = xe_in_interpolators[0]; - gl_Position = gl_in[0].gl_Position; - EmitVertex(); - xe_out_interpolators = xe_in_interpolators[2]; - gl_Position = gl_in[2].gl_Position; - EmitVertex(); - v3_signs = vec3(1.0, -1.0, 1.0); - } else { - // 01 is the diagonal. Not seen in any game so far. - // - // 0 ------ 2 0: -1,-1 - // | - | 1: 1, 1 - // | \\ | 2: 1,-1 - // | - | 3: [-1, 1 ] - // [3] ----- 1 - xe_out_interpolators = xe_in_interpolators[1]; - gl_Position = gl_in[1].gl_Position; - EmitVertex(); - xe_out_interpolators = xe_in_interpolators[0]; - gl_Position = gl_in[0].gl_Position; - EmitVertex(); - v3_signs = vec3(1.0, 1.0, -1.0); - } - for (i = 0; i < 16u; ++i) { - xe_out_interpolators[i] = v3_signs.x * xe_in_interpolators[0][i] + - v3_signs.y * xe_in_interpolators[1][i] + - v3_signs.z * xe_in_interpolators[2][i]; - } - gl_Position = v3_signs.x * gl_in[0].gl_Position + - v3_signs.y * gl_in[1].gl_Position + - v3_signs.z * gl_in[2].gl_Position; - EmitVertex(); - EndPrimitive(); -} diff --git a/src/xenia/gpu/shaders/xenos_gs.glsli b/src/xenia/gpu/shaders/xenos_gs.glsli deleted file mode 100644 index f27b5cae3..000000000 --- a/src/xenia/gpu/shaders/xenos_gs.glsli +++ /dev/null @@ -1,25 +0,0 @@ -/** - ****************************************************************************** - * 
Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2022 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_D3D12_SHADERS_XENOS_GS_GLSLI_ -#define XENIA_GPU_D3D12_SHADERS_XENOS_GS_GLSLI_ - -layout(location=0) in vec4 xe_in_interpolators[][16]; - -in gl_PerVertex { - vec4 gl_Position; -} gl_in[]; - -layout(location=0) out vec4 xe_out_interpolators[16]; - -out gl_PerVertex { - precise vec4 gl_Position; -}; - -#endif // XENIA_GPU_D3D12_SHADERS_XENOS_GS_GLSLI_ diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 99d92b0c3..9cc6fec72 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -932,64 +932,34 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { } // Create the Xenia-specific outputs. + // TODO(Triang3l): Change to an interpolator array. for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { spv::Id interpolator = builder_->createVariable( spv::NoPrecision, spv::StorageClassOutput, type_float4_, (kInterpolatorNamePrefix + std::to_string(i)).c_str()); input_output_interpolators_[i] = interpolator; builder_->addDecoration(interpolator, spv::DecorationLocation, int(i)); + builder_->addDecoration(interpolator, spv::DecorationInvariant); main_interface_.push_back(interpolator); } - // Create the entire GLSL 4.50 gl_PerVertex output similar to what glslang - // does. Members (like gl_PointSize) don't need to be used, and also - // ClipDistance and CullDistance may exist even if the device doesn't support - // them, as long as the capabilities aren't enabled, and nothing is stored to - // them. 
- if (features_.clip_distance) { - builder_->addCapability(spv::CapabilityClipDistance); - } - if (features_.cull_distance) { - builder_->addCapability(spv::CapabilityCullDistance); - } + // Create the gl_PerVertex output for used system outputs. std::vector struct_per_vertex_members; struct_per_vertex_members.reserve(kOutputPerVertexMemberCount); struct_per_vertex_members.push_back(type_float4_); - struct_per_vertex_members.push_back(type_float_); - // TODO(Triang3l): Specialization constant for ucp_cull_only_ena, for 6 + 1 - // or 1 + 7 array sizes. - struct_per_vertex_members.push_back(builder_->makeArrayType( - type_float_, builder_->makeUintConstant(features_.clip_distance ? 6 : 1), - 0)); - struct_per_vertex_members.push_back( - builder_->makeArrayType(type_float_, builder_->makeUintConstant(1), 0)); spv::Id type_struct_per_vertex = builder_->makeStructType(struct_per_vertex_members, "gl_PerVertex"); + builder_->addMemberName(type_struct_per_vertex, + kOutputPerVertexMemberPosition, "gl_Position"); builder_->addMemberDecoration(type_struct_per_vertex, kOutputPerVertexMemberPosition, spv::DecorationInvariant); builder_->addMemberDecoration(type_struct_per_vertex, kOutputPerVertexMemberPosition, spv::DecorationBuiltIn, spv::BuiltInPosition); - builder_->addMemberDecoration(type_struct_per_vertex, - kOutputPerVertexMemberPointSize, - spv::DecorationBuiltIn, spv::BuiltInPointSize); - builder_->addMemberDecoration(type_struct_per_vertex, - kOutputPerVertexMemberClipDistance, - spv::DecorationInvariant); - builder_->addMemberDecoration( - type_struct_per_vertex, kOutputPerVertexMemberClipDistance, - spv::DecorationBuiltIn, spv::BuiltInClipDistance); - builder_->addMemberDecoration(type_struct_per_vertex, - kOutputPerVertexMemberCullDistance, - spv::DecorationInvariant); - builder_->addMemberDecoration( - type_struct_per_vertex, kOutputPerVertexMemberCullDistance, - spv::DecorationBuiltIn, spv::BuiltInCullDistance); builder_->addDecoration(type_struct_per_vertex, 
spv::DecorationBlock); - output_per_vertex_ = - builder_->createVariable(spv::NoPrecision, spv::StorageClassOutput, - type_struct_per_vertex, "xe_out_gl_PerVertex"); + output_per_vertex_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassOutput, type_struct_per_vertex, ""); main_interface_.push_back(output_per_vertex_); } @@ -1178,18 +1148,6 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { std::move(composite_construct_op)); } builder_->createStore(position, position_ptr); - - // Write 1 to point size (using a geometry shader or another kind of fallback - // to expand point sprites - point size support is not guaranteed, and the - // size would also be limited, and can't be controlled independently along two - // axes). - id_vector_temp_.clear(); - id_vector_temp_.push_back( - builder_->makeIntConstant(kOutputPerVertexMemberPointSize)); - builder_->createStore( - const_float_1_, - builder_->createAccessChain(spv::StorageClassOutput, output_per_vertex_, - id_vector_temp_)); } void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 69ae784f4..259b703c1 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -459,9 +459,6 @@ class SpirvShaderTranslator : public ShaderTranslator { enum OutputPerVertexMember : unsigned int { kOutputPerVertexMemberPosition, - kOutputPerVertexMemberPointSize, - kOutputPerVertexMemberClipDistance, - kOutputPerVertexMemberCullDistance, kOutputPerVertexMemberCount, }; spv::Id output_per_vertex_; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index a096e18e7..db2d36d69 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -11,9 +11,12 @@ #include #include +#include #include #include +#include +#include 
"third_party/glslang/SPIRV/SpvBuilder.h" #include "xenia/base/assert.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" @@ -33,11 +36,6 @@ namespace xe { namespace gpu { namespace vulkan { -// Generated with `xb buildshaders`. -namespace shaders { -#include "xenia/gpu/shaders/bytecode/vulkan_spirv/primitive_rectangle_list_gs.h" -} // namespace shaders - VulkanPipelineCache::VulkanPipelineCache( VulkanCommandProcessor& command_processor, const RegisterFile& register_file, @@ -51,20 +49,6 @@ VulkanPipelineCache::~VulkanPipelineCache() { Shutdown(); } bool VulkanPipelineCache::Initialize() { const ui::vulkan::VulkanProvider& provider = command_processor_.GetVulkanProvider(); - const VkPhysicalDeviceFeatures& device_features = provider.device_features(); - - if (device_features.geometryShader) { - gs_rectangle_list_ = ui::vulkan::util::CreateShaderModule( - provider, shaders::primitive_rectangle_list_gs, - sizeof(shaders::primitive_rectangle_list_gs)); - if (gs_rectangle_list_ == VK_NULL_HANDLE) { - XELOGE( - "VulkanPipelineCache: Failed to create the rectangle list geometry " - "shader"); - Shutdown(); - return false; - } - } shader_translator_ = std::make_unique( SpirvShaderTranslator::Features(provider)); @@ -80,10 +64,14 @@ void VulkanPipelineCache::Shutdown() { ClearCache(); - shader_translator_.reset(); + for (const auto& geometry_shader_pair : geometry_shaders_) { + if (geometry_shader_pair.second != VK_NULL_HANDLE) { + dfn.vkDestroyShaderModule(device, geometry_shader_pair.second, nullptr); + } + } + geometry_shaders_.clear(); - ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyShaderModule, device, - gs_rectangle_list_); + shader_translator_.reset(); } void VulkanPipelineCache::ClearCache() { @@ -255,6 +243,14 @@ bool VulkanPipelineCache::ConfigurePipeline( if (!pipeline_layout) { return false; } + VkShaderModule geometry_shader = VK_NULL_HANDLE; + GeometryShaderKey geometry_shader_key; + if (GetGeometryShaderKey(description.geometry_shader, 
geometry_shader_key)) { + geometry_shader = GetGeometryShader(geometry_shader_key); + if (geometry_shader == VK_NULL_HANDLE) { + return false; + } + } VkRenderPass render_pass = render_target_cache_.GetRenderPass(render_pass_key); if (render_pass == VK_NULL_HANDLE) { @@ -266,6 +262,7 @@ bool VulkanPipelineCache::ConfigurePipeline( creation_arguments.pipeline = &pipeline; creation_arguments.vertex_shader = vertex_shader; creation_arguments.pixel_shader = pixel_shader; + creation_arguments.geometry_shader = geometry_shader; creation_arguments.render_pass = render_pass; if (!EnsurePipelineCreated(creation_arguments)) { return false; @@ -419,6 +416,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription( primitive_topology = PipelinePrimitiveTopology::kTriangleList; break; case xenos::PrimitiveType::kQuadList: + geometry_shader = PipelineGeometryShader::kQuadList; primitive_topology = PipelinePrimitiveTopology::kLineListWithAdjacency; break; default: @@ -686,6 +684,782 @@ bool VulkanPipelineCache::ArePipelineRequirementsMet( return true; } +bool VulkanPipelineCache::GetGeometryShaderKey( + PipelineGeometryShader geometry_shader_type, GeometryShaderKey& key_out) { + if (geometry_shader_type == PipelineGeometryShader::kNone) { + return false; + } + GeometryShaderKey key; + key.type = geometry_shader_type; + // TODO(Triang3l): Make the linkage parameters depend on the real needs of the + // vertex and the pixel shader. 
+ key.interpolator_count = xenos::kMaxInterpolators; + key.user_clip_plane_count = /* 6 */ 0; + key.user_clip_plane_cull = 0; + key.has_vertex_kill_and = /* 1 */ 0; + key.has_point_size = /* 1 */ 0; + key.has_point_coordinates = /* 1 */ 0; + key_out = key; + return true; +} + +VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) { + auto it = geometry_shaders_.find(key); + if (it != geometry_shaders_.end()) { + return it->second; + } + + std::vector id_vector_temp; + std::vector uint_vector_temp; + + spv::ExecutionMode input_primitive_execution_mode = spv::ExecutionMode(0); + uint32_t input_primitive_vertex_count = 0; + spv::ExecutionMode output_primitive_execution_mode = spv::ExecutionMode(0); + uint32_t output_max_vertices = 0; + switch (key.type) { + case PipelineGeometryShader::kRectangleList: + // Triangle to a strip of 2 triangles. + input_primitive_execution_mode = spv::ExecutionModeTriangles; + input_primitive_vertex_count = 3; + output_primitive_execution_mode = spv::ExecutionModeOutputTriangleStrip; + output_max_vertices = 4; + break; + case PipelineGeometryShader::kQuadList: + // 4 vertices passed via a line list with adjacency to a strip of 2 + // triangles. + input_primitive_execution_mode = spv::ExecutionModeInputLinesAdjacency; + input_primitive_vertex_count = 4; + output_primitive_execution_mode = spv::ExecutionModeOutputTriangleStrip; + output_max_vertices = 4; + break; + default: + assert_unhandled_case(key.type); + } + + uint32_t clip_distance_count = + key.user_clip_plane_cull ? 0 : key.user_clip_plane_count; + uint32_t cull_distance_count = + (key.user_clip_plane_cull ? 
key.user_clip_plane_count : 0) + + key.has_vertex_kill_and; + + spv::Builder builder(spv::Spv_1_0, + (SpirvShaderTranslator::kSpirvMagicToolId << 16) | 1, + nullptr); + spv::Id ext_inst_glsl_std_450 = builder.import("GLSL.std.450"); + builder.addCapability(spv::CapabilityGeometry); + if (clip_distance_count) { + builder.addCapability(spv::CapabilityClipDistance); + } + if (cull_distance_count) { + builder.addCapability(spv::CapabilityCullDistance); + } + builder.setMemoryModel(spv::AddressingModelLogical, spv::MemoryModelGLSL450); + builder.setSource(spv::SourceLanguageUnknown, 0); + + // TODO(Triang3l): Shader float controls (NaN preservation most importantly). + + std::vector main_interface; + + spv::Id type_void = builder.makeVoidType(); + spv::Id type_bool = builder.makeBoolType(); + spv::Id type_bool4 = builder.makeVectorType(type_bool, 4); + spv::Id type_int = builder.makeIntType(32); + spv::Id type_float = builder.makeFloatType(32); + spv::Id type_float4 = builder.makeVectorType(type_float, 4); + spv::Id type_clip_distances = + clip_distance_count + ? builder.makeArrayType( + type_float, builder.makeUintConstant(clip_distance_count), 0) + : spv::NoType; + spv::Id type_cull_distances = + cull_distance_count + ? builder.makeArrayType( + type_float, builder.makeUintConstant(cull_distance_count), 0) + : spv::NoType; + spv::Id type_interpolators = + key.interpolator_count + ? builder.makeArrayType( + type_float4, builder.makeUintConstant(key.interpolator_count), + 0) + : spv::NoType; + spv::Id type_point_coordinates = key.has_point_coordinates + ? builder.makeVectorType(type_float, 2) + : spv::NoType; + + // Inputs and outputs - matching glslang order, in gl_PerVertex gl_in[], + // user-defined outputs, user-defined inputs, out gl_PerVertex. + // TODO(Triang3l): Point parameters from the system uniform buffer. + + spv::Id const_input_primitive_vertex_count = + builder.makeUintConstant(input_primitive_vertex_count); + + // in gl_PerVertex gl_in[]. 
+ // gl_Position. + id_vector_temp.clear(); + uint32_t member_in_gl_per_vertex_position = uint32_t(id_vector_temp.size()); + id_vector_temp.push_back(type_float4); + spv::Id const_member_in_gl_per_vertex_position = + builder.makeIntConstant(int32_t(member_in_gl_per_vertex_position)); + // gl_ClipDistance. + uint32_t member_in_gl_per_vertex_clip_distance = UINT32_MAX; + spv::Id const_member_in_gl_per_vertex_clip_distance = spv::NoResult; + if (clip_distance_count) { + member_in_gl_per_vertex_clip_distance = uint32_t(id_vector_temp.size()); + id_vector_temp.push_back(type_clip_distances); + const_member_in_gl_per_vertex_clip_distance = + builder.makeIntConstant(int32_t(member_in_gl_per_vertex_clip_distance)); + } + // gl_CullDistance. + uint32_t member_in_gl_per_vertex_cull_distance = UINT32_MAX; + if (cull_distance_count) { + member_in_gl_per_vertex_cull_distance = uint32_t(id_vector_temp.size()); + id_vector_temp.push_back(type_cull_distances); + } + // Structure and array. + spv::Id type_struct_in_gl_per_vertex = + builder.makeStructType(id_vector_temp, "gl_PerVertex"); + builder.addMemberName(type_struct_in_gl_per_vertex, + member_in_gl_per_vertex_position, "gl_Position"); + builder.addMemberDecoration(type_struct_in_gl_per_vertex, + member_in_gl_per_vertex_position, + spv::DecorationBuiltIn, spv::BuiltInPosition); + if (clip_distance_count) { + builder.addMemberName(type_struct_in_gl_per_vertex, + member_in_gl_per_vertex_clip_distance, + "gl_ClipDistance"); + builder.addMemberDecoration( + type_struct_in_gl_per_vertex, member_in_gl_per_vertex_clip_distance, + spv::DecorationBuiltIn, spv::BuiltInClipDistance); + } + if (cull_distance_count) { + builder.addMemberName(type_struct_in_gl_per_vertex, + member_in_gl_per_vertex_cull_distance, + "gl_CullDistance"); + builder.addMemberDecoration( + type_struct_in_gl_per_vertex, member_in_gl_per_vertex_cull_distance, + spv::DecorationBuiltIn, spv::BuiltInCullDistance); + } + 
builder.addDecoration(type_struct_in_gl_per_vertex, spv::DecorationBlock); + spv::Id type_array_in_gl_per_vertex = builder.makeArrayType( + type_struct_in_gl_per_vertex, const_input_primitive_vertex_count, 0); + spv::Id in_gl_per_vertex = + builder.createVariable(spv::NoPrecision, spv::StorageClassInput, + type_array_in_gl_per_vertex, "gl_in"); + main_interface.push_back(in_gl_per_vertex); + + // Interpolators output. + spv::Id out_interpolators = spv::NoResult; + if (key.interpolator_count) { + out_interpolators = + builder.createVariable(spv::NoPrecision, spv::StorageClassOutput, + type_interpolators, "xe_out_interpolators"); + builder.addDecoration(out_interpolators, spv::DecorationLocation, 0); + builder.addDecoration(out_interpolators, spv::DecorationInvariant); + main_interface.push_back(out_interpolators); + } + + // Point coordinate output. + spv::Id out_point_coordinates = spv::NoResult; + if (key.has_point_coordinates) { + out_point_coordinates = builder.createVariable( + spv::NoPrecision, spv::StorageClassOutput, type_point_coordinates, + "xe_out_point_coordinates"); + builder.addDecoration(out_point_coordinates, spv::DecorationLocation, + key.interpolator_count); + builder.addDecoration(out_point_coordinates, spv::DecorationInvariant); + main_interface.push_back(out_point_coordinates); + } + + // Interpolator input. + spv::Id in_interpolators = spv::NoResult; + if (key.interpolator_count) { + in_interpolators = builder.createVariable( + spv::NoPrecision, spv::StorageClassInput, + builder.makeArrayType(type_interpolators, + const_input_primitive_vertex_count, 0), + "xe_in_interpolators"); + builder.addDecoration(in_interpolators, spv::DecorationLocation, 0); + main_interface.push_back(in_interpolators); + } + + // Point size input. 
+ spv::Id in_point_size = spv::NoResult; + if (key.has_point_size) { + in_point_size = builder.createVariable( + spv::NoPrecision, spv::StorageClassInput, + builder.makeArrayType(type_float, const_input_primitive_vertex_count, + 0), + "xe_in_point_size"); + builder.addDecoration(in_point_size, spv::DecorationLocation, + key.interpolator_count); + main_interface.push_back(in_point_size); + } + + // out gl_PerVertex. + // gl_Position. + id_vector_temp.clear(); + uint32_t member_out_gl_per_vertex_position = uint32_t(id_vector_temp.size()); + id_vector_temp.push_back(type_float4); + spv::Id const_member_out_gl_per_vertex_position = + builder.makeIntConstant(int32_t(member_out_gl_per_vertex_position)); + // gl_ClipDistance. + uint32_t member_out_gl_per_vertex_clip_distance = UINT32_MAX; + spv::Id const_member_out_gl_per_vertex_clip_distance = spv::NoResult; + if (clip_distance_count) { + member_out_gl_per_vertex_clip_distance = uint32_t(id_vector_temp.size()); + id_vector_temp.push_back(type_clip_distances); + const_member_out_gl_per_vertex_clip_distance = builder.makeIntConstant( + int32_t(member_out_gl_per_vertex_clip_distance)); + } + // Structure. 
+ spv::Id type_struct_out_gl_per_vertex = + builder.makeStructType(id_vector_temp, "gl_PerVertex"); + builder.addMemberName(type_struct_out_gl_per_vertex, + member_out_gl_per_vertex_position, "gl_Position"); + builder.addMemberDecoration(type_struct_out_gl_per_vertex, + member_out_gl_per_vertex_position, + spv::DecorationInvariant); + builder.addMemberDecoration(type_struct_out_gl_per_vertex, + member_out_gl_per_vertex_position, + spv::DecorationBuiltIn, spv::BuiltInPosition); + if (clip_distance_count) { + builder.addMemberName(type_struct_out_gl_per_vertex, + member_out_gl_per_vertex_clip_distance, + "gl_ClipDistance"); + builder.addMemberDecoration(type_struct_out_gl_per_vertex, + member_out_gl_per_vertex_clip_distance, + spv::DecorationInvariant); + builder.addMemberDecoration( + type_struct_out_gl_per_vertex, member_out_gl_per_vertex_clip_distance, + spv::DecorationBuiltIn, spv::BuiltInClipDistance); + } + builder.addDecoration(type_struct_out_gl_per_vertex, spv::DecorationBlock); + spv::Id out_gl_per_vertex = + builder.createVariable(spv::NoPrecision, spv::StorageClassOutput, + type_struct_out_gl_per_vertex, ""); + main_interface.push_back(out_gl_per_vertex); + + // Begin the main function. 
+ std::vector main_param_types; + std::vector> main_precisions; + spv::Block* main_entry; + spv::Function* main_function = + builder.makeFunctionEntry(spv::NoPrecision, type_void, "main", + main_param_types, main_precisions, &main_entry); + spv::Instruction* entry_point = + builder.addEntryPoint(spv::ExecutionModelGeometry, main_function, "main"); + for (spv::Id interface_id : main_interface) { + entry_point->addIdOperand(interface_id); + } + builder.addExecutionMode(main_function, input_primitive_execution_mode); + builder.addExecutionMode(main_function, spv::ExecutionModeInvocations, 1); + builder.addExecutionMode(main_function, output_primitive_execution_mode); + builder.addExecutionMode(main_function, spv::ExecutionModeOutputVertices, + int(output_max_vertices)); + + // Note that after every OpEmitVertex, all output variables are undefined. + + // Discard the whole primitive if any vertex has a NaN position (may also be + // set to NaN for emulation of vertex killing with the OR operator). 
+ for (uint32_t i = 0; i < input_primitive_vertex_count; ++i) { + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(builder.makeIntConstant(int32_t(i))); + id_vector_temp.push_back(const_member_in_gl_per_vertex_position); + spv::Id position_is_nan = builder.createUnaryOp( + spv::OpAny, type_bool, + builder.createUnaryOp( + spv::OpIsNan, type_bool4, + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_gl_per_vertex, id_vector_temp), + spv::NoPrecision))); + spv::Block& discard_predecessor = *builder.getBuildPoint(); + spv::Block& discard_then_block = builder.makeNewBlock(); + spv::Block& discard_merge_block = builder.makeNewBlock(); + { + std::unique_ptr selection_merge_op( + std::make_unique(spv::OpSelectionMerge)); + selection_merge_op->addIdOperand(discard_merge_block.getId()); + selection_merge_op->addImmediateOperand( + spv::SelectionControlDontFlattenMask); + discard_predecessor.addInstruction(std::move(selection_merge_op)); + } + { + std::unique_ptr branch_conditional_op( + std::make_unique(spv::OpBranchConditional)); + branch_conditional_op->addIdOperand(position_is_nan); + branch_conditional_op->addIdOperand(discard_then_block.getId()); + branch_conditional_op->addIdOperand(discard_merge_block.getId()); + branch_conditional_op->addImmediateOperand(1); + branch_conditional_op->addImmediateOperand(2); + discard_predecessor.addInstruction(std::move(branch_conditional_op)); + } + discard_then_block.addPredecessor(&discard_predecessor); + discard_merge_block.addPredecessor(&discard_predecessor); + builder.setBuildPoint(&discard_then_block); + builder.createNoResultOp(spv::OpReturn); + builder.setBuildPoint(&discard_merge_block); + } + + // Cull the whole primitive if any cull distance for all vertices in the + // primitive is < 0. 
+ // TODO(Triang3l): For points, handle ps_ucp_mode (transform the host clip + // space to the guest one, calculate the distances to the user clip planes, + // cull using the distance from the center for modes 0, 1 and 2, cull and clip + // per-vertex for modes 2 and 3) - except for the vertex kill flag. + if (cull_distance_count) { + spv::Id const_member_in_gl_per_vertex_cull_distance = + builder.makeIntConstant(int32_t(member_in_gl_per_vertex_cull_distance)); + spv::Id const_float_0 = builder.makeFloatConstant(0.0f); + spv::Id cull_condition = spv::NoResult; + for (uint32_t i = 0; i < cull_distance_count; ++i) { + for (uint32_t j = 0; j < input_primitive_vertex_count; ++j) { + id_vector_temp.clear(); + id_vector_temp.reserve(3); + id_vector_temp.push_back(builder.makeIntConstant(int32_t(j))); + id_vector_temp.push_back(const_member_in_gl_per_vertex_cull_distance); + id_vector_temp.push_back(builder.makeIntConstant(int32_t(i))); + spv::Id cull_distance_is_negative = builder.createBinOp( + spv::OpFOrdLessThan, type_bool, + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_gl_per_vertex, id_vector_temp), + spv::NoPrecision), + const_float_0); + if (cull_condition != spv::NoResult) { + cull_condition = + builder.createBinOp(spv::OpLogicalAnd, type_bool, cull_condition, + cull_distance_is_negative); + } else { + cull_condition = cull_distance_is_negative; + } + } + } + assert_true(cull_condition != spv::NoResult); + spv::Block& discard_predecessor = *builder.getBuildPoint(); + spv::Block& discard_then_block = builder.makeNewBlock(); + spv::Block& discard_merge_block = builder.makeNewBlock(); + { + std::unique_ptr selection_merge_op( + std::make_unique(spv::OpSelectionMerge)); + selection_merge_op->addIdOperand(discard_merge_block.getId()); + selection_merge_op->addImmediateOperand( + spv::SelectionControlDontFlattenMask); + discard_predecessor.addInstruction(std::move(selection_merge_op)); + } + { + std::unique_ptr branch_conditional_op( + 
std::make_unique(spv::OpBranchConditional)); + branch_conditional_op->addIdOperand(cull_condition); + branch_conditional_op->addIdOperand(discard_then_block.getId()); + branch_conditional_op->addIdOperand(discard_merge_block.getId()); + branch_conditional_op->addImmediateOperand(1); + branch_conditional_op->addImmediateOperand(2); + discard_predecessor.addInstruction(std::move(branch_conditional_op)); + } + discard_then_block.addPredecessor(&discard_predecessor); + discard_merge_block.addPredecessor(&discard_predecessor); + builder.setBuildPoint(&discard_then_block); + builder.createNoResultOp(spv::OpReturn); + builder.setBuildPoint(&discard_merge_block); + } + + switch (key.type) { + case PipelineGeometryShader::kRectangleList: { + // Construct a strip with the fourth vertex generated by mirroring a + // vertex across the longest edge (the diagonal). + // + // Possible options: + // + // 0---1 + // | /| + // | / | - 12 is the longest edge, strip 0123 (most commonly used) + // |/ | v3 = v0 + (v1 - v0) + (v2 - v0), or v3 = -v0 + v1 + v2 + // 2--[3] + // + // 1---2 + // | /| + // | / | - 20 is the longest edge, strip 1203 + // |/ | + // 0--[3] + // + // 2---0 + // | /| + // | / | - 01 is the longest edge, strip 2013 + // |/ | + // 1--[3] + + spv::Id const_int_0 = builder.makeIntConstant(0); + spv::Id const_int_1 = builder.makeIntConstant(1); + spv::Id const_int_2 = builder.makeIntConstant(2); + spv::Id const_int_3 = builder.makeIntConstant(3); + + // Get squares of edge lengths to choose the longest edge. + // [0] - 12, [1] - 20, [2] - 01. 
+ spv::Id edge_lengths[3]; + id_vector_temp.resize(3); + id_vector_temp[1] = const_member_in_gl_per_vertex_position; + for (uint32_t i = 0; i < 3; ++i) { + id_vector_temp[0] = builder.makeIntConstant(int32_t((1 + i) % 3)); + id_vector_temp[2] = const_int_0; + spv::Id edge_0_x = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + id_vector_temp[2] = const_int_1; + spv::Id edge_0_y = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + id_vector_temp[0] = builder.makeIntConstant(int32_t((2 + i) % 3)); + id_vector_temp[2] = const_int_0; + spv::Id edge_1_x = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + id_vector_temp[2] = const_int_1; + spv::Id edge_1_y = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + spv::Id edge_x = + builder.createBinOp(spv::OpFSub, type_float, edge_1_x, edge_0_x); + spv::Id edge_y = + builder.createBinOp(spv::OpFSub, type_float, edge_1_y, edge_0_y); + edge_lengths[i] = builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, edge_x, edge_x), + builder.createBinOp(spv::OpFMul, type_float, edge_y, edge_y)); + } + + // Choose the index of the first vertex in the strip based on which edge + // is the longest, and calculate the indices of the other vertices. + spv::Id vertex_indices[3]; + // If 12 > 20 && 12 > 01, then 12 is the longest edge, and the strip is + // 0123. Otherwise, if 20 > 01, then 20 is the longest, and the strip is + // 1203, but if not, 01 is the longest, and the strip is 2013. 
+ vertex_indices[0] = builder.createTriOp( + spv::OpSelect, type_int, + builder.createBinOp( + spv::OpLogicalAnd, type_bool, + builder.createBinOp(spv::OpFOrdGreaterThan, type_bool, + edge_lengths[0], edge_lengths[1]), + builder.createBinOp(spv::OpFOrdGreaterThan, type_bool, + edge_lengths[0], edge_lengths[2])), + const_int_0, + builder.createTriOp( + spv::OpSelect, type_int, + builder.createBinOp(spv::OpFOrdGreaterThan, type_bool, + edge_lengths[1], edge_lengths[2]), + const_int_1, const_int_2)); + for (uint32_t i = 1; i < 3; ++i) { + // vertex_indices[i] = (vertex_indices[0] + i) % 3 + spv::Id vertex_index_without_wrapping = + builder.createBinOp(spv::OpIAdd, type_int, vertex_indices[0], + builder.makeIntConstant(int32_t(i))); + vertex_indices[i] = builder.createTriOp( + spv::OpSelect, type_int, + builder.createBinOp(spv::OpSLessThan, type_bool, + vertex_index_without_wrapping, const_int_3), + vertex_index_without_wrapping, + builder.createBinOp(spv::OpISub, type_int, + vertex_index_without_wrapping, const_int_3)); + } + + // Initialize the point coordinates output for safety if this shader type + // is used with has_point_coordinates for some reason. + spv::Id const_point_coordinates_zero = spv::NoResult; + if (key.has_point_coordinates) { + spv::Id const_float_0 = builder.makeFloatConstant(0.0f); + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(const_float_0); + id_vector_temp.push_back(const_float_0); + const_point_coordinates_zero = builder.makeCompositeConstant( + type_point_coordinates, id_vector_temp); + } + + // Emit the triangle in the strip that consists of the original vertices. + for (uint32_t i = 0; i < 3; ++i) { + spv::Id vertex_index = vertex_indices[i]; + // Interpolators. 
+ if (key.interpolator_count) { + id_vector_temp.clear(); + id_vector_temp.push_back(vertex_index); + builder.createStore( + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_interpolators, id_vector_temp), + spv::NoPrecision), + out_interpolators); + } + // Point coordinates. + if (key.has_point_coordinates) { + builder.createStore(const_point_coordinates_zero, + out_point_coordinates); + } + // Position. + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(vertex_index); + id_vector_temp.push_back(const_member_in_gl_per_vertex_position); + spv::Id vertex_position = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + id_vector_temp.clear(); + id_vector_temp.push_back(const_member_out_gl_per_vertex_position); + builder.createStore( + vertex_position, + builder.createAccessChain(spv::StorageClassOutput, + out_gl_per_vertex, id_vector_temp)); + // Clip distances. + if (clip_distance_count) { + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(vertex_index); + id_vector_temp.push_back(const_member_in_gl_per_vertex_clip_distance); + spv::Id vertex_clip_distances = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_gl_per_vertex, id_vector_temp), + spv::NoPrecision); + id_vector_temp.clear(); + id_vector_temp.push_back( + const_member_out_gl_per_vertex_clip_distance); + builder.createStore( + vertex_clip_distances, + builder.createAccessChain(spv::StorageClassOutput, + out_gl_per_vertex, id_vector_temp)); + } + // Emit the vertex. + builder.createNoResultOp(spv::OpEmitVertex); + } + + // Construct the fourth vertex. + // Interpolators. 
+ for (uint32_t i = 0; i < key.interpolator_count; ++i) { + spv::Id const_int_i = builder.makeIntConstant(int32_t(i)); + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(vertex_indices[0]); + id_vector_temp.push_back(const_int_i); + spv::Id vertex_interpolator_v0 = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_interpolators, + id_vector_temp), + spv::NoPrecision); + id_vector_temp[0] = vertex_indices[1]; + spv::Id vertex_interpolator_v01 = builder.createBinOp( + spv::OpFSub, type_float4, + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_interpolators, id_vector_temp), + spv::NoPrecision), + vertex_interpolator_v0); + builder.addDecoration(vertex_interpolator_v01, + spv::DecorationNoContraction); + id_vector_temp[0] = vertex_indices[2]; + spv::Id vertex_interpolator_v3 = builder.createBinOp( + spv::OpFAdd, type_float4, vertex_interpolator_v01, + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_interpolators, id_vector_temp), + spv::NoPrecision)); + builder.addDecoration(vertex_interpolator_v3, + spv::DecorationNoContraction); + id_vector_temp.clear(); + id_vector_temp.push_back(const_int_i); + builder.createStore( + vertex_interpolator_v3, + builder.createAccessChain(spv::StorageClassOutput, + out_interpolators, id_vector_temp)); + } + // Point coordinates. + if (key.has_point_coordinates) { + builder.createStore(const_point_coordinates_zero, + out_point_coordinates); + } + // Position. 
+ id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(vertex_indices[0]); + id_vector_temp.push_back(const_member_in_gl_per_vertex_position); + spv::Id vertex_position_v0 = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + id_vector_temp[0] = vertex_indices[1]; + spv::Id vertex_position_v01 = builder.createBinOp( + spv::OpFSub, type_float4, + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_gl_per_vertex, id_vector_temp), + spv::NoPrecision), + vertex_position_v0); + builder.addDecoration(vertex_position_v01, spv::DecorationNoContraction); + id_vector_temp[0] = vertex_indices[2]; + spv::Id vertex_position_v3 = builder.createBinOp( + spv::OpFAdd, type_float4, vertex_position_v01, + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_gl_per_vertex, id_vector_temp), + spv::NoPrecision)); + builder.addDecoration(vertex_position_v3, spv::DecorationNoContraction); + id_vector_temp.clear(); + id_vector_temp.push_back(const_member_out_gl_per_vertex_position); + builder.createStore( + vertex_position_v3, + builder.createAccessChain(spv::StorageClassOutput, out_gl_per_vertex, + id_vector_temp)); + // Clip distances. 
+ for (uint32_t i = 0; i < clip_distance_count; ++i) { + spv::Id const_int_i = builder.makeIntConstant(int32_t(i)); + id_vector_temp.clear(); + id_vector_temp.reserve(3); + id_vector_temp.push_back(vertex_indices[0]); + id_vector_temp.push_back(const_member_in_gl_per_vertex_clip_distance); + id_vector_temp.push_back(const_int_i); + spv::Id vertex_clip_distance_v0 = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + id_vector_temp[0] = vertex_indices[1]; + spv::Id vertex_clip_distance_v01 = builder.createBinOp( + spv::OpFSub, type_float, + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_gl_per_vertex, id_vector_temp), + spv::NoPrecision), + vertex_clip_distance_v0); + builder.addDecoration(vertex_clip_distance_v01, + spv::DecorationNoContraction); + id_vector_temp[0] = vertex_indices[2]; + spv::Id vertex_clip_distance_v3 = builder.createBinOp( + spv::OpFAdd, type_float, vertex_clip_distance_v01, + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_gl_per_vertex, id_vector_temp), + spv::NoPrecision)); + builder.addDecoration(vertex_clip_distance_v3, + spv::DecorationNoContraction); + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(const_member_in_gl_per_vertex_clip_distance); + id_vector_temp.push_back(const_int_i); + builder.createStore( + vertex_clip_distance_v3, + builder.createAccessChain(spv::StorageClassOutput, + out_gl_per_vertex, id_vector_temp)); + } + // Emit the vertex. + builder.createNoResultOp(spv::OpEmitVertex); + builder.createNoResultOp(spv::OpEndPrimitive); + } break; + + case PipelineGeometryShader::kQuadList: { + // Initialize the point coordinates output for safety if this shader type + // is used with has_point_coordinates for some reason. 
+ spv::Id const_point_coordinates_zero = spv::NoResult; + if (key.has_point_coordinates) { + spv::Id const_float_0 = builder.makeFloatConstant(0.0f); + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(const_float_0); + id_vector_temp.push_back(const_float_0); + const_point_coordinates_zero = builder.makeCompositeConstant( + type_point_coordinates, id_vector_temp); + } + + // Build the triangle strip from the original quad vertices in the + // 0, 1, 3, 2 order (like specified for GL_QUAD_STRIP). + // TODO(Triang3l): Find the correct decomposition of quads into triangles + // on the real hardware. + for (uint32_t i = 0; i < 4; ++i) { + spv::Id const_vertex_index = + builder.makeIntConstant(int32_t(i ^ (i >> 1))); + // Interpolators. + if (key.interpolator_count) { + id_vector_temp.clear(); + id_vector_temp.push_back(const_vertex_index); + builder.createStore( + builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_interpolators, id_vector_temp), + spv::NoPrecision), + out_interpolators); + } + // Point coordinates. + if (key.has_point_coordinates) { + builder.createStore(const_point_coordinates_zero, + out_point_coordinates); + } + // Position. + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(const_vertex_index); + id_vector_temp.push_back(const_member_in_gl_per_vertex_position); + spv::Id vertex_position = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + id_vector_temp.clear(); + id_vector_temp.push_back(const_member_out_gl_per_vertex_position); + builder.createStore( + vertex_position, + builder.createAccessChain(spv::StorageClassOutput, + out_gl_per_vertex, id_vector_temp)); + // Clip distances. 
+ if (clip_distance_count) { + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(const_vertex_index); + id_vector_temp.push_back(const_member_in_gl_per_vertex_clip_distance); + spv::Id vertex_clip_distances = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_gl_per_vertex, id_vector_temp), + spv::NoPrecision); + id_vector_temp.clear(); + id_vector_temp.push_back( + const_member_out_gl_per_vertex_clip_distance); + builder.createStore( + vertex_clip_distances, + builder.createAccessChain(spv::StorageClassOutput, + out_gl_per_vertex, id_vector_temp)); + } + // Emit the vertex. + builder.createNoResultOp(spv::OpEmitVertex); + } + builder.createNoResultOp(spv::OpEndPrimitive); + } break; + + default: + assert_unhandled_case(key.type); + } + + // End the main function. + builder.leaveFunction(); + + // Serialize the shader code. + std::vector shader_code; + builder.dump(shader_code); + + // Create the shader module, and store the handle even if creation fails not + // to try to create it again later. + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + VkShaderModule shader_module = ui::vulkan::util::CreateShaderModule( + provider, reinterpret_cast(shader_code.data()), + sizeof(uint32_t) * shader_code.size()); + if (shader_module == VK_NULL_HANDLE) { + XELOGE( + "VulkanPipelineCache: Failed to create the primitive type geometry " + "shader 0x{:08X}", + key.key); + } + geometry_shaders_.emplace(key, shader_module); + return shader_module; +} + bool VulkanPipelineCache::EnsurePipelineCreated( const PipelineCreationArguments& creation_arguments) { if (creation_arguments.pipeline->second.pipeline != VK_NULL_HANDLE) { @@ -739,15 +1513,7 @@ bool VulkanPipelineCache::EnsurePipelineCreated( shader_stage_vertex.pName = "main"; shader_stage_vertex.pSpecializationInfo = nullptr; // Geometry shader. 
- VkShaderModule geometry_shader = VK_NULL_HANDLE; - switch (description.geometry_shader) { - case PipelineGeometryShader::kRectangleList: - geometry_shader = gs_rectangle_list_; - break; - default: - break; - } - if (geometry_shader != VK_NULL_HANDLE) { + if (creation_arguments.geometry_shader != VK_NULL_HANDLE) { VkPipelineShaderStageCreateInfo& shader_stage_geometry = shader_stages[shader_stage_count++]; shader_stage_geometry.sType = @@ -755,7 +1521,7 @@ bool VulkanPipelineCache::EnsurePipelineCreated( shader_stage_geometry.pNext = nullptr; shader_stage_geometry.flags = 0; shader_stage_geometry.stage = VK_SHADER_STAGE_GEOMETRY_BIT; - shader_stage_geometry.module = geometry_shader; + shader_stage_geometry.module = creation_arguments.geometry_shader; shader_stage_geometry.pName = "main"; shader_stage_geometry.pSpecializationInfo = nullptr; } diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h index f979b0b03..ceb04efcc 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h @@ -86,6 +86,7 @@ class VulkanPipelineCache { enum class PipelineGeometryShader : uint32_t { kNone, kRectangleList, + kQuadList, }; enum class PipelinePrimitiveTopology : uint32_t { @@ -205,9 +206,37 @@ class VulkanPipelineCache { std::pair* pipeline; const VulkanShader::VulkanTranslation* vertex_shader; const VulkanShader::VulkanTranslation* pixel_shader; + VkShaderModule geometry_shader; VkRenderPass render_pass; }; + union GeometryShaderKey { + uint32_t key; + struct { + PipelineGeometryShader type : 2; + uint32_t interpolator_count : 5; + uint32_t user_clip_plane_count : 3; + uint32_t user_clip_plane_cull : 1; + uint32_t has_vertex_kill_and : 1; + uint32_t has_point_size : 1; + uint32_t has_point_coordinates : 1; + }; + + GeometryShaderKey() : key(0) { static_assert_size(*this, sizeof(key)); } + + struct Hasher { + size_t operator()(const GeometryShaderKey& key) const { + return 
std::hash{}(key.key); + } + }; + bool operator==(const GeometryShaderKey& other_key) const { + return key == other_key.key; + } + bool operator!=(const GeometryShaderKey& other_key) const { + return !(*this == other_key); + } + }; + // Can be called from multiple threads. bool TranslateAnalyzedShader(SpirvShaderTranslator& translator, VulkanShader::VulkanTranslation& translation); @@ -227,6 +256,10 @@ class VulkanPipelineCache { // Whether the pipeline for the given description is supported by the device. bool ArePipelineRequirementsMet(const PipelineDescription& description) const; + static bool GetGeometryShaderKey(PipelineGeometryShader geometry_shader_type, + GeometryShaderKey& key_out); + VkShaderModule GetGeometryShader(GeometryShaderKey key); + // Can be called from creation threads - all needed data must be fully set up // at the point of the call: shaders must be translated, pipeline layout and // render pass objects must be available. @@ -237,8 +270,6 @@ class VulkanPipelineCache { const RegisterFile& register_file_; VulkanRenderTargetCache& render_target_cache_; - VkShaderModule gs_rectangle_list_ = VK_NULL_HANDLE; - // Temporary storage for AnalyzeUcode calls on the processor thread. StringBuffer ucode_disasm_buffer_; // Reusable shader translator on the command processor thread. @@ -249,6 +280,12 @@ class VulkanPipelineCache { xe::hash::IdentityHasher> shaders_; + // Geometry shaders for Xenos primitive types not supported by Vulkan. + // Stores VK_NULL_HANDLE if failed to create. 
+ std::unordered_map + geometry_shaders_; + std::unordered_map pipelines_; diff --git a/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc index 0d33aaf64..058b6a5d1 100644 --- a/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc @@ -28,17 +28,16 @@ VulkanPrimitiveProcessor::~VulkanPrimitiveProcessor() { Shutdown(true); } bool VulkanPrimitiveProcessor::Initialize() { // TODO(Triang3l): fullDrawIndexUint32 feature check and indirect index fetch. - // TODO(Triang3l): geometryShader check for quads when geometry shaders are - // added. const ui::vulkan::VulkanProvider& provider = command_processor_.GetVulkanProvider(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); const VkPhysicalDevicePortabilitySubsetFeaturesKHR* device_portability_subset_features = provider.device_portability_subset_features(); if (!InitializeCommon(true, !device_portability_subset_features || device_portability_subset_features->triangleFans, - false, false)) { + false, device_features.geometryShader)) { Shutdown(); return false; } From 862c457761ce84f445df08bf4ff4b2f1311d0019 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 15 May 2022 16:19:36 +0300 Subject: [PATCH 079/123] [Vulkan] Use Shader::IsHostVertexShaderTypeDomain --- src/xenia/gpu/spirv_shader_translator.h | 8 ++++---- src/xenia/gpu/vulkan/vulkan_command_processor.cc | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 259b703c1..429ab5fe0 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -249,13 +249,13 @@ class SpirvShaderTranslator : public ShaderTranslator { bool IsSpirvVertexShader() const { return is_vertex_shader() && - GetSpirvShaderModification().vertex.host_vertex_shader_type == - Shader::HostVertexShaderType::kVertex; + 
!Shader::IsHostVertexShaderTypeDomain( + GetSpirvShaderModification().vertex.host_vertex_shader_type); } bool IsSpirvTessEvalShader() const { return is_vertex_shader() && - GetSpirvShaderModification().vertex.host_vertex_shader_type != - Shader::HostVertexShaderType::kVertex; + Shader::IsHostVertexShaderTypeDomain( + GetSpirvShaderModification().vertex.host_vertex_shader_type); } // Must be called before emitting any SPIR-V operations that must be in a diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 74c7f3fe6..5e947b212 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -1379,7 +1379,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // Nothing to draw. return true; } - // TODO(Triang3l): Tessellation. + // TODO(Triang3l): Tessellation, geometry-type-specific vertex shader, vertex + // shader as compute. if (primitive_processing_result.host_vertex_shader_type != Shader::HostVertexShaderType::kVertex) { return false; From 7d19a8c0e8517a81362ba4698a379dd66333f61e Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 15 May 2022 16:20:12 +0300 Subject: [PATCH 080/123] [Vulkan] Add missing include for std::hash --- src/xenia/gpu/vulkan/vulkan_pipeline_cache.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h index ceb04efcc..dd82bed31 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h @@ -12,6 +12,7 @@ #include #include +#include #include #include #include From 185c23dd509faf13c5275bbe992bb4df1709971d Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 15 May 2022 16:31:24 +0300 Subject: [PATCH 081/123] [Vulkan] Gather shader stages that VS can be translated into --- .../gpu/vulkan/vulkan_command_processor.cc | 53 ++++++++++--------- 
.../gpu/vulkan/vulkan_command_processor.h | 8 ++- src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 17 +++++- src/xenia/gpu/vulkan/vulkan_pipeline_cache.h | 4 +- src/xenia/gpu/vulkan/vulkan_shared_memory.cc | 16 +++--- src/xenia/gpu/vulkan/vulkan_shared_memory.h | 4 +- 6 files changed, 60 insertions(+), 42 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 5e947b212..9556f9efe 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -68,6 +68,21 @@ bool VulkanCommandProcessor::SetupContext() { const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + + guest_shader_pipeline_stages_ = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + guest_shader_vertex_stages_ = VK_SHADER_STAGE_VERTEX_BIT; + if (device_features.tessellationShader) { + guest_shader_pipeline_stages_ |= + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; + guest_shader_vertex_stages_ |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; + } + if (!device_features.vertexPipelineStoresAndAtomics) { + // For memory export from vertex shaders converted to compute shaders. + guest_shader_pipeline_stages_ |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + guest_shader_vertex_stages_ |= VK_SHADER_STAGE_COMPUTE_BIT; + } // No specific reason for 32768, just the "too much" amount from Direct3D 12 // PIX warnings. 
@@ -98,15 +113,14 @@ bool VulkanCommandProcessor::SetupContext() { XELOGE("Failed to create an empty Vulkan descriptor set layout"); return false; } - VkShaderStageFlags shader_stages_guest_vertex = - GetGuestVertexShaderStageFlags(); + VkShaderStageFlags guest_shader_stages = + guest_shader_vertex_stages_ | VK_SHADER_STAGE_FRAGMENT_BIT; VkDescriptorSetLayoutBinding descriptor_set_layout_binding_uniform_buffer; descriptor_set_layout_binding_uniform_buffer.binding = 0; descriptor_set_layout_binding_uniform_buffer.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; descriptor_set_layout_binding_uniform_buffer.descriptorCount = 1; - descriptor_set_layout_binding_uniform_buffer.stageFlags = - shader_stages_guest_vertex | VK_SHADER_STAGE_FRAGMENT_BIT; + descriptor_set_layout_binding_uniform_buffer.stageFlags = guest_shader_stages; descriptor_set_layout_binding_uniform_buffer.pImmutableSamplers = nullptr; descriptor_set_layout_create_info.bindingCount = 1; descriptor_set_layout_create_info.pBindings = @@ -120,7 +134,7 @@ bool VulkanCommandProcessor::SetupContext() { return false; } descriptor_set_layout_binding_uniform_buffer.stageFlags = - shader_stages_guest_vertex; + guest_shader_vertex_stages_; if (dfn.vkCreateDescriptorSetLayout( device, &descriptor_set_layout_create_info, nullptr, &descriptor_set_layout_float_constants_vertex_) != VK_SUCCESS) { @@ -139,9 +153,8 @@ bool VulkanCommandProcessor::SetupContext() { "float constants uniform buffer"); return false; } - descriptor_set_layout_binding_uniform_buffer.stageFlags = - shader_stages_guest_vertex | VK_SHADER_STAGE_FRAGMENT_BIT; - if (provider.device_features().tessellationShader) { + descriptor_set_layout_binding_uniform_buffer.stageFlags = guest_shader_stages; + if (device_features.tessellationShader) { descriptor_set_layout_binding_uniform_buffer.stageFlags |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; } @@ -169,7 +182,7 @@ bool VulkanCommandProcessor::SetupContext() { // vertex shader access to the shared memory 
for the tessellation vertex // shader (to retrieve tessellation factors). descriptor_set_layout_bindings_shared_memory_and_edram[0].stageFlags = - shader_stages_guest_vertex | VK_SHADER_STAGE_FRAGMENT_BIT; + guest_shader_stages; descriptor_set_layout_bindings_shared_memory_and_edram[0].pImmutableSamplers = nullptr; // TODO(Triang3l): EDRAM storage image binding for the fragment shader @@ -185,8 +198,8 @@ bool VulkanCommandProcessor::SetupContext() { return false; } - shared_memory_ = - std::make_unique(*this, *memory_, trace_writer_); + shared_memory_ = std::make_unique( + *this, *memory_, trace_writer_, guest_shader_pipeline_stages_); if (!shared_memory_->Initialize()) { XELOGE("Failed to initialize shared memory"); return false; @@ -209,7 +222,8 @@ bool VulkanCommandProcessor::SetupContext() { } pipeline_cache_ = std::make_unique( - *this, *register_file_, *render_target_cache_); + *this, *register_file_, *render_target_cache_, + guest_shader_vertex_stages_); if (!pipeline_cache_->Initialize()) { XELOGE("Failed to initialize the graphics pipeline cache"); return false; @@ -1151,8 +1165,7 @@ VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, descriptor_set_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; descriptor_set_layout_binding.descriptorCount = texture_count_vertex; - descriptor_set_layout_binding.stageFlags = - GetGuestVertexShaderStageFlags(); + descriptor_set_layout_binding.stageFlags = guest_shader_vertex_stages_; descriptor_set_layout_binding.pImmutableSamplers = nullptr; VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info; descriptor_set_layout_create_info.sType = @@ -2130,18 +2143,6 @@ void VulkanCommandProcessor::SplitPendingBarrier() { pending_image_memory_barrier_count; } -VkShaderStageFlags VulkanCommandProcessor::GetGuestVertexShaderStageFlags() - const { - VkShaderStageFlags stages = VK_SHADER_STAGE_VERTEX_BIT; - const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); - if 
(provider.device_features().tessellationShader) { - stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; - } - // TODO(Triang3l): Vertex to compute translation for rectangle and possibly - // point emulation. - return stages; -} - void VulkanCommandProcessor::UpdateDynamicState( const draw_util::ViewportInfo& viewport_info, bool primitive_polygonal, reg::RB_DEPTHCONTROL normalized_depth_control) { diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 64d2f95b5..d81cde4a1 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -240,8 +240,6 @@ class VulkanCommandProcessor : public CommandProcessor { void SplitPendingBarrier(); - VkShaderStageFlags GetGuestVertexShaderStageFlags() const; - void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info, bool primitive_polygonal, reg::RB_DEPTHCONTROL normalized_depth_control); @@ -261,6 +259,12 @@ class VulkanCommandProcessor : public CommandProcessor { bool cache_clear_requested_ = false; + // Host shader types that guest shaders can be translated into - they can + // access the shared memory (via vertex fetch, memory export, or manual index + // buffer reading) and textures. 
+ VkPipelineStageFlags guest_shader_pipeline_stages_ = 0; + VkShaderStageFlags guest_shader_vertex_stages_ = 0; + std::vector fences_free_; std::vector semaphores_free_; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index db2d36d69..b6e74a648 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -39,10 +39,12 @@ namespace vulkan { VulkanPipelineCache::VulkanPipelineCache( VulkanCommandProcessor& command_processor, const RegisterFile& register_file, - VulkanRenderTargetCache& render_target_cache) + VulkanRenderTargetCache& render_target_cache, + VkShaderStageFlags guest_shader_vertex_stages) : command_processor_(command_processor), register_file_(register_file), - render_target_cache_(render_target_cache) {} + render_target_cache_(render_target_cache), + guest_shader_vertex_stages_(guest_shader_vertex_stages) {} VulkanPipelineCache::~VulkanPipelineCache() { Shutdown(); } @@ -607,6 +609,17 @@ bool VulkanPipelineCache::GetCurrentStateDescription( bool VulkanPipelineCache::ArePipelineRequirementsMet( const PipelineDescription& description) const { + VkShaderStageFlags vertex_shader_stage = + Shader::IsHostVertexShaderTypeDomain( + SpirvShaderTranslator::Modification( + description.vertex_shader_modification) + .vertex.host_vertex_shader_type) + ? 
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT + : VK_SHADER_STAGE_VERTEX_BIT; + if (!(guest_shader_vertex_stages_ & vertex_shader_stage)) { + return false; + } + const ui::vulkan::VulkanProvider& provider = command_processor_.GetVulkanProvider(); const VkPhysicalDeviceFeatures& device_features = provider.device_features(); diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h index dd82bed31..6be73d43c 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h @@ -50,7 +50,8 @@ class VulkanPipelineCache { VulkanPipelineCache(VulkanCommandProcessor& command_processor, const RegisterFile& register_file, - VulkanRenderTargetCache& render_target_cache); + VulkanRenderTargetCache& render_target_cache, + VkShaderStageFlags guest_shader_vertex_stages); ~VulkanPipelineCache(); bool Initialize(); @@ -270,6 +271,7 @@ class VulkanPipelineCache { VulkanCommandProcessor& command_processor_; const RegisterFile& register_file_; VulkanRenderTargetCache& render_target_cache_; + VkShaderStageFlags guest_shader_vertex_stages_; // Temporary storage for AnalyzeUcode calls on the processor thread. 
StringBuffer ucode_disasm_buffer_; diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc index 788b8166a..c321b9840 100644 --- a/src/xenia/gpu/vulkan/vulkan_shared_memory.cc +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.cc @@ -35,10 +35,12 @@ namespace vulkan { VulkanSharedMemory::VulkanSharedMemory( VulkanCommandProcessor& command_processor, Memory& memory, - TraceWriter& trace_writer) + TraceWriter& trace_writer, + VkPipelineStageFlags guest_shader_pipeline_stages) : SharedMemory(memory), command_processor_(command_processor), - trace_writer_(trace_writer) {} + trace_writer_(trace_writer), + guest_shader_pipeline_stages_(guest_shader_pipeline_stages) {} VulkanSharedMemory::~VulkanSharedMemory() { Shutdown(true); } @@ -463,14 +465,8 @@ void VulkanSharedMemory::GetUsageMasks(Usage usage, default: break; } - stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - const ui::vulkan::VulkanProvider& provider = - command_processor_.GetVulkanProvider(); - if (provider.device_features().tessellationShader) { - stage_mask |= VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; - } + stage_mask = + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | guest_shader_pipeline_stages_; access_mask = VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_SHADER_READ_BIT; switch (usage) { case Usage::kRead: diff --git a/src/xenia/gpu/vulkan/vulkan_shared_memory.h b/src/xenia/gpu/vulkan/vulkan_shared_memory.h index b37949ec8..14214a5d0 100644 --- a/src/xenia/gpu/vulkan/vulkan_shared_memory.h +++ b/src/xenia/gpu/vulkan/vulkan_shared_memory.h @@ -30,7 +30,8 @@ class VulkanCommandProcessor; class VulkanSharedMemory : public SharedMemory { public: VulkanSharedMemory(VulkanCommandProcessor& command_processor, Memory& memory, - TraceWriter& trace_writer); + TraceWriter& trace_writer, + VkPipelineStageFlags guest_shader_pipeline_stages); ~VulkanSharedMemory() override; bool Initialize(); @@ 
-70,6 +71,7 @@ class VulkanSharedMemory : public SharedMemory { VulkanCommandProcessor& command_processor_; TraceWriter& trace_writer_; + VkPipelineStageFlags guest_shader_pipeline_stages_; VkBuffer buffer_ = VK_NULL_HANDLE; uint32_t buffer_memory_type_; From b80361ee3c01b4e742fc8fc0fc65c1b570b78ced Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 15 May 2022 16:59:27 +0300 Subject: [PATCH 082/123] [Vulkan] Texture cache: Maximum dimensions, null images --- .../gpu/vulkan/deferred_command_buffer.cc | 11 + .../gpu/vulkan/deferred_command_buffer.h | 34 ++ .../gpu/vulkan/vulkan_command_processor.cc | 19 + .../gpu/vulkan/vulkan_command_processor.h | 11 +- src/xenia/gpu/vulkan/vulkan_texture_cache.cc | 452 ++++++++++++++++++ src/xenia/gpu/vulkan/vulkan_texture_cache.h | 94 ++++ 6 files changed, 618 insertions(+), 3 deletions(-) create mode 100644 src/xenia/gpu/vulkan/vulkan_texture_cache.cc create mode 100644 src/xenia/gpu/vulkan/vulkan_texture_cache.h diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.cc b/src/xenia/gpu/vulkan/deferred_command_buffer.cc index 98d42865d..fef52b9f4 100644 --- a/src/xenia/gpu/vulkan/deferred_command_buffer.cc +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.cc @@ -134,6 +134,17 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { attachments, args.rect_count, rects); } break; + case Command::kVkClearColorImage: { + auto& args = *reinterpret_cast(stream); + dfn.vkCmdClearColorImage( + command_buffer, args.image, args.image_layout, &args.color, + args.range_count, + reinterpret_cast( + reinterpret_cast(stream) + + xe::align(sizeof(ArgsVkClearColorImage), + alignof(VkImageSubresourceRange)))); + } break; + case Command::kVkCopyBuffer: { auto& args = *reinterpret_cast(stream); dfn.vkCmdCopyBuffer( diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.h b/src/xenia/gpu/vulkan/deferred_command_buffer.h index e3605f1e6..60b216354 100644 --- a/src/xenia/gpu/vulkan/deferred_command_buffer.h +++ 
b/src/xenia/gpu/vulkan/deferred_command_buffer.h @@ -163,6 +163,30 @@ class DeferredCommandBuffer { std::memcpy(rects_arg, rects, sizeof(VkClearRect) * rect_count); } + VkImageSubresourceRange* CmdClearColorImageEmplace( + VkImage image, VkImageLayout image_layout, const VkClearColorValue* color, + uint32_t range_count) { + const size_t header_size = xe::align(sizeof(ArgsVkClearColorImage), + alignof(VkImageSubresourceRange)); + uint8_t* args_ptr = reinterpret_cast(WriteCommand( + Command::kVkClearColorImage, + header_size + sizeof(VkImageSubresourceRange) * range_count)); + auto& args = *reinterpret_cast(args_ptr); + args.image = image; + args.image_layout = image_layout; + args.color = *color; + args.range_count = range_count; + return reinterpret_cast(args_ptr + header_size); + } + void CmdVkClearColorImage(VkImage image, VkImageLayout image_layout, + const VkClearColorValue* color, + uint32_t range_count, + const VkImageSubresourceRange* ranges) { + std::memcpy( + CmdClearColorImageEmplace(image, image_layout, color, range_count), + ranges, sizeof(VkImageSubresourceRange) * range_count); + } + VkBufferCopy* CmdCopyBufferEmplace(VkBuffer src_buffer, VkBuffer dst_buffer, uint32_t region_count) { const size_t header_size = @@ -316,6 +340,7 @@ class DeferredCommandBuffer { kVkBindPipeline, kVkBindVertexBuffers, kVkClearAttachments, + kVkClearColorImage, kVkCopyBuffer, kVkDispatch, kVkDraw, @@ -386,6 +411,15 @@ class DeferredCommandBuffer { static_assert(alignof(VkClearRect) <= alignof(uintmax_t)); }; + struct ArgsVkClearColorImage { + VkImage image; + VkImageLayout image_layout; + VkClearColorValue color; + uint32_t range_count; + // Followed by aligned VkImageSubresourceRange[]. 
+ static_assert(alignof(VkImageSubresourceRange) <= alignof(uintmax_t)); + }; + struct ArgsVkCopyBuffer { VkBuffer src_buffer; VkBuffer dst_buffer; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 9556f9efe..c67a59367 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -229,6 +229,15 @@ bool VulkanCommandProcessor::SetupContext() { return false; } + // TODO(Triang3l): Actual draw resolution scale. + texture_cache_ = + VulkanTextureCache::Create(*register_file_, *shared_memory_, 1, 1, *this, + guest_shader_pipeline_stages_); + if (!texture_cache_) { + XELOGE("Failed to initialize the texture cache"); + return false; + } + // Shared memory and EDRAM common bindings. VkDescriptorPoolSize descriptor_pool_sizes[1]; descriptor_pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; @@ -537,6 +546,8 @@ void VulkanCommandProcessor::ShutdownContext() { dfn.vkDestroyDescriptorPool, device, shared_memory_and_edram_descriptor_pool_); + texture_cache_.reset(); + pipeline_cache_.reset(); render_target_cache_.reset(); @@ -1747,6 +1758,8 @@ void VulkanCommandProcessor::CheckSubmissionFenceAndDeviceLoss( render_target_cache_->CompletedSubmissionUpdated(); + texture_cache_->CompletedSubmissionUpdated(submission_completed_); + // Destroy outdated swap objects. 
while (!swap_framebuffers_outdated_.empty()) { const auto& framebuffer_pair = swap_framebuffers_outdated_.front(); @@ -1829,6 +1842,8 @@ bool VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { current_graphics_descriptor_sets_bound_up_to_date_ = 0; primitive_processor_->BeginSubmission(); + + texture_cache_->BeginSubmission(GetCurrentSubmission()); } if (is_opening_frame) { @@ -1854,6 +1869,8 @@ bool VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { uniform_buffer_pool_->Reclaim(frame_completed_); primitive_processor_->BeginFrame(); + + texture_cache_->BeginFrame(); } return true; @@ -2093,6 +2110,8 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { uniform_buffer_pool_->ClearCache(); transient_descriptor_pool_uniform_buffers_->ClearCache(); + texture_cache_->ClearCache(); + pipeline_cache_->ClearCache(); render_target_cache_->ClearCache(); diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index d81cde4a1..c23279bcf 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -30,6 +30,7 @@ #include "xenia/gpu/vulkan/vulkan_render_target_cache.h" #include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/vulkan/vulkan_shared_memory.h" +#include "xenia/gpu/vulkan/vulkan_texture_cache.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/kernel_state.h" #include "xenia/ui/vulkan/transient_descriptor_pool.h" @@ -82,8 +83,10 @@ class VulkanCommandProcessor : public CommandProcessor { uint64_t GetCurrentFrame() const { return frame_current_; } uint64_t GetCompletedFrame() const { return frame_completed_; } - // Submission must be open to insert barriers. Returning true if the barrier - // has actually been inserted and not dropped. + // Submission must be open to insert barriers. 
If no pipeline stages access + // the resource in a synchronization scope, the stage masks should be 0 (top / + // bottom of pipe should be specified only if explicitly needed). Returning + // true if the barrier has actually been inserted and not dropped. bool PushBufferMemoryBarrier( VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, @@ -328,9 +331,11 @@ class VulkanCommandProcessor : public CommandProcessor { std::unique_ptr primitive_processor_; + std::unique_ptr render_target_cache_; + std::unique_ptr pipeline_cache_; - std::unique_ptr render_target_cache_; + std::unique_ptr texture_cache_; VkDescriptorPool shared_memory_and_edram_descriptor_pool_ = VK_NULL_HANDLE; VkDescriptorSet shared_memory_and_edram_descriptor_set_; diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc new file mode 100644 index 000000000..c9b8f8e58 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -0,0 +1,452 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/vulkan/vulkan_texture_cache.h" + +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include "xenia/ui/vulkan/vulkan_util.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +// Generated with `xb buildshaders`. 
+namespace shaders { +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_128bpb_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_128bpb_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_16bpb_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_16bpb_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_32bpb_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_32bpb_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_64bpb_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_64bpb_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_8bpb_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_8bpb_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_ctx1_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_depth_float_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_depth_float_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_depth_unorm_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_depth_unorm_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_dxn_rg8_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_dxt1_rgba8_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_dxt3_rgba8_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_dxt3a_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_dxt3aas1111_argb4_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_dxt5_rgba8_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_dxt5a_r8_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r10g11b11_rgba16_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r10g11b11_rgba16_scaled_cs.h" +#include 
"xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r10g11b11_rgba16_snorm_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r10g11b11_rgba16_snorm_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r11g11b10_rgba16_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r11g11b10_rgba16_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r11g11b10_rgba16_snorm_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r11g11b10_rgba16_snorm_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r16_snorm_float_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r16_snorm_float_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r16_unorm_float_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r16_unorm_float_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r4g4b4a4_a4r4g4b4_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r4g4b4a4_a4r4g4b4_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r5g5b5a1_b5g5r5a1_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r5g5b5a1_b5g5r5a1_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r5g6b5_b5g6r5_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r5g6b5_b5g6r5_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rg16_snorm_float_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rg16_snorm_float_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rg16_unorm_float_cs.h" +#include 
"xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rg16_unorm_float_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rgba16_snorm_float_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rgba16_snorm_float_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rgba16_unorm_float_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rgba16_unorm_float_scaled_cs.h" +} // namespace shaders + +VulkanTextureCache::~VulkanTextureCache() { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + if (null_image_view_3d_ != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, null_image_view_3d_, nullptr); + } + if (null_image_view_cube_ != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, null_image_view_cube_, nullptr); + } + if (null_image_view_2d_array_ != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, null_image_view_2d_array_, nullptr); + } + if (null_image_3d_ != VK_NULL_HANDLE) { + dfn.vkDestroyImage(device, null_image_3d_, nullptr); + } + if (null_image_2d_array_cube_ != VK_NULL_HANDLE) { + dfn.vkDestroyImage(device, null_image_2d_array_cube_, nullptr); + } + for (VkDeviceMemory null_images_memory : null_images_memory_) { + if (null_images_memory != VK_NULL_HANDLE) { + dfn.vkFreeMemory(device, null_images_memory, nullptr); + } + } +} + +void VulkanTextureCache::BeginSubmission(uint64_t new_submission_index) { + TextureCache::BeginSubmission(new_submission_index); + + if (!null_images_cleared_) { + VkImage null_images[] = {null_image_2d_array_cube_, null_image_3d_}; + VkImageSubresourceRange null_image_subresource_range( + ui::vulkan::util::InitializeSubresourceRange()); + for (size_t i = 0; i < xe::countof(null_images); ++i) { + command_processor_.PushImageMemoryBarrier( + null_images[i], null_image_subresource_range, 0, + 
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + } + command_processor_.SubmitBarriers(true); + DeferredCommandBuffer& command_buffer = + command_processor_.deferred_command_buffer(); + // TODO(Triang3l): Find the return value for invalid texture fetch constants + // on the real hardware. + VkClearColorValue null_image_clear_color; + null_image_clear_color.float32[0] = 0.0f; + null_image_clear_color.float32[1] = 0.0f; + null_image_clear_color.float32[2] = 0.0f; + null_image_clear_color.float32[3] = 0.0f; + for (size_t i = 0; i < xe::countof(null_images); ++i) { + command_buffer.CmdVkClearColorImage( + null_images[i], VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + &null_image_clear_color, 1, &null_image_subresource_range); + } + for (size_t i = 0; i < xe::countof(null_images); ++i) { + command_processor_.PushImageMemoryBarrier( + null_images[i], null_image_subresource_range, + VK_PIPELINE_STAGE_TRANSFER_BIT, guest_shader_pipeline_stages_, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + null_images_cleared_ = true; + } +} + +uint32_t VulkanTextureCache::GetHostFormatSwizzle(TextureKey key) const { + // TODO(Triang3l): Implement GetHostFormatSwizzle. + return xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA; +} + +uint32_t VulkanTextureCache::GetMaxHostTextureWidthHeight( + xenos::DataDimension dimension) const { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDeviceLimits& device_limits = + provider.device_properties().limits; + switch (dimension) { + case xenos::DataDimension::k1D: + case xenos::DataDimension::k2DOrStacked: + // 1D and 2D are emulated as 2D arrays. 
+ return device_limits.maxImageDimension2D; + case xenos::DataDimension::k3D: + return device_limits.maxImageDimension3D; + case xenos::DataDimension::kCube: + return device_limits.maxImageDimensionCube; + default: + assert_unhandled_case(dimension); + return 0; + } +} + +uint32_t VulkanTextureCache::GetMaxHostTextureDepthOrArraySize( + xenos::DataDimension dimension) const { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDeviceLimits& device_limits = + provider.device_properties().limits; + switch (dimension) { + case xenos::DataDimension::k1D: + case xenos::DataDimension::k2DOrStacked: + // 1D and 2D are emulated as 2D arrays. + return device_limits.maxImageArrayLayers; + case xenos::DataDimension::k3D: + return device_limits.maxImageDimension3D; + case xenos::DataDimension::kCube: + // Not requesting the imageCubeArray feature, and the Xenos doesn't + // support cube map arrays. + return 6; + default: + assert_unhandled_case(dimension); + return 0; + } +} + +std::unique_ptr VulkanTextureCache::CreateTexture( + TextureKey key) { + // TODO(Triang3l): Implement CreateTexture. + return std::unique_ptr(new VulkanTexture(*this, key)); +} + +bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, + bool load_base, + bool load_mips) { + // TODO(Triang3l): Implement LoadTextureDataFromResidentMemoryImpl. 
+ return true; +} + +VulkanTextureCache::VulkanTexture::VulkanTexture( + VulkanTextureCache& texture_cache, const TextureKey& key) + : Texture(texture_cache, key) {} + +VulkanTextureCache::VulkanTextureCache( + const RegisterFile& register_file, VulkanSharedMemory& shared_memory, + uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y, + VulkanCommandProcessor& command_processor, + VkPipelineStageFlags guest_shader_pipeline_stages) + : TextureCache(register_file, shared_memory, draw_resolution_scale_x, + draw_resolution_scale_y), + command_processor_(command_processor), + guest_shader_pipeline_stages_(guest_shader_pipeline_stages) { + // TODO(Triang3l): Support draw resolution scaling. + assert_true(draw_resolution_scale_x == 1 && draw_resolution_scale_y == 1); +} + +bool VulkanTextureCache::Initialize() { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + const VkPhysicalDevicePortabilitySubsetFeaturesKHR* + device_portability_subset_features = + provider.device_portability_subset_features(); + + // Null images as a replacement for unneeded bindings and for bindings for + // which the real image hasn't been created. + + VkImageCreateInfo null_image_create_info; + null_image_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + null_image_create_info.pNext = nullptr; + null_image_create_info.flags = VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; + null_image_create_info.imageType = VK_IMAGE_TYPE_2D; + // Four components to return (0, 0, 0, 0). + // TODO(Triang3l): Find the return value for invalid texture fetch constants + // on the real hardware. 
+ null_image_create_info.format = VK_FORMAT_R8G8B8A8_UNORM; + null_image_create_info.extent.width = 1; + null_image_create_info.extent.height = 1; + null_image_create_info.extent.depth = 1; + null_image_create_info.mipLevels = 1; + null_image_create_info.arrayLayers = 6; + null_image_create_info.samples = VK_SAMPLE_COUNT_1_BIT; + null_image_create_info.tiling = VK_IMAGE_TILING_OPTIMAL; + null_image_create_info.usage = + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + null_image_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + null_image_create_info.queueFamilyIndexCount = 0; + null_image_create_info.pQueueFamilyIndices = nullptr; + null_image_create_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + if (dfn.vkCreateImage(device, &null_image_create_info, nullptr, + &null_image_2d_array_cube_) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to create the null 2D array and cube " + "image"); + return false; + } + + null_image_create_info.flags &= ~VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; + null_image_create_info.imageType = VK_IMAGE_TYPE_3D; + null_image_create_info.arrayLayers = 1; + if (dfn.vkCreateImage(device, &null_image_create_info, nullptr, + &null_image_3d_) != VK_SUCCESS) { + XELOGE("VulkanTextureCache: Failed to create the null 3D image"); + return false; + } + + VkMemoryRequirements null_image_memory_requirements_2d_array_cube_; + dfn.vkGetImageMemoryRequirements( + device, null_image_2d_array_cube_, + &null_image_memory_requirements_2d_array_cube_); + VkMemoryRequirements null_image_memory_requirements_3d_; + dfn.vkGetImageMemoryRequirements(device, null_image_3d_, + &null_image_memory_requirements_3d_); + uint32_t null_image_memory_type_common = ui::vulkan::util::ChooseMemoryType( + provider, + null_image_memory_requirements_2d_array_cube_.memoryTypeBits & + null_image_memory_requirements_3d_.memoryTypeBits, + ui::vulkan::util::MemoryPurpose::kDeviceLocal); + if (null_image_memory_type_common != UINT32_MAX) { + // Place both null 
images in one memory allocation because maximum total + // memory allocation count is limited. + VkDeviceSize null_image_memory_offset_3d_ = + xe::align(null_image_memory_requirements_2d_array_cube_.size, + std::max(null_image_memory_requirements_3d_.alignment, + VkDeviceSize(1))); + VkMemoryAllocateInfo null_image_memory_allocate_info; + null_image_memory_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + null_image_memory_allocate_info.pNext = nullptr; + null_image_memory_allocate_info.allocationSize = + null_image_memory_offset_3d_ + null_image_memory_requirements_3d_.size; + null_image_memory_allocate_info.memoryTypeIndex = + null_image_memory_type_common; + if (dfn.vkAllocateMemory(device, &null_image_memory_allocate_info, nullptr, + &null_images_memory_[0]) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to allocate the memory for the null " + "images"); + return false; + } + if (dfn.vkBindImageMemory(device, null_image_2d_array_cube_, + null_images_memory_[0], 0) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to bind the memory to the null 2D array " + "and cube image"); + return false; + } + if (dfn.vkBindImageMemory(device, null_image_3d_, null_images_memory_[0], + null_image_memory_offset_3d_) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to bind the memory to the null 3D image"); + return false; + } + } else { + // Place each null image in separate allocations. 
+ uint32_t null_image_memory_type_2d_array_cube_ = + ui::vulkan::util::ChooseMemoryType( + provider, + null_image_memory_requirements_2d_array_cube_.memoryTypeBits, + ui::vulkan::util::MemoryPurpose::kDeviceLocal); + uint32_t null_image_memory_type_3d_ = ui::vulkan::util::ChooseMemoryType( + provider, null_image_memory_requirements_3d_.memoryTypeBits, + ui::vulkan::util::MemoryPurpose::kDeviceLocal); + if (null_image_memory_type_2d_array_cube_ == UINT32_MAX || + null_image_memory_type_3d_ == UINT32_MAX) { + XELOGE( + "VulkanTextureCache: Failed to get the memory types for the null " + "images"); + return false; + } + + VkMemoryAllocateInfo null_image_memory_allocate_info; + VkMemoryAllocateInfo* null_image_memory_allocate_info_last = + &null_image_memory_allocate_info; + null_image_memory_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + null_image_memory_allocate_info.pNext = nullptr; + null_image_memory_allocate_info.allocationSize = + null_image_memory_requirements_2d_array_cube_.size; + null_image_memory_allocate_info.memoryTypeIndex = + null_image_memory_type_2d_array_cube_; + VkMemoryDedicatedAllocateInfoKHR null_image_memory_dedicated_allocate_info; + if (provider.device_extensions().khr_dedicated_allocation) { + null_image_memory_allocate_info_last->pNext = + &null_image_memory_dedicated_allocate_info; + null_image_memory_allocate_info_last = + reinterpret_cast( + &null_image_memory_dedicated_allocate_info); + null_image_memory_dedicated_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; + null_image_memory_dedicated_allocate_info.pNext = nullptr; + null_image_memory_dedicated_allocate_info.image = + null_image_2d_array_cube_; + null_image_memory_dedicated_allocate_info.buffer = VK_NULL_HANDLE; + } + if (dfn.vkAllocateMemory(device, &null_image_memory_allocate_info, nullptr, + &null_images_memory_[0]) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to allocate the memory for the null 2D " + "array and cube 
image"); + return false; + } + if (dfn.vkBindImageMemory(device, null_image_2d_array_cube_, + null_images_memory_[0], 0) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to bind the memory to the null 2D array " + "and cube image"); + return false; + } + + null_image_memory_allocate_info.allocationSize = + null_image_memory_requirements_3d_.size; + null_image_memory_allocate_info.memoryTypeIndex = + null_image_memory_type_3d_; + null_image_memory_dedicated_allocate_info.image = null_image_3d_; + if (dfn.vkAllocateMemory(device, &null_image_memory_allocate_info, nullptr, + &null_images_memory_[1]) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to allocate the memory for the null 3D " + "image"); + return false; + } + if (dfn.vkBindImageMemory(device, null_image_3d_, null_images_memory_[1], + 0) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to bind the memory to the null 3D image"); + return false; + } + } + + VkImageViewCreateInfo null_image_view_create_info; + null_image_view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + null_image_view_create_info.pNext = nullptr; + null_image_view_create_info.flags = 0; + null_image_view_create_info.image = null_image_2d_array_cube_; + null_image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + null_image_view_create_info.format = null_image_create_info.format; + // TODO(Triang3l): Find the return value for invalid texture fetch constants + // on the real hardware. + // Micro-optimization if this has any effect on the host GPU at all, use only + // constant components instead of the real texels. The image will be cleared + // to (0, 0, 0, 0) anyway. + VkComponentSwizzle null_image_view_swizzle = + (!device_portability_subset_features || + device_portability_subset_features->imageViewFormatSwizzle) + ? 
VK_COMPONENT_SWIZZLE_ZERO + : VK_COMPONENT_SWIZZLE_IDENTITY; + null_image_view_create_info.components.r = null_image_view_swizzle; + null_image_view_create_info.components.g = null_image_view_swizzle; + null_image_view_create_info.components.b = null_image_view_swizzle; + null_image_view_create_info.components.a = null_image_view_swizzle; + null_image_view_create_info.subresourceRange = + ui::vulkan::util::InitializeSubresourceRange( + VK_IMAGE_ASPECT_COLOR_BIT, 0, VK_REMAINING_MIP_LEVELS, 0, 1); + if (dfn.vkCreateImageView(device, &null_image_view_create_info, nullptr, + &null_image_view_2d_array_) != VK_SUCCESS) { + XELOGE("VulkanTextureCache: Failed to create the null 2D array image view"); + return false; + } + null_image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_CUBE; + null_image_view_create_info.subresourceRange.layerCount = 6; + if (dfn.vkCreateImageView(device, &null_image_view_create_info, nullptr, + &null_image_view_cube_) != VK_SUCCESS) { + XELOGE("VulkanTextureCache: Failed to create the null cube image view"); + return false; + } + null_image_view_create_info.image = null_image_3d_; + null_image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_3D; + null_image_view_create_info.subresourceRange.layerCount = 1; + if (dfn.vkCreateImageView(device, &null_image_view_create_info, nullptr, + &null_image_view_3d_) != VK_SUCCESS) { + XELOGE("VulkanTextureCache: Failed to create the null 3D image view"); + return false; + } + + null_images_cleared_ = false; + + return true; +} + +} // namespace vulkan +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.h b/src/xenia/gpu/vulkan/vulkan_texture_cache.h new file mode 100644 index 000000000..69f3965d5 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.h @@ -0,0 +1,94 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + 
****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_VULKAN_VULKAN_TEXTURE_CACHE_H_ +#define XENIA_GPU_VULKAN_VULKAN_TEXTURE_CACHE_H_ + +#include +#include +#include + +#include "xenia/gpu/texture_cache.h" +#include "xenia/gpu/vulkan/vulkan_shared_memory.h" +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +class VulkanCommandProcessor; + +class VulkanTextureCache final : public TextureCache { + public: + static std::unique_ptr Create( + const RegisterFile& register_file, VulkanSharedMemory& shared_memory, + uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y, + VulkanCommandProcessor& command_processor, + VkPipelineStageFlags guest_shader_pipeline_stages) { + std::unique_ptr texture_cache(new VulkanTextureCache( + register_file, shared_memory, draw_resolution_scale_x, + draw_resolution_scale_y, command_processor, + guest_shader_pipeline_stages)); + if (!texture_cache->Initialize()) { + return nullptr; + } + return std::move(texture_cache); + } + + ~VulkanTextureCache(); + + void BeginSubmission(uint64_t new_submission_index) override; + + protected: + uint32_t GetHostFormatSwizzle(TextureKey key) const override; + + uint32_t GetMaxHostTextureWidthHeight( + xenos::DataDimension dimension) const override; + uint32_t GetMaxHostTextureDepthOrArraySize( + xenos::DataDimension dimension) const override; + + std::unique_ptr CreateTexture(TextureKey key) override; + + bool LoadTextureDataFromResidentMemoryImpl(Texture& texture, bool load_base, + bool load_mips) override; + + private: + class VulkanTexture final : public Texture { + public: + explicit VulkanTexture(VulkanTextureCache& texture_cache, + const TextureKey& key); + }; + + explicit 
VulkanTextureCache( + const RegisterFile& register_file, VulkanSharedMemory& shared_memory, + uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y, + VulkanCommandProcessor& command_processor, + VkPipelineStageFlags guest_shader_pipeline_stages); + + bool Initialize(); + + VulkanCommandProcessor& command_processor_; + VkPipelineStageFlags guest_shader_pipeline_stages_; + + // If both images can be placed in the same allocation, it's one allocation, + // otherwise it's two separate. + std::array null_images_memory_{}; + VkImage null_image_2d_array_cube_ = VK_NULL_HANDLE; + VkImage null_image_3d_ = VK_NULL_HANDLE; + VkImageView null_image_view_2d_array_ = VK_NULL_HANDLE; + VkImageView null_image_view_cube_ = VK_NULL_HANDLE; + VkImageView null_image_view_3d_ = VK_NULL_HANDLE; + bool null_images_cleared_ = false; +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_VULKAN_VULKAN_TEXTURE_CACHE_H_ From 0db94a700fe5ac9d17e934cdfa218af23ccb2867 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 15 May 2022 17:42:27 +0300 Subject: [PATCH 083/123] [Vulkan] Use pipeline layout key structures directly --- .../gpu/vulkan/vulkan_command_processor.cc | 12 ++--- .../gpu/vulkan/vulkan_command_processor.h | 52 +++++++++++++++---- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index c67a59367..d10d40bcb 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -1109,7 +1109,7 @@ VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, pipeline_layout_key.texture_count_pixel = texture_count_pixel; pipeline_layout_key.texture_count_vertex = texture_count_vertex; { - auto it = pipeline_layouts_.find(pipeline_layout_key.key); + auto it = pipeline_layouts_.find(pipeline_layout_key); if (it != pipeline_layouts_.end()) { return &it->second; } @@ 
-1125,7 +1125,7 @@ VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, texture_descriptor_set_layout_key.is_vertex = 0; texture_descriptor_set_layout_key.texture_count = texture_count_pixel; auto it = descriptor_set_layouts_textures_.find( - texture_descriptor_set_layout_key.key); + texture_descriptor_set_layout_key); if (it != descriptor_set_layouts_textures_.end()) { descriptor_set_layout_textures_pixel = it->second; } else { @@ -1154,7 +1154,7 @@ VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, return nullptr; } descriptor_set_layouts_textures_.emplace( - texture_descriptor_set_layout_key.key, + texture_descriptor_set_layout_key, descriptor_set_layout_textures_pixel); } } else { @@ -1167,7 +1167,7 @@ VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, texture_descriptor_set_layout_key.is_vertex = 0; texture_descriptor_set_layout_key.texture_count = texture_count_vertex; auto it = descriptor_set_layouts_textures_.find( - texture_descriptor_set_layout_key.key); + texture_descriptor_set_layout_key); if (it != descriptor_set_layouts_textures_.end()) { descriptor_set_layout_textures_vertex = it->second; } else { @@ -1196,7 +1196,7 @@ VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, return nullptr; } descriptor_set_layouts_textures_.emplace( - texture_descriptor_set_layout_key.key, + texture_descriptor_set_layout_key, descriptor_set_layout_textures_vertex); } } else { @@ -1248,7 +1248,7 @@ VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, return nullptr; } auto emplaced_pair = pipeline_layouts_.emplace( - std::piecewise_construct, std::forward_as_tuple(pipeline_layout_key.key), + std::piecewise_construct, std::forward_as_tuple(pipeline_layout_key), std::forward_as_tuple(pipeline_layout, descriptor_set_layout_textures_vertex, descriptor_set_layout_textures_pixel)); diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h 
b/src/xenia/gpu/vulkan/vulkan_command_processor.h index c23279bcf..2f90ca614 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -14,11 +14,14 @@ #include #include #include +#include #include #include #include #include +#include "xenia/base/assert.h" +#include "xenia/base/hash.h" #include "xenia/gpu/command_processor.h" #include "xenia/gpu/draw_util.h" #include "xenia/gpu/registers.h" @@ -167,26 +170,54 @@ class VulkanCommandProcessor : public CommandProcessor { }; union TextureDescriptorSetLayoutKey { + uint32_t key; struct { uint32_t is_vertex : 1; // For 0, use descriptor_set_layout_empty_ instead as these are owning // references. uint32_t texture_count : 31; }; - uint32_t key = 0; + + TextureDescriptorSetLayoutKey() : key(0) { + static_assert_size(*this, sizeof(key)); + } + + struct Hasher { + size_t operator()(const TextureDescriptorSetLayoutKey& key) const { + return std::hash{}(key.key); + } + }; + bool operator==(const TextureDescriptorSetLayoutKey& other_key) const { + return key == other_key.key; + } + bool operator!=(const TextureDescriptorSetLayoutKey& other_key) const { + return !(*this == other_key); + } }; - static_assert(sizeof(TextureDescriptorSetLayoutKey) == sizeof(uint32_t)); union PipelineLayoutKey { + uint32_t key; struct { // Pixel textures in the low bits since those are varied much more // commonly. 
uint32_t texture_count_pixel : 16; uint32_t texture_count_vertex : 16; }; - uint32_t key = 0; + + PipelineLayoutKey() : key(0) { static_assert_size(*this, sizeof(key)); } + + struct Hasher { + size_t operator()(const PipelineLayoutKey& key) const { + return std::hash{}(key.key); + } + }; + bool operator==(const PipelineLayoutKey& other_key) const { + return key == other_key.key; + } + bool operator!=(const PipelineLayoutKey& other_key) const { + return !(*this == other_key); + } }; - static_assert(sizeof(PipelineLayoutKey) == sizeof(uint32_t)); class PipelineLayout : public VulkanPipelineCache::PipelineLayoutProvider { public: @@ -319,13 +350,14 @@ class VulkanCommandProcessor : public CommandProcessor { VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ = VK_NULL_HANDLE; - // TextureDescriptorSetLayoutKey::key -> VkDescriptorSetLayout. - // Layouts are referenced by pipeline_layouts_. - std::unordered_map + // Descriptor set layouts are referenced by pipeline_layouts_. + std::unordered_map descriptor_set_layouts_textures_; - // PipelineLayoutKey::key -> PipelineLayout. - // Layouts are referenced by VulkanPipelineCache. - std::unordered_map pipeline_layouts_; + // Pipeline layouts are referenced by VulkanPipelineCache. 
+ std::unordered_map + pipeline_layouts_; std::unique_ptr shared_memory_; From 5f2b0a899a4e2464481668adc68263161c5e81f9 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 15 May 2022 22:20:24 +0300 Subject: [PATCH 084/123] [Vulkan] Fix TransientDescriptorPool ignoring the descriptor type --- src/xenia/ui/vulkan/transient_descriptor_pool.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xenia/ui/vulkan/transient_descriptor_pool.cc b/src/xenia/ui/vulkan/transient_descriptor_pool.cc index e471a6c6e..963738d80 100644 --- a/src/xenia/ui/vulkan/transient_descriptor_pool.cc +++ b/src/xenia/ui/vulkan/transient_descriptor_pool.cc @@ -118,7 +118,7 @@ VkDescriptorSet TransientDescriptorPool::Request( if (pages_writable_.empty()) { VkDescriptorPoolSize descriptor_pool_size; - descriptor_pool_size.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor_pool_size.type = descriptor_type_; descriptor_pool_size.descriptorCount = page_descriptor_count_; VkDescriptorPoolCreateInfo descriptor_pool_create_info; descriptor_pool_create_info.sType = From 46202dd27a49591cf6b2f2fed3b7412fcd29f741 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 17 May 2022 22:42:28 +0300 Subject: [PATCH 085/123] [Vulkan] Basic texture descriptor set allocation/binding --- src/xenia/gpu/spirv_shader.cc | 30 + src/xenia/gpu/spirv_shader.h | 81 ++ src/xenia/gpu/spirv_shader_translator.cc | 39 + src/xenia/gpu/spirv_shader_translator.h | 48 +- .../gpu/spirv_shader_translator_fetch.cc | 142 +++ .../gpu/vulkan/vulkan_command_processor.cc | 881 +++++++++++++----- .../gpu/vulkan/vulkan_command_processor.h | 128 ++- src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 92 +- src/xenia/gpu/vulkan/vulkan_pipeline_cache.h | 17 + src/xenia/gpu/vulkan/vulkan_shader.cc | 11 +- src/xenia/gpu/vulkan/vulkan_shader.h | 47 +- src/xenia/gpu/vulkan/vulkan_texture_cache.h | 11 + .../single_layout_descriptor_set_pool.cc | 3 +- .../single_type_descriptor_set_allocator.cc | 216 +++++ 
.../single_type_descriptor_set_allocator.h | 84 ++ .../ui/vulkan/transient_descriptor_pool.cc | 162 ---- .../ui/vulkan/transient_descriptor_pool.h | 61 -- 17 files changed, 1561 insertions(+), 492 deletions(-) create mode 100644 src/xenia/gpu/spirv_shader.cc create mode 100644 src/xenia/gpu/spirv_shader.h create mode 100644 src/xenia/ui/vulkan/single_type_descriptor_set_allocator.cc create mode 100644 src/xenia/ui/vulkan/single_type_descriptor_set_allocator.h delete mode 100644 src/xenia/ui/vulkan/transient_descriptor_pool.cc delete mode 100644 src/xenia/ui/vulkan/transient_descriptor_pool.h diff --git a/src/xenia/gpu/spirv_shader.cc b/src/xenia/gpu/spirv_shader.cc new file mode 100644 index 000000000..db3ebd0da --- /dev/null +++ b/src/xenia/gpu/spirv_shader.cc @@ -0,0 +1,30 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/spirv_shader.h" + +#include + +namespace xe { +namespace gpu { + +SpirvShader::SpirvShader(xenos::ShaderType shader_type, + uint64_t ucode_data_hash, const uint32_t* ucode_dwords, + size_t ucode_dword_count, + std::endian ucode_source_endian) + : Shader(shader_type, ucode_data_hash, ucode_dwords, ucode_dword_count, + ucode_source_endian) {} + +Shader::Translation* SpirvShader::CreateTranslationInstance( + uint64_t modification) { + return new SpirvTranslation(*this, modification); +} + +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/spirv_shader.h b/src/xenia/gpu/spirv_shader.h new file mode 100644 index 000000000..7eba372fa --- /dev/null +++ b/src/xenia/gpu/spirv_shader.h @@ -0,0 +1,81 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_SPIRV_SHADER_H_ +#define XENIA_GPU_SPIRV_SHADER_H_ + +#include +#include + +#include "xenia/gpu/shader.h" +#include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/xenos.h" + +namespace xe { +namespace gpu { + +class SpirvShader : public Shader { + public: + class SpirvTranslation : public Translation { + public: + explicit SpirvTranslation(SpirvShader& shader, uint64_t modification) + : Translation(shader, modification) {} + }; + + explicit SpirvShader(xenos::ShaderType shader_type, uint64_t ucode_data_hash, + const uint32_t* ucode_dwords, size_t ucode_dword_count, + std::endian ucode_source_endian = std::endian::big); + + // Resource bindings are gathered after the successful translation of any + // modification for simplicity of translation (and they don't depend on + // modification bits). + + struct TextureBinding { + uint32_t fetch_constant : 5; + // Stacked and 3D are separate TextureBindings. + xenos::FetchOpDimension dimension : 2; + uint32_t is_signed : 1; + }; + // Safe to hash and compare with memcmp for layout hashing. 
+ const std::vector& GetTextureBindingsAfterTranslation() + const { + return texture_bindings_; + } + const uint32_t GetUsedTextureMaskAfterTranslation() const { + return used_texture_mask_; + } + + struct SamplerBinding { + uint32_t fetch_constant : 5; + xenos::TextureFilter mag_filter : 2; + xenos::TextureFilter min_filter : 2; + xenos::TextureFilter mip_filter : 2; + xenos::AnisoFilter aniso_filter : 3; + }; + const std::vector& GetSamplerBindingsAfterTranslation() + const { + return sampler_bindings_; + } + + protected: + Translation* CreateTranslationInstance(uint64_t modification) override; + + private: + friend class SpirvShaderTranslator; + + std::atomic_flag bindings_setup_entered_ = ATOMIC_FLAG_INIT; + std::vector texture_bindings_; + std::vector sampler_bindings_; + uint32_t used_texture_mask_ = 0; +}; + +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_SPIRV_SHADER_H_ diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 9cc6fec72..c05e4043c 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -20,6 +20,7 @@ #include "third_party/glslang/SPIRV/GLSL.std.450.h" #include "xenia/base/assert.h" #include "xenia/base/math.h" +#include "xenia/gpu/spirv_shader.h" namespace xe { namespace gpu { @@ -95,6 +96,9 @@ void SpirvShaderTranslator::Reset() { uniform_float_constants_ = spv::NoResult; + sampler_bindings_.clear(); + texture_bindings_.clear(); + main_interface_.clear(); var_main_registers_ = spv::NoResult; @@ -595,6 +599,41 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { return module_bytes; } +void SpirvShaderTranslator::PostTranslation() { + Shader::Translation& translation = current_translation(); + if (!translation.is_valid()) { + return; + } + SpirvShader* spirv_shader = dynamic_cast(&translation.shader()); + if (spirv_shader && !spirv_shader->bindings_setup_entered_.test_and_set( + std::memory_order_relaxed)) { + 
spirv_shader->texture_bindings_.clear(); + spirv_shader->texture_bindings_.reserve(texture_bindings_.size()); + for (const TextureBinding& translator_binding : texture_bindings_) { + SpirvShader::TextureBinding& shader_binding = + spirv_shader->texture_bindings_.emplace_back(); + // For a stable hash. + std::memset(&shader_binding, 0, sizeof(shader_binding)); + shader_binding.fetch_constant = translator_binding.fetch_constant; + shader_binding.dimension = translator_binding.dimension; + shader_binding.is_signed = translator_binding.is_signed; + spirv_shader->used_texture_mask_ |= UINT32_C(1) + << translator_binding.fetch_constant; + } + spirv_shader->sampler_bindings_.clear(); + spirv_shader->sampler_bindings_.reserve(sampler_bindings_.size()); + for (const SamplerBinding& translator_binding : sampler_bindings_) { + SpirvShader::SamplerBinding& shader_binding = + spirv_shader->sampler_bindings_.emplace_back(); + shader_binding.fetch_constant = translator_binding.fetch_constant; + shader_binding.mag_filter = translator_binding.mag_filter; + shader_binding.min_filter = translator_binding.min_filter; + shader_binding.mip_filter = translator_binding.mip_filter; + shader_binding.aniso_filter = translator_binding.aniso_filter; + } + } +} + void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) { if (cf_index == 0) { // 0 already added in the beginning. diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 429ab5fe0..abc3225a5 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -131,9 +131,11 @@ class SpirvShaderTranslator : public ShaderTranslator { kDescriptorSetMutableLayoutsStart, // Rarely used at all, but may be changed at an unpredictable rate when - // vertex textures are used, combined images and samplers. - kDescriptorSetTexturesVertex = kDescriptorSetMutableLayoutsStart, - // Per-material, combined images and samplers. + // vertex textures are used. 
+ kDescriptorSetSamplersVertex = kDescriptorSetMutableLayoutsStart, + kDescriptorSetTexturesVertex, + // Per-material textures. + kDescriptorSetSamplersPixel, kDescriptorSetTexturesPixel, kDescriptorSetCount, }; @@ -217,6 +219,8 @@ class SpirvShaderTranslator : public ShaderTranslator { std::vector CompleteTranslation() override; + void PostTranslation() override; + void ProcessLabel(uint32_t cf_index) override; void ProcessExecInstructionBegin(const ParsedExecInstruction& instr) override; @@ -229,9 +233,31 @@ class SpirvShaderTranslator : public ShaderTranslator { void ProcessVertexFetchInstruction( const ParsedVertexFetchInstruction& instr) override; + void ProcessTextureFetchInstruction( + const ParsedTextureFetchInstruction& instr) override; void ProcessAluInstruction(const ParsedAluInstruction& instr) override; private: + struct TextureBinding { + uint32_t fetch_constant; + // Stacked and 3D are separate TextureBindings. + xenos::FetchOpDimension dimension; + bool is_signed; + + spv::Id type; + spv::Id variable; + }; + + struct SamplerBinding { + uint32_t fetch_constant; + xenos::TextureFilter mag_filter; + xenos::TextureFilter min_filter; + xenos::TextureFilter mip_filter; + xenos::AnisoFilter aniso_filter; + + spv::Id variable; + }; + // Builder helpers. 
void SpirvCreateSelectionMerge( spv::Id merge_block_id, spv::SelectionControlMask selection_control_mask = @@ -353,6 +379,15 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id LoadUint32FromSharedMemory(spv::Id address_dwords_int); + size_t FindOrAddTextureBinding(uint32_t fetch_constant, + xenos::FetchOpDimension dimension, + bool is_signed); + size_t FindOrAddSamplerBinding(uint32_t fetch_constant, + xenos::TextureFilter mag_filter, + xenos::TextureFilter min_filter, + xenos::TextureFilter mip_filter, + xenos::AnisoFilter aniso_filter); + Features features_; std::unique_ptr builder_; @@ -446,6 +481,13 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id buffers_shared_memory_; + // Not using combined images and samplers because + // maxPerStageDescriptorSamplers is often lower than + // maxPerStageDescriptorSampledImages, and for every fetch constant, there + // are, for regular fetches, two bindings (unsigned and signed). + std::vector texture_bindings_; + std::vector sampler_bindings_; + // VS as VS only - int. spv::Id input_vertex_index_; // VS as TES only - int. diff --git a/src/xenia/gpu/spirv_shader_translator_fetch.cc b/src/xenia/gpu/spirv_shader_translator_fetch.cc index 23dc33765..5ec982618 100644 --- a/src/xenia/gpu/spirv_shader_translator_fetch.cc +++ b/src/xenia/gpu/spirv_shader_translator_fetch.cc @@ -12,8 +12,10 @@ #include #include #include +#include #include +#include "third_party/fmt/include/fmt/format.h" #include "third_party/glslang/SPIRV/GLSL.std.450.h" #include "xenia/base/math.h" @@ -533,5 +535,145 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( StoreResult(instr.result, result); } +void SpirvShaderTranslator::ProcessTextureFetchInstruction( + const ParsedTextureFetchInstruction& instr) { + UpdateInstructionPredication(instr.is_predicated, instr.predicate_condition); + + EnsureBuildPointAvailable(); + + // TODO(Triang3l): Fetch the texture. 
+ if (instr.opcode == ucode::FetchOpcode::kTextureFetch) { + uint32_t fetch_constant_index = instr.operands[1].storage_index; + bool use_computed_lod = + instr.attributes.use_computed_lod && + (is_pixel_shader() || instr.attributes.use_register_gradients); + FindOrAddTextureBinding(fetch_constant_index, instr.dimension, false); + FindOrAddTextureBinding(fetch_constant_index, instr.dimension, true); + FindOrAddSamplerBinding(fetch_constant_index, instr.attributes.mag_filter, + instr.attributes.min_filter, + instr.attributes.mip_filter, + use_computed_lod ? instr.attributes.aniso_filter + : xenos::AnisoFilter::kDisabled); + } +} + +size_t SpirvShaderTranslator::FindOrAddTextureBinding( + uint32_t fetch_constant, xenos::FetchOpDimension dimension, + bool is_signed) { + // 1D and 2D textures (including stacked ones) are treated as 2D arrays for + // binding and coordinate simplicity. + if (dimension == xenos::FetchOpDimension::k1D) { + dimension = xenos::FetchOpDimension::k2D; + } + for (size_t i = 0; i < texture_bindings_.size(); ++i) { + const TextureBinding& texture_binding = texture_bindings_[i]; + if (texture_binding.fetch_constant == fetch_constant && + texture_binding.dimension == dimension && + texture_binding.is_signed == is_signed) { + return i; + } + } + // TODO(Triang3l): Limit the total count to that actually supported by the + // implementation. 
+ size_t new_texture_binding_index = texture_bindings_.size(); + TextureBinding& new_texture_binding = texture_bindings_.emplace_back(); + new_texture_binding.fetch_constant = fetch_constant; + new_texture_binding.dimension = dimension; + new_texture_binding.is_signed = is_signed; + spv::Dim type_dimension; + bool is_array; + const char* dimension_name; + switch (dimension) { + case xenos::FetchOpDimension::k3DOrStacked: + type_dimension = spv::Dim3D; + is_array = false; + dimension_name = "3d"; + break; + case xenos::FetchOpDimension::kCube: + type_dimension = spv::DimCube; + is_array = false; + dimension_name = "cube"; + break; + default: + type_dimension = spv::Dim2D; + is_array = true; + dimension_name = "2d"; + } + new_texture_binding.type = + builder_->makeImageType(type_float_, type_dimension, false, is_array, + false, 1, spv::ImageFormatUnknown); + new_texture_binding.variable = builder_->createVariable( + spv::NoPrecision, spv::StorageClassUniformConstant, + new_texture_binding.type, + fmt::format("xe_texture{}_{}_{}", fetch_constant, dimension_name, + is_signed ? 's' : 'u') + .c_str()); + builder_->addDecoration( + new_texture_binding.variable, spv::DecorationDescriptorSet, + int(is_vertex_shader() ? kDescriptorSetTexturesVertex + : kDescriptorSetTexturesPixel)); + builder_->addDecoration(new_texture_binding.variable, spv::DecorationBinding, + int(new_texture_binding_index)); + if (features_.spirv_version >= spv::Spv_1_4) { + main_interface_.push_back(new_texture_binding.variable); + } + return new_texture_binding_index; +} + +size_t SpirvShaderTranslator::FindOrAddSamplerBinding( + uint32_t fetch_constant, xenos::TextureFilter mag_filter, + xenos::TextureFilter min_filter, xenos::TextureFilter mip_filter, + xenos::AnisoFilter aniso_filter) { + if (aniso_filter != xenos::AnisoFilter::kUseFetchConst) { + // TODO(Triang3l): Limit to what's actually supported by the implementation. 
+ aniso_filter = std::min(aniso_filter, xenos::AnisoFilter::kMax_16_1); + } + for (size_t i = 0; i < sampler_bindings_.size(); ++i) { + const SamplerBinding& sampler_binding = sampler_bindings_[i]; + if (sampler_binding.fetch_constant == fetch_constant && + sampler_binding.mag_filter == mag_filter && + sampler_binding.min_filter == min_filter && + sampler_binding.mip_filter == mip_filter && + sampler_binding.aniso_filter == aniso_filter) { + return i; + } + } + // TODO(Triang3l): Limit the total count to that actually supported by the + // implementation. + size_t new_sampler_binding_index = sampler_bindings_.size(); + SamplerBinding& new_sampler_binding = sampler_bindings_.emplace_back(); + new_sampler_binding.fetch_constant = fetch_constant; + new_sampler_binding.mag_filter = mag_filter; + new_sampler_binding.min_filter = min_filter; + new_sampler_binding.mip_filter = mip_filter; + new_sampler_binding.aniso_filter = aniso_filter; + std::ostringstream name; + static const char kFilterSuffixes[] = {'p', 'l', 'b', 'f'}; + name << "xe_sampler" << fetch_constant << '_' + << kFilterSuffixes[uint32_t(mag_filter)] + << kFilterSuffixes[uint32_t(min_filter)] + << kFilterSuffixes[uint32_t(mip_filter)]; + if (aniso_filter != xenos::AnisoFilter::kUseFetchConst) { + if (aniso_filter == xenos::AnisoFilter::kDisabled) { + name << "_a0"; + } else { + name << "_a" << (UINT32_C(1) << (uint32_t(aniso_filter) - 1)); + } + } + new_sampler_binding.variable = builder_->createVariable( + spv::NoPrecision, spv::StorageClassUniformConstant, + builder_->makeSamplerType(), name.str().c_str()); + builder_->addDecoration( + new_sampler_binding.variable, spv::DecorationDescriptorSet, + int(is_vertex_shader() ? 
kDescriptorSetSamplersVertex + : kDescriptorSetSamplersPixel)); + builder_->addDecoration(new_sampler_binding.variable, spv::DecorationBinding, + int(new_sampler_binding_index)); + if (features_.spirv_version >= spv::Spv_1_4) { + main_interface_.push_back(new_sampler_binding.variable); + } + return new_sampler_binding_index; +} + } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index d10d40bcb..d9b81f9a8 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -44,10 +44,29 @@ namespace shaders { #include "xenia/gpu/shaders/bytecode/vulkan_spirv/uv_ps.h" } // namespace shaders +// No specific reason for 32768 descriptors, just the "too much" amount from +// Direct3D 12 PIX warnings. 2x descriptors for textures because of unsigned and +// signed bindings. VulkanCommandProcessor::VulkanCommandProcessor( VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state) : CommandProcessor(graphics_system, kernel_state), - deferred_command_buffer_(*this) {} + deferred_command_buffer_(*this), + transient_descriptor_allocator_uniform_buffer_( + *static_cast( + graphics_system->provider()), + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 32768, 32768), + transient_descriptor_allocator_storage_buffer_( + *static_cast( + graphics_system->provider()), + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 32768, 32768), + transient_descriptor_allocator_sampled_image_( + *static_cast( + graphics_system->provider()), + VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 2 * 32768, 32768), + transient_descriptor_allocator_sampler_( + *static_cast( + graphics_system->provider()), + VK_DESCRIPTOR_TYPE_SAMPLER, 32768, 32768) {} VulkanCommandProcessor::~VulkanCommandProcessor() = default; @@ -70,6 +89,10 @@ bool VulkanCommandProcessor::SetupContext() { VkDevice device = provider.device(); const VkPhysicalDeviceFeatures& device_features = 
provider.device_features(); + // The unconditional inclusion of the vertex shader stage also covers the case + // of manual index / factor buffer fetch (the system constants and the shared + // memory are needed for that) in the tessellation vertex shader when + // fullDrawIndexUint32 is not supported. guest_shader_pipeline_stages_ = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; guest_shader_vertex_stages_ = VK_SHADER_STAGE_VERTEX_BIT; @@ -84,11 +107,6 @@ bool VulkanCommandProcessor::SetupContext() { guest_shader_vertex_stages_ |= VK_SHADER_STAGE_COMPUTE_BIT; } - // No specific reason for 32768, just the "too much" amount from Direct3D 12 - // PIX warnings. - transient_descriptor_pool_uniform_buffers_ = - std::make_unique( - provider, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 32768, 32768); // 16384 is bigger than any single uniform buffer that Xenia needs, but is the // minimum maxUniformBufferRange, thus the safe minimum amount. VkDeviceSize uniform_buffer_alignment = std::max( @@ -100,6 +118,10 @@ bool VulkanCommandProcessor::SetupContext() { size_t(16384)), size_t(uniform_buffer_alignment))); + // Descriptor set layouts. + VkShaderStageFlags guest_shader_stages = + guest_shader_vertex_stages_ | VK_SHADER_STAGE_FRAGMENT_BIT; + // Empty. 
VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info; descriptor_set_layout_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; @@ -113,63 +135,11 @@ bool VulkanCommandProcessor::SetupContext() { XELOGE("Failed to create an empty Vulkan descriptor set layout"); return false; } - VkShaderStageFlags guest_shader_stages = - guest_shader_vertex_stages_ | VK_SHADER_STAGE_FRAGMENT_BIT; - VkDescriptorSetLayoutBinding descriptor_set_layout_binding_uniform_buffer; - descriptor_set_layout_binding_uniform_buffer.binding = 0; - descriptor_set_layout_binding_uniform_buffer.descriptorType = - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - descriptor_set_layout_binding_uniform_buffer.descriptorCount = 1; - descriptor_set_layout_binding_uniform_buffer.stageFlags = guest_shader_stages; - descriptor_set_layout_binding_uniform_buffer.pImmutableSamplers = nullptr; - descriptor_set_layout_create_info.bindingCount = 1; - descriptor_set_layout_create_info.pBindings = - &descriptor_set_layout_binding_uniform_buffer; - if (dfn.vkCreateDescriptorSetLayout( - device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layout_fetch_bool_loop_constants_) != VK_SUCCESS) { - XELOGE( - "Failed to create a Vulkan descriptor set layout for the fetch, bool " - "and loop constants uniform buffer"); - return false; - } - descriptor_set_layout_binding_uniform_buffer.stageFlags = - guest_shader_vertex_stages_; - if (dfn.vkCreateDescriptorSetLayout( - device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layout_float_constants_vertex_) != VK_SUCCESS) { - XELOGE( - "Failed to create a Vulkan descriptor set layout for the vertex shader " - "float constants uniform buffer"); - return false; - } - descriptor_set_layout_binding_uniform_buffer.stageFlags = - VK_SHADER_STAGE_FRAGMENT_BIT; - if (dfn.vkCreateDescriptorSetLayout( - device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layout_float_constants_pixel_) != VK_SUCCESS) { - XELOGE( - 
"Failed to create a Vulkan descriptor set layout for the pixel shader " - "float constants uniform buffer"); - return false; - } - descriptor_set_layout_binding_uniform_buffer.stageFlags = guest_shader_stages; - if (device_features.tessellationShader) { - descriptor_set_layout_binding_uniform_buffer.stageFlags |= - VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; - } - if (dfn.vkCreateDescriptorSetLayout( - device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layout_system_constants_) != VK_SUCCESS) { - XELOGE( - "Failed to create a Vulkan descriptor set layout for the system " - "constants uniform buffer"); - return false; - } + // Shared memory and EDRAM. uint32_t shared_memory_binding_count_log2 = SpirvShaderTranslator::GetSharedMemoryStorageBufferCountLog2( provider.device_properties().limits.maxStorageBufferRange); - uint32_t shared_memory_binding_count = uint32_t(1) + uint32_t shared_memory_binding_count = UINT32_C(1) << shared_memory_binding_count_log2; VkDescriptorSetLayoutBinding descriptor_set_layout_bindings_shared_memory_and_edram[1]; @@ -178,15 +148,14 @@ bool VulkanCommandProcessor::SetupContext() { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; descriptor_set_layout_bindings_shared_memory_and_edram[0].descriptorCount = shared_memory_binding_count; - // TODO(Triang3l): When fullDrawIndexUint32 fallback is added, force host - // vertex shader access to the shared memory for the tessellation vertex - // shader (to retrieve tessellation factors). descriptor_set_layout_bindings_shared_memory_and_edram[0].stageFlags = guest_shader_stages; descriptor_set_layout_bindings_shared_memory_and_edram[0].pImmutableSamplers = nullptr; // TODO(Triang3l): EDRAM storage image binding for the fragment shader // interlocks case. 
+ descriptor_set_layout_create_info.bindingCount = uint32_t( + xe::countof(descriptor_set_layout_bindings_shared_memory_and_edram)); descriptor_set_layout_create_info.pBindings = descriptor_set_layout_bindings_shared_memory_and_edram; if (dfn.vkCreateDescriptorSetLayout( @@ -197,6 +166,109 @@ bool VulkanCommandProcessor::SetupContext() { "and the EDRAM"); return false; } + // Transient: uniform buffer for the guest vertex shader stages. + VkDescriptorSetLayoutBinding descriptor_set_layout_binding_transient; + descriptor_set_layout_binding_transient.binding = 0; + descriptor_set_layout_binding_transient.descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor_set_layout_binding_transient.descriptorCount = 1; + descriptor_set_layout_binding_transient.stageFlags = + guest_shader_vertex_stages_; + descriptor_set_layout_binding_transient.pImmutableSamplers = nullptr; + descriptor_set_layout_create_info.bindingCount = 1; + descriptor_set_layout_create_info.pBindings = + &descriptor_set_layout_binding_transient; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layouts_single_transient_[size_t( + SingleTransientDescriptorLayout::kUniformBufferGuestVertex)]) != + VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for a uniform buffer " + "bound to the guest vertex shader stages"); + return false; + } + // Transient: uniform buffer for fragment shaders. 
+ descriptor_set_layout_binding_transient.descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor_set_layout_binding_transient.stageFlags = + VK_SHADER_STAGE_FRAGMENT_BIT; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layouts_single_transient_[size_t( + SingleTransientDescriptorLayout::kUniformBufferFragment)]) != + VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for a uniform buffer " + "bound to the fragment shader"); + return false; + } + // Transient: uniform buffer for the guest shader stages. + descriptor_set_layout_binding_transient.descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor_set_layout_binding_transient.stageFlags = guest_shader_stages; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layouts_single_transient_[size_t( + SingleTransientDescriptorLayout::kUniformBufferGuestShader)]) != + VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for a uniform buffer " + "bound to the guest shader stages"); + return false; + } + // Transient: system constants. 
+ descriptor_set_layout_binding_transient.descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor_set_layout_binding_transient.stageFlags = guest_shader_stages; + if (device_features.tessellationShader) { + descriptor_set_layout_binding_transient.stageFlags |= + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; + } + if (device_features.geometryShader) { + descriptor_set_layout_binding_transient.stageFlags |= + VK_SHADER_STAGE_GEOMETRY_BIT; + } + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layouts_single_transient_[size_t( + SingleTransientDescriptorLayout :: + kUniformBufferSystemConstants)]) != VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for the system " + "constants uniform buffer"); + return false; + } + // Transient: uniform buffer for compute shaders. + descriptor_set_layout_binding_transient.descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor_set_layout_binding_transient.stageFlags = + VK_SHADER_STAGE_COMPUTE_BIT; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layouts_single_transient_[size_t( + SingleTransientDescriptorLayout::kUniformBufferCompute)]) != + VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for a uniform buffer " + "bound to the compute shader"); + return false; + } + // Transient: storage buffer for compute shaders. 
+ descriptor_set_layout_binding_transient.descriptorType = + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + descriptor_set_layout_binding_transient.stageFlags = + VK_SHADER_STAGE_COMPUTE_BIT; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layouts_single_transient_[size_t( + SingleTransientDescriptorLayout::kStorageBufferCompute)]) != + VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for a storage buffer " + "bound to the compute shader"); + return false; + } shared_memory_ = std::make_unique( *this, *memory_, trace_writer_, guest_shader_pipeline_stages_); @@ -556,6 +628,8 @@ void VulkanCommandProcessor::ShutdownContext() { shared_memory_.reset(); + ClearTransientDescriptorPools(); + for (const auto& pipeline_layout_pair : pipeline_layouts_) { dfn.vkDestroyPipelineLayout( device, pipeline_layout_pair.second.GetPipelineLayout(), nullptr); @@ -568,26 +642,19 @@ void VulkanCommandProcessor::ShutdownContext() { } descriptor_set_layouts_textures_.clear(); + for (VkDescriptorSetLayout& descriptor_set_layout_single_transient : + descriptor_set_layouts_single_transient_) { + ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyDescriptorSetLayout, device, + descriptor_set_layout_single_transient); + } ui::vulkan::util::DestroyAndNullHandle( dfn.vkDestroyDescriptorSetLayout, device, descriptor_set_layout_shared_memory_and_edram_); - ui::vulkan::util::DestroyAndNullHandle( - dfn.vkDestroyDescriptorSetLayout, device, - descriptor_set_layout_system_constants_); - ui::vulkan::util::DestroyAndNullHandle( - dfn.vkDestroyDescriptorSetLayout, device, - descriptor_set_layout_float_constants_pixel_); - ui::vulkan::util::DestroyAndNullHandle( - dfn.vkDestroyDescriptorSetLayout, device, - descriptor_set_layout_float_constants_vertex_); - ui::vulkan::util::DestroyAndNullHandle( - dfn.vkDestroyDescriptorSetLayout, device, - descriptor_set_layout_fetch_bool_loop_constants_); 
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, device, descriptor_set_layout_empty_); uniform_buffer_pool_.reset(); - transient_descriptor_pool_uniform_buffers_.reset(); sparse_bind_wait_stage_mask_ = 0; sparse_buffer_binds_.clear(); @@ -651,14 +718,14 @@ void VulkanCommandProcessor::WriteRegister(uint32_t index, uint32_t value) { if (current_float_constant_map_pixel_[float_constant_index >> 6] & (1ull << (float_constant_index & 63))) { current_graphics_descriptor_set_values_up_to_date_ &= - ~(uint32_t(1) + ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel); } } else { if (current_float_constant_map_vertex_[float_constant_index >> 6] & (1ull << (float_constant_index & 63))) { current_graphics_descriptor_set_values_up_to_date_ &= - ~(uint32_t(1) + ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex); } } @@ -666,11 +733,11 @@ void VulkanCommandProcessor::WriteRegister(uint32_t index, uint32_t value) { } else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 && index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) { current_graphics_descriptor_set_values_up_to_date_ &= ~( - uint32_t(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants); + UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants); } else if (index >= XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 && index <= XE_GPU_REG_SHADER_CONSTANT_FETCH_31_5) { current_graphics_descriptor_set_values_up_to_date_ &= - ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants); + ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants); } } @@ -1102,12 +1169,103 @@ void VulkanCommandProcessor::EndRenderPass() { current_framebuffer_ = nullptr; } +VkDescriptorSet VulkanCommandProcessor::AllocateSingleTransientDescriptor( + SingleTransientDescriptorLayout transient_descriptor_layout) { + assert_true(frame_open_); + VkDescriptorSet descriptor_set; + std::vector& transient_descriptors_free = + 
single_transient_descriptors_free_[size_t(transient_descriptor_layout)]; + if (!transient_descriptors_free.empty()) { + descriptor_set = transient_descriptors_free.back(); + transient_descriptors_free.pop_back(); + } else { + const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + ui::vulkan::SingleTypeDescriptorSetAllocator& + transfer_descriptor_allocator = + transient_descriptor_layout == + SingleTransientDescriptorLayout::kStorageBufferCompute + ? transient_descriptor_allocator_storage_buffer_ + : transient_descriptor_allocator_uniform_buffer_; + descriptor_set = transfer_descriptor_allocator.Allocate( + GetSingleTransientDescriptorLayout(transient_descriptor_layout), 1); + if (descriptor_set == VK_NULL_HANDLE) { + return VK_NULL_HANDLE; + } + } + UsedSingleTransientDescriptor used_descriptor; + used_descriptor.frame = frame_current_; + used_descriptor.layout = transient_descriptor_layout; + used_descriptor.set = descriptor_set; + single_transient_descriptors_used_.emplace_back(used_descriptor); + return descriptor_set; +} + +VkDescriptorSetLayout VulkanCommandProcessor::GetTextureDescriptorSetLayout( + bool is_samplers, bool is_vertex, size_t binding_count) { + if (!binding_count) { + return descriptor_set_layout_empty_; + } + + TextureDescriptorSetLayoutKey texture_descriptor_set_layout_key; + texture_descriptor_set_layout_key.is_samplers = uint32_t(is_samplers); + texture_descriptor_set_layout_key.is_vertex = uint32_t(is_vertex); + texture_descriptor_set_layout_key.binding_count = uint32_t(binding_count); + auto it_existing = + descriptor_set_layouts_textures_.find(texture_descriptor_set_layout_key); + if (it_existing != descriptor_set_layouts_textures_.end()) { + return it_existing->second; + } + + const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = 
provider.dfn(); + VkDevice device = provider.device(); + + descriptor_set_layout_bindings_.clear(); + descriptor_set_layout_bindings_.reserve(binding_count); + VkDescriptorType descriptor_type = is_samplers + ? VK_DESCRIPTOR_TYPE_SAMPLER + : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + VkShaderStageFlags stage_flags = + is_vertex ? guest_shader_vertex_stages_ : VK_SHADER_STAGE_FRAGMENT_BIT; + for (size_t i = 0; i < binding_count; ++i) { + VkDescriptorSetLayoutBinding& descriptor_set_layout_binding = + descriptor_set_layout_bindings_.emplace_back(); + descriptor_set_layout_binding.binding = uint32_t(i); + descriptor_set_layout_binding.descriptorType = descriptor_type; + descriptor_set_layout_binding.descriptorCount = 1; + descriptor_set_layout_binding.stageFlags = stage_flags; + } + VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info; + descriptor_set_layout_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + descriptor_set_layout_create_info.pNext = nullptr; + descriptor_set_layout_create_info.flags = 0; + descriptor_set_layout_create_info.bindingCount = uint32_t(binding_count); + descriptor_set_layout_create_info.pBindings = + descriptor_set_layout_bindings_.data(); + VkDescriptorSetLayout texture_descriptor_set_layout; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &texture_descriptor_set_layout) != VK_SUCCESS) { + return VK_NULL_HANDLE; + } + descriptor_set_layouts_textures_.emplace(texture_descriptor_set_layout_key, + texture_descriptor_set_layout); + return texture_descriptor_set_layout; +} + const VulkanPipelineCache::PipelineLayoutProvider* -VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, - uint32_t texture_count_vertex) { +VulkanCommandProcessor::GetPipelineLayout(size_t texture_count_pixel, + size_t sampler_count_pixel, + size_t texture_count_vertex, + size_t sampler_count_vertex) { PipelineLayoutKey pipeline_layout_key; - 
pipeline_layout_key.texture_count_pixel = texture_count_pixel; - pipeline_layout_key.texture_count_vertex = texture_count_vertex; + pipeline_layout_key.texture_count_pixel = uint16_t(texture_count_pixel); + pipeline_layout_key.sampler_count_pixel = uint16_t(sampler_count_pixel); + pipeline_layout_key.texture_count_vertex = uint16_t(texture_count_vertex); + pipeline_layout_key.sampler_count_vertex = uint16_t(sampler_count_vertex); { auto it = pipeline_layouts_.find(pipeline_layout_key); if (it != pipeline_layouts_.end()) { @@ -1115,92 +1273,41 @@ VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, } } - const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); - VkDevice device = provider.device(); - - VkDescriptorSetLayout descriptor_set_layout_textures_pixel; - if (texture_count_pixel) { - TextureDescriptorSetLayoutKey texture_descriptor_set_layout_key; - texture_descriptor_set_layout_key.is_vertex = 0; - texture_descriptor_set_layout_key.texture_count = texture_count_pixel; - auto it = descriptor_set_layouts_textures_.find( - texture_descriptor_set_layout_key); - if (it != descriptor_set_layouts_textures_.end()) { - descriptor_set_layout_textures_pixel = it->second; - } else { - VkDescriptorSetLayoutBinding descriptor_set_layout_binding; - descriptor_set_layout_binding.binding = 0; - descriptor_set_layout_binding.descriptorType = - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - descriptor_set_layout_binding.descriptorCount = texture_count_pixel; - descriptor_set_layout_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - descriptor_set_layout_binding.pImmutableSamplers = nullptr; - VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info; - descriptor_set_layout_create_info.sType = - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - descriptor_set_layout_create_info.pNext = nullptr; - descriptor_set_layout_create_info.flags = 0; - 
descriptor_set_layout_create_info.bindingCount = 1; - descriptor_set_layout_create_info.pBindings = - &descriptor_set_layout_binding; - if (dfn.vkCreateDescriptorSetLayout( - device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layout_textures_pixel) != VK_SUCCESS) { - XELOGE( - "Failed to create a Vulkan descriptor set layout for {} combined " - "images and samplers for guest pixel shaders", - texture_count_pixel); - return nullptr; - } - descriptor_set_layouts_textures_.emplace( - texture_descriptor_set_layout_key, - descriptor_set_layout_textures_pixel); - } - } else { - descriptor_set_layout_textures_pixel = descriptor_set_layout_empty_; + VkDescriptorSetLayout descriptor_set_layout_textures_pixel = + GetTextureDescriptorSetLayout(false, false, texture_count_pixel); + if (descriptor_set_layout_textures_pixel == VK_NULL_HANDLE) { + XELOGE( + "Failed to obtain a Vulkan descriptor set layout for {} sampled images " + "for guest pixel shaders", + texture_count_pixel); + return nullptr; } - - VkDescriptorSetLayout descriptor_set_layout_textures_vertex; - if (texture_count_vertex) { - TextureDescriptorSetLayoutKey texture_descriptor_set_layout_key; - texture_descriptor_set_layout_key.is_vertex = 0; - texture_descriptor_set_layout_key.texture_count = texture_count_vertex; - auto it = descriptor_set_layouts_textures_.find( - texture_descriptor_set_layout_key); - if (it != descriptor_set_layouts_textures_.end()) { - descriptor_set_layout_textures_vertex = it->second; - } else { - VkDescriptorSetLayoutBinding descriptor_set_layout_binding; - descriptor_set_layout_binding.binding = 0; - descriptor_set_layout_binding.descriptorType = - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - descriptor_set_layout_binding.descriptorCount = texture_count_vertex; - descriptor_set_layout_binding.stageFlags = guest_shader_vertex_stages_; - descriptor_set_layout_binding.pImmutableSamplers = nullptr; - VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info; - 
descriptor_set_layout_create_info.sType = - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - descriptor_set_layout_create_info.pNext = nullptr; - descriptor_set_layout_create_info.flags = 0; - descriptor_set_layout_create_info.bindingCount = 1; - descriptor_set_layout_create_info.pBindings = - &descriptor_set_layout_binding; - if (dfn.vkCreateDescriptorSetLayout( - device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layout_textures_vertex) != VK_SUCCESS) { - XELOGE( - "Failed to create a Vulkan descriptor set layout for {} combined " - "images and samplers for guest vertex shaders", - texture_count_vertex); - return nullptr; - } - descriptor_set_layouts_textures_.emplace( - texture_descriptor_set_layout_key, - descriptor_set_layout_textures_vertex); - } - } else { - descriptor_set_layout_textures_vertex = descriptor_set_layout_empty_; + VkDescriptorSetLayout descriptor_set_layout_samplers_pixel = + GetTextureDescriptorSetLayout(true, false, sampler_count_pixel); + if (descriptor_set_layout_samplers_pixel == VK_NULL_HANDLE) { + XELOGE( + "Failed to obtain a Vulkan descriptor set layout for {} samplers for " + "guest pixel shaders", + sampler_count_pixel); + return nullptr; + } + VkDescriptorSetLayout descriptor_set_layout_textures_vertex = + GetTextureDescriptorSetLayout(false, true, texture_count_vertex); + if (descriptor_set_layout_textures_vertex == VK_NULL_HANDLE) { + XELOGE( + "Failed to obtain a Vulkan descriptor set layout for {} sampled images " + "for guest vertex shaders", + texture_count_vertex); + return nullptr; + } + VkDescriptorSetLayout descriptor_set_layout_samplers_vertex = + GetTextureDescriptorSetLayout(true, true, sampler_count_vertex); + if (descriptor_set_layout_samplers_vertex == VK_NULL_HANDLE) { + XELOGE( + "Failed to obtain a Vulkan descriptor set layout for {} samplers for " + "guest vertex shaders", + sampler_count_vertex); + return nullptr; } VkDescriptorSetLayout @@ -1211,23 +1318,36 @@ 
VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, descriptor_set_layout_shared_memory_and_edram_; descriptor_set_layouts [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] = - descriptor_set_layout_fetch_bool_loop_constants_; + GetSingleTransientDescriptorLayout( + SingleTransientDescriptorLayout::kUniformBufferGuestShader); descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSystemConstants] = - descriptor_set_layout_system_constants_; + GetSingleTransientDescriptorLayout( + SingleTransientDescriptorLayout::kUniformBufferSystemConstants); descriptor_set_layouts [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] = - descriptor_set_layout_float_constants_pixel_; + GetSingleTransientDescriptorLayout( + SingleTransientDescriptorLayout::kUniformBufferFragment); descriptor_set_layouts [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] = - descriptor_set_layout_float_constants_vertex_; + GetSingleTransientDescriptorLayout( + SingleTransientDescriptorLayout::kUniformBufferGuestVertex); descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetFetchConstants] = - descriptor_set_layout_fetch_bool_loop_constants_; + GetSingleTransientDescriptorLayout( + SingleTransientDescriptorLayout::kUniformBufferGuestShader); // Mutable layouts. 
+ descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSamplersVertex] = + descriptor_set_layout_samplers_vertex; descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesVertex] = descriptor_set_layout_textures_vertex; + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSamplersPixel] = + descriptor_set_layout_samplers_pixel; descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesPixel] = descriptor_set_layout_textures_pixel; + const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + VkPipelineLayoutCreateInfo pipeline_layout_create_info; pipeline_layout_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; @@ -1251,7 +1371,9 @@ VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, std::piecewise_construct, std::forward_as_tuple(pipeline_layout_key), std::forward_as_tuple(pipeline_layout, descriptor_set_layout_textures_vertex, - descriptor_set_layout_textures_pixel)); + descriptor_set_layout_samplers_vertex, + descriptor_set_layout_textures_pixel, + descriptor_set_layout_samplers_pixel)); // unordered_map insertion doesn't invalidate element references. return &emplaced_pair.first->second; } @@ -1472,6 +1594,13 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // set N if set layouts 0 through N are compatible). 
uint32_t descriptor_sets_kept = uint32_t(SpirvShaderTranslator::kDescriptorSetCount); + if (current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_samplers_vertex_ref() != + pipeline_layout->descriptor_set_layout_samplers_vertex_ref()) { + descriptor_sets_kept = std::min( + descriptor_sets_kept, + uint32_t(SpirvShaderTranslator::kDescriptorSetSamplersVertex)); + } if (current_guest_graphics_pipeline_layout_ ->descriptor_set_layout_textures_vertex_ref() != pipeline_layout->descriptor_set_layout_textures_vertex_ref()) { @@ -1479,6 +1608,13 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, descriptor_sets_kept, uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesVertex)); } + if (current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_samplers_pixel_ref() != + pipeline_layout->descriptor_set_layout_samplers_pixel_ref()) { + descriptor_sets_kept = std::min( + descriptor_sets_kept, + uint32_t(SpirvShaderTranslator::kDescriptorSetSamplersPixel)); + } if (current_guest_graphics_pipeline_layout_ ->descriptor_set_layout_textures_pixel_ref() != pipeline_layout->descriptor_set_layout_textures_pixel_ref()) { @@ -1860,13 +1996,45 @@ bool VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { [SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] = shared_memory_and_edram_descriptor_set_; current_graphics_descriptor_set_values_up_to_date_ = - uint32_t(1) + UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram; // Reclaim pool pages - no need to do this every small submission since some // may be reused. - transient_descriptor_pool_uniform_buffers_->Reclaim(frame_completed_); + // FIXME(Triang3l): This will result in a memory leak if the guest is not + // presenting. 
uniform_buffer_pool_->Reclaim(frame_completed_); + while (!single_transient_descriptors_used_.empty()) { + const UsedSingleTransientDescriptor& used_transient_descriptor = + single_transient_descriptors_used_.front(); + if (used_transient_descriptor.frame > frame_completed_) { + break; + } + single_transient_descriptors_free_[size_t( + used_transient_descriptor.layout)] + .push_back(used_transient_descriptor.set); + single_transient_descriptors_used_.pop_front(); + } + while (!texture_transient_descriptor_sets_used_.empty()) { + const UsedTextureTransientDescriptorSet& used_transient_descriptor_set = + texture_transient_descriptor_sets_used_.front(); + if (used_transient_descriptor_set.frame > frame_completed_) { + break; + } + auto it = texture_transient_descriptor_sets_free_.find( + used_transient_descriptor_set.layout); + if (it == texture_transient_descriptor_sets_free_.end()) { + it = + texture_transient_descriptor_sets_free_ + .emplace( + std::piecewise_construct, + std::forward_as_tuple(used_transient_descriptor_set.layout), + std::forward_as_tuple()) + .first; + } + it->second.push_back(used_transient_descriptor_set.set); + texture_transient_descriptor_sets_used_.pop_front(); + } primitive_processor_->BeginFrame(); @@ -2107,8 +2275,9 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { } command_buffers_writable_.clear(); + ClearTransientDescriptorPools(); + uniform_buffer_pool_->ClearCache(); - transient_descriptor_pool_uniform_buffers_->ClearCache(); texture_cache_->ClearCache(); @@ -2140,6 +2309,21 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { return true; } +void VulkanCommandProcessor::ClearTransientDescriptorPools() { + texture_transient_descriptor_sets_free_.clear(); + texture_transient_descriptor_sets_used_.clear(); + transient_descriptor_allocator_sampler_.Reset(); + transient_descriptor_allocator_sampled_image_.Reset(); + + for (std::vector& transient_descriptors_free : + single_transient_descriptors_free_) { + 
transient_descriptors_free.clear(); + } + single_transient_descriptors_used_.clear(); + transient_descriptor_allocator_storage_buffer_.Reset(); + transient_descriptor_allocator_uniform_buffer_.Reset(); +} + void VulkanCommandProcessor::SplitPendingBarrier() { size_t pending_buffer_memory_barrier_count = pending_barriers_buffer_memory_barriers_.size(); @@ -2431,7 +2615,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( if (dirty) { current_graphics_descriptor_set_values_up_to_date_ &= - ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants); + ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants); } } @@ -2443,7 +2627,13 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, const RegisterFile& regs = *register_file_; + const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + // Invalidate descriptors for changed data. + + // Float constants. // These are the constant base addresses/ranges for shaders. // We have these hardcoded right now cause nothing seems to differ on the Xbox // 360 (however, OpenGL ES on Adreno 200 on Android has different ranges). 
@@ -2465,7 +2655,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, if (float_constant_count_vertex) { current_graphics_descriptor_set_values_up_to_date_ &= ~( - uint32_t(1) + UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex); } } @@ -2482,7 +2672,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, float_constant_map_pixel.float_bitmap[i]; if (float_constant_count_pixel) { current_graphics_descriptor_set_values_up_to_date_ &= - ~(uint32_t(1) + ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel); } } @@ -2492,29 +2682,133 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, sizeof(current_float_constant_map_pixel_)); } + // Textures and samplers. + const std::vector& samplers_vertex = + vertex_shader->GetSamplerBindingsAfterTranslation(); + const std::vector& textures_vertex = + vertex_shader->GetTextureBindingsAfterTranslation(); + uint32_t sampler_count_vertex = uint32_t(samplers_vertex.size()); + uint32_t texture_count_vertex = uint32_t(textures_vertex.size()); + const std::vector* samplers_pixel; + const std::vector* textures_pixel; + uint32_t sampler_count_pixel, texture_count_pixel; + if (pixel_shader) { + samplers_pixel = &pixel_shader->GetSamplerBindingsAfterTranslation(); + textures_pixel = &pixel_shader->GetTextureBindingsAfterTranslation(); + sampler_count_pixel = uint32_t(samplers_pixel->size()); + texture_count_pixel = uint32_t(textures_pixel->size()); + } else { + samplers_pixel = nullptr; + textures_pixel = nullptr; + sampler_count_pixel = 0; + texture_count_pixel = 0; + } + // TODO(Triang3l): Reuse texture and sampler bindings if not changed. 
+ current_graphics_descriptor_set_values_up_to_date_ &= + ~((UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersVertex) | + (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex) | + (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersPixel) | + (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel)); + // Make sure new descriptor sets are bound to the command buffer. + current_graphics_descriptor_sets_bound_up_to_date_ &= current_graphics_descriptor_set_values_up_to_date_; + // Fill the texture and sampler write image infos. + + bool write_vertex_samplers = + sampler_count_vertex && + !(current_graphics_descriptor_set_values_up_to_date_ & + (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersVertex)); + bool write_vertex_textures = + texture_count_vertex && + !(current_graphics_descriptor_set_values_up_to_date_ & + (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex)); + bool write_pixel_samplers = + sampler_count_pixel && + !(current_graphics_descriptor_set_values_up_to_date_ & + (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersPixel)); + bool write_pixel_textures = + texture_count_pixel && + !(current_graphics_descriptor_set_values_up_to_date_ & + (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel)); + descriptor_write_image_info_.clear(); + descriptor_write_image_info_.reserve( + (write_vertex_samplers ? sampler_count_vertex : 0) + + (write_vertex_textures ? texture_count_vertex : 0) + + (write_pixel_samplers ? sampler_count_pixel : 0) + + (write_pixel_textures ? texture_count_pixel : 0)); + size_t vertex_sampler_image_info_offset = descriptor_write_image_info_.size(); + if (write_vertex_samplers) { + // TODO(Triang3l): Real samplers. 
+ for (const VulkanShader::SamplerBinding& sampler_binding : + samplers_vertex) { + VkDescriptorImageInfo& descriptor_image_info = + descriptor_write_image_info_.emplace_back(); + descriptor_image_info.sampler = provider.GetHostSampler( + ui::vulkan::VulkanProvider::HostSampler::kNearestClamp); + } + } + size_t vertex_texture_image_info_offset = descriptor_write_image_info_.size(); + if (write_vertex_textures) { + // TODO(Triang3l): Real textures. + for (const VulkanShader::TextureBinding& texture_binding : + textures_vertex) { + VkDescriptorImageInfo& descriptor_image_info = + descriptor_write_image_info_.emplace_back(); + descriptor_image_info.imageView = + texture_cache_->GetNullImageView(texture_binding.dimension); + descriptor_image_info.imageLayout = + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + } + } + size_t pixel_sampler_image_info_offset = descriptor_write_image_info_.size(); + if (write_pixel_samplers) { + // TODO(Triang3l): Real samplers. + for (const VulkanShader::SamplerBinding& sampler_binding : + *samplers_pixel) { + VkDescriptorImageInfo& descriptor_image_info = + descriptor_write_image_info_.emplace_back(); + descriptor_image_info.sampler = provider.GetHostSampler( + ui::vulkan::VulkanProvider::HostSampler::kNearestClamp); + } + } + size_t pixel_texture_image_info_offset = descriptor_write_image_info_.size(); + if (write_pixel_textures) { + // TODO(Triang3l): Real textures. + for (const VulkanShader::TextureBinding& texture_binding : + *textures_pixel) { + VkDescriptorImageInfo& descriptor_image_info = + descriptor_write_image_info_.emplace_back(); + descriptor_image_info.imageView = + texture_cache_->GetNullImageView(texture_binding.dimension); + descriptor_image_info.imageLayout = + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + } + } + // Write the new descriptor sets. 
+ VkWriteDescriptorSet write_descriptor_sets[SpirvShaderTranslator::kDescriptorSetCount]; uint32_t write_descriptor_set_count = 0; uint32_t write_descriptor_set_bits = 0; assert_not_zero( current_graphics_descriptor_set_values_up_to_date_ & - (uint32_t(1) + (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram)); + // Bool and loop constants. VkDescriptorBufferInfo buffer_info_bool_loop_constants; if (!(current_graphics_descriptor_set_values_up_to_date_ & - (uint32_t(1) + (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants))) { VkWriteDescriptorSet& write_bool_loop_constants = write_descriptor_sets[write_descriptor_set_count++]; constexpr size_t kBoolLoopConstantsSize = sizeof(uint32_t) * (8 + 32); - uint8_t* mapping_bool_loop_constants = WriteUniformBufferBinding( + uint8_t* mapping_bool_loop_constants = WriteTransientUniformBufferBinding( kBoolLoopConstantsSize, - descriptor_set_layout_fetch_bool_loop_constants_, + SingleTransientDescriptorLayout::kUniformBufferGuestShader, buffer_info_bool_loop_constants, write_bool_loop_constants); if (!mapping_bool_loop_constants) { return false; @@ -2523,35 +2817,37 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, ®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, kBoolLoopConstantsSize); write_descriptor_set_bits |= - uint32_t(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants; + UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants; current_graphics_descriptor_sets_ [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] = write_bool_loop_constants.dstSet; } + // System constants. 
VkDescriptorBufferInfo buffer_info_system_constants; if (!(current_graphics_descriptor_set_values_up_to_date_ & - (uint32_t(1) + (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants))) { VkWriteDescriptorSet& write_system_constants = write_descriptor_sets[write_descriptor_set_count++]; - uint8_t* mapping_system_constants = WriteUniformBufferBinding( + uint8_t* mapping_system_constants = WriteTransientUniformBufferBinding( sizeof(SpirvShaderTranslator::SystemConstants), - descriptor_set_layout_system_constants_, buffer_info_system_constants, - write_system_constants); + SingleTransientDescriptorLayout::kUniformBufferSystemConstants, + buffer_info_system_constants, write_system_constants); if (!mapping_system_constants) { return false; } std::memcpy(mapping_system_constants, &system_constants_, sizeof(SpirvShaderTranslator::SystemConstants)); write_descriptor_set_bits |= - uint32_t(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants; + UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants; current_graphics_descriptor_sets_ [SpirvShaderTranslator::kDescriptorSetSystemConstants] = write_system_constants.dstSet; } + // Pixel shader float constants. VkDescriptorBufferInfo buffer_info_float_constant_pixel; if (!(current_graphics_descriptor_set_values_up_to_date_ & - (uint32_t(1) + (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel))) { // Even if the shader doesn't need any float constants, a valid binding must // still be provided (the pipeline layout always has float constants, for @@ -2560,9 +2856,9 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, // buffer. 
VkWriteDescriptorSet& write_float_constants_pixel = write_descriptor_sets[write_descriptor_set_count++]; - uint8_t* mapping_float_constants_pixel = WriteUniformBufferBinding( - sizeof(float) * 4 * std::max(float_constant_count_pixel, uint32_t(1)), - descriptor_set_layout_float_constants_pixel_, + uint8_t* mapping_float_constants_pixel = WriteTransientUniformBufferBinding( + sizeof(float) * 4 * std::max(float_constant_count_pixel, UINT32_C(1)), + SingleTransientDescriptorLayout::kUniformBufferFragment, buffer_info_float_constant_pixel, write_float_constants_pixel); if (!mapping_float_constants_pixel) { return false; @@ -2582,21 +2878,24 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, } } write_descriptor_set_bits |= - uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel; + UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel; current_graphics_descriptor_sets_ [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] = write_float_constants_pixel.dstSet; } + // Vertex shader float constants. 
VkDescriptorBufferInfo buffer_info_float_constant_vertex; if (!(current_graphics_descriptor_set_values_up_to_date_ & - (uint32_t(1) + (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex))) { VkWriteDescriptorSet& write_float_constants_vertex = write_descriptor_sets[write_descriptor_set_count++]; - uint8_t* mapping_float_constants_vertex = WriteUniformBufferBinding( - sizeof(float) * 4 * std::max(float_constant_count_vertex, uint32_t(1)), - descriptor_set_layout_float_constants_vertex_, - buffer_info_float_constant_vertex, write_float_constants_vertex); + uint8_t* mapping_float_constants_vertex = + WriteTransientUniformBufferBinding( + sizeof(float) * 4 * + std::max(float_constant_count_vertex, UINT32_C(1)), + SingleTransientDescriptorLayout::kUniformBufferGuestVertex, + buffer_info_float_constant_vertex, write_float_constants_vertex); if (!mapping_float_constants_vertex) { return false; } @@ -2615,20 +2914,22 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, } } write_descriptor_set_bits |= - uint32_t(1) + UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex; current_graphics_descriptor_sets_ [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] = write_float_constants_vertex.dstSet; } + // Fetch constants. 
VkDescriptorBufferInfo buffer_info_fetch_constants; if (!(current_graphics_descriptor_set_values_up_to_date_ & - (uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants))) { + (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants))) { VkWriteDescriptorSet& write_fetch_constants = write_descriptor_sets[write_descriptor_set_count++]; constexpr size_t kFetchConstantsSize = sizeof(uint32_t) * 6 * 32; - uint8_t* mapping_fetch_constants = WriteUniformBufferBinding( - kFetchConstantsSize, descriptor_set_layout_fetch_bool_loop_constants_, + uint8_t* mapping_fetch_constants = WriteTransientUniformBufferBinding( + kFetchConstantsSize, + SingleTransientDescriptorLayout::kUniformBufferGuestShader, buffer_info_fetch_constants, write_fetch_constants); if (!mapping_fetch_constants) { return false; @@ -2637,36 +2938,115 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, ®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, kFetchConstantsSize); write_descriptor_set_bits |= - uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants; + UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants; current_graphics_descriptor_sets_ [SpirvShaderTranslator::kDescriptorSetFetchConstants] = write_fetch_constants.dstSet; } + // Vertex shader samplers. + if (write_vertex_samplers) { + VkWriteDescriptorSet& write_samplers = + write_descriptor_sets[write_descriptor_set_count++]; + if (!WriteTransientTextureBindings( + true, true, sampler_count_vertex, + current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_samplers_vertex_ref(), + descriptor_write_image_info_.data() + + vertex_sampler_image_info_offset, + write_samplers)) { + return false; + } + write_descriptor_set_bits |= + UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersVertex; + current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetSamplersVertex] = + write_samplers.dstSet; + } + // Vertex shader textures. 
+ if (write_vertex_textures) { + VkWriteDescriptorSet& write_textures = + write_descriptor_sets[write_descriptor_set_count++]; + if (!WriteTransientTextureBindings( + false, true, texture_count_vertex, + current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_vertex_ref(), + descriptor_write_image_info_.data() + + vertex_texture_image_info_offset, + write_textures)) { + return false; + } + write_descriptor_set_bits |= + UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex; + current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetTexturesVertex] = + write_textures.dstSet; + } + // Pixel shader samplers. + if (write_pixel_samplers) { + VkWriteDescriptorSet& write_samplers = + write_descriptor_sets[write_descriptor_set_count++]; + if (!WriteTransientTextureBindings( + true, false, sampler_count_pixel, + current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_samplers_pixel_ref(), + descriptor_write_image_info_.data() + + pixel_sampler_image_info_offset, + write_samplers)) { + return false; + } + write_descriptor_set_bits |= + UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersPixel; + current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetSamplersPixel] = + write_samplers.dstSet; + } + // Pixel shader textures. + if (write_pixel_textures) { + VkWriteDescriptorSet& write_textures = + write_descriptor_sets[write_descriptor_set_count++]; + if (!WriteTransientTextureBindings( + false, false, texture_count_pixel, + current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_pixel_ref(), + descriptor_write_image_info_.data() + + pixel_texture_image_info_offset, + write_textures)) { + return false; + } + write_descriptor_set_bits |= + UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel; + current_graphics_descriptor_sets_ + [SpirvShaderTranslator::kDescriptorSetTexturesPixel] = + write_textures.dstSet; + } + // Write. 
if (write_descriptor_set_count) { - const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); - VkDevice device = provider.device(); dfn.vkUpdateDescriptorSets(device, write_descriptor_set_count, write_descriptor_sets, 0, nullptr); } - // Only make valid if written successfully. + // Only make valid if all descriptor sets have been allocated and written + // successfully. current_graphics_descriptor_set_values_up_to_date_ |= write_descriptor_set_bits; // Bind the new descriptor sets. uint32_t descriptor_sets_needed = - (uint32_t(1) << SpirvShaderTranslator::kDescriptorSetCount) - 1; - if (current_guest_graphics_pipeline_layout_ - ->descriptor_set_layout_textures_vertex_ref() == - descriptor_set_layout_empty_) { + (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetCount) - 1; + if (!sampler_count_vertex) { descriptor_sets_needed &= - ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex); + ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersVertex); } - if (current_guest_graphics_pipeline_layout_ - ->descriptor_set_layout_textures_pixel_ref() == - descriptor_set_layout_empty_) { + if (!texture_count_vertex) { descriptor_sets_needed &= - ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel); + ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex); + } + if (!sampler_count_pixel) { + descriptor_sets_needed &= + ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersPixel); + } + if (!texture_count_pixel) { + descriptor_sets_needed &= + ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel); } uint32_t descriptor_sets_remaining = descriptor_sets_needed & @@ -2676,9 +3056,9 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, xe::bit_scan_forward(descriptor_sets_remaining, &descriptor_set_index)) { uint32_t descriptor_set_mask_tzcnt = xe::tzcnt(~(descriptor_sets_remaining | - 
((uint32_t(1) << descriptor_set_index) - 1))); - // TODO(Triang3l): Bind to compute for rectangle list emulation without - // geometry shaders. + ((UINT32_C(1) << descriptor_set_index) - 1))); + // TODO(Triang3l): Bind to compute for memexport emulation without vertex + // shader memory stores. deferred_command_buffer_.CmdVkBindDescriptorSets( VK_PIPELINE_BIND_POINT_GRAPHICS, current_guest_graphics_pipeline_layout_->GetPipelineLayout(), @@ -2688,20 +3068,20 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, break; } descriptor_sets_remaining &= - ~((uint32_t(1) << descriptor_set_mask_tzcnt) - 1); + ~((UINT32_C(1) << descriptor_set_mask_tzcnt) - 1); } current_graphics_descriptor_sets_bound_up_to_date_ |= descriptor_sets_needed; return true; } -uint8_t* VulkanCommandProcessor::WriteUniformBufferBinding( - size_t size, VkDescriptorSetLayout descriptor_set_layout, +uint8_t* VulkanCommandProcessor::WriteTransientUniformBufferBinding( + size_t size, SingleTransientDescriptorLayout transient_descriptor_layout, VkDescriptorBufferInfo& descriptor_buffer_info_out, VkWriteDescriptorSet& write_descriptor_set_out) { + assert_true(frame_open_); VkDescriptorSet descriptor_set = - transient_descriptor_pool_uniform_buffers_->Request( - frame_current_, descriptor_set_layout, 1); + AllocateSingleTransientDescriptor(transient_descriptor_layout); if (descriptor_set == VK_NULL_HANDLE) { return nullptr; } @@ -2728,6 +3108,53 @@ uint8_t* VulkanCommandProcessor::WriteUniformBufferBinding( return mapping; } +bool VulkanCommandProcessor::WriteTransientTextureBindings( + bool is_samplers, bool is_vertex, uint32_t binding_count, + VkDescriptorSetLayout descriptor_set_layout, + const VkDescriptorImageInfo* image_info, + VkWriteDescriptorSet& write_descriptor_set_out) { + assert_not_zero(binding_count); + assert_true(frame_open_); + TextureDescriptorSetLayoutKey texture_descriptor_set_layout_key; + texture_descriptor_set_layout_key.is_samplers = 
uint32_t(is_samplers); + texture_descriptor_set_layout_key.is_vertex = uint32_t(is_vertex); + texture_descriptor_set_layout_key.binding_count = binding_count; + VkDescriptorSet texture_descriptor_set; + auto textures_free_it = texture_transient_descriptor_sets_free_.find( + texture_descriptor_set_layout_key); + if (textures_free_it != texture_transient_descriptor_sets_free_.end() && + !textures_free_it->second.empty()) { + texture_descriptor_set = textures_free_it->second.back(); + textures_free_it->second.pop_back(); + } else { + texture_descriptor_set = + (is_samplers ? transient_descriptor_allocator_sampler_ + : transient_descriptor_allocator_sampled_image_) + .Allocate(descriptor_set_layout, binding_count); + if (texture_descriptor_set == VK_NULL_HANDLE) { + return false; + } + } + UsedTextureTransientDescriptorSet& used_texture_descriptor_set = + texture_transient_descriptor_sets_used_.emplace_back(); + used_texture_descriptor_set.frame = frame_current_; + used_texture_descriptor_set.layout = texture_descriptor_set_layout_key; + used_texture_descriptor_set.set = texture_descriptor_set; + write_descriptor_set_out.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_descriptor_set_out.pNext = nullptr; + write_descriptor_set_out.dstSet = texture_descriptor_set; + write_descriptor_set_out.dstBinding = 0; + write_descriptor_set_out.dstArrayElement = 0; + write_descriptor_set_out.descriptorCount = binding_count; + write_descriptor_set_out.descriptorType = + is_samplers ? 
VK_DESCRIPTOR_TYPE_SAMPLER + : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + write_descriptor_set_out.pImageInfo = image_info; + write_descriptor_set_out.pBufferInfo = nullptr; + write_descriptor_set_out.pTexelBufferView = nullptr; + return true; +} + } // namespace vulkan } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 2f90ca614..3158db0b6 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -36,7 +36,7 @@ #include "xenia/gpu/vulkan/vulkan_texture_cache.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/kernel_state.h" -#include "xenia/ui/vulkan/transient_descriptor_pool.h" +#include "xenia/ui/vulkan/single_type_descriptor_set_allocator.h" #include "xenia/ui/vulkan/vulkan_presenter.h" #include "xenia/ui/vulkan/vulkan_provider.h" #include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h" @@ -47,6 +47,17 @@ namespace vulkan { class VulkanCommandProcessor : public CommandProcessor { public: + // Single-descriptor layouts for use within a single frame. + enum class SingleTransientDescriptorLayout { + kUniformBufferGuestVertex, + kUniformBufferFragment, + kUniformBufferGuestShader, + kUniformBufferSystemConstants, + kUniformBufferCompute, + kStorageBufferCompute, + kCount, + }; + VulkanCommandProcessor(VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state); ~VulkanCommandProcessor(); @@ -119,9 +130,23 @@ class VulkanCommandProcessor : public CommandProcessor { // scope. Submission must be open. void EndRenderPass(); + VkDescriptorSetLayout GetSingleTransientDescriptorLayout( + SingleTransientDescriptorLayout transient_descriptor_layout) const { + return descriptor_set_layouts_single_transient_[size_t( + transient_descriptor_layout)]; + } + // A frame must be open. 
+ VkDescriptorSet AllocateSingleTransientDescriptor( + SingleTransientDescriptorLayout transient_descriptor_layout); + + // The returned reference is valid until a cache clear. + VkDescriptorSetLayout GetTextureDescriptorSetLayout(bool is_samplers, + bool is_vertex, + size_t binding_count); // The returned reference is valid until a cache clear. const VulkanPipelineCache::PipelineLayoutProvider* GetPipelineLayout( - uint32_t texture_count_pixel, uint32_t texture_count_vertex); + size_t texture_count_pixel, size_t sampler_count_pixel, + size_t texture_count_vertex, size_t sampler_count_vertex); // Binds a graphics pipeline for host-specific purposes, invalidating the // affected state. keep_dynamic_* must be false (to invalidate the dynamic @@ -172,10 +197,12 @@ class VulkanCommandProcessor : public CommandProcessor { union TextureDescriptorSetLayoutKey { uint32_t key; struct { + // 0 - sampled image descriptors, 1 - sampler descriptors. + uint32_t is_samplers : 1; uint32_t is_vertex : 1; // For 0, use descriptor_set_layout_empty_ instead as these are owning // references. - uint32_t texture_count : 31; + uint32_t binding_count : 30; }; TextureDescriptorSetLayoutKey() : key(0) { @@ -196,12 +223,14 @@ class VulkanCommandProcessor : public CommandProcessor { }; union PipelineLayoutKey { - uint32_t key; + uint64_t key; struct { // Pixel textures in the low bits since those are varied much more // commonly. 
- uint32_t texture_count_pixel : 16; - uint32_t texture_count_vertex : 16; + uint16_t texture_count_pixel; + uint16_t sampler_count_pixel; + uint16_t texture_count_vertex; + uint16_t sampler_count_vertex; }; PipelineLayoutKey() : key(0) { static_assert_size(*this, sizeof(key)); } @@ -221,29 +250,55 @@ class VulkanCommandProcessor : public CommandProcessor { class PipelineLayout : public VulkanPipelineCache::PipelineLayoutProvider { public: - PipelineLayout( + explicit PipelineLayout( VkPipelineLayout pipeline_layout, VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref, - VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref) + VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref, + VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref, + VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref) : pipeline_layout_(pipeline_layout), descriptor_set_layout_textures_vertex_ref_( descriptor_set_layout_textures_vertex_ref), + descriptor_set_layout_samplers_vertex_ref_( + descriptor_set_layout_samplers_vertex_ref), descriptor_set_layout_textures_pixel_ref_( - descriptor_set_layout_textures_pixel_ref) {} + descriptor_set_layout_textures_pixel_ref), + descriptor_set_layout_samplers_pixel_ref_( + descriptor_set_layout_samplers_pixel_ref) {} VkPipelineLayout GetPipelineLayout() const override { return pipeline_layout_; } VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref() const { return descriptor_set_layout_textures_vertex_ref_; } + VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref() const { + return descriptor_set_layout_samplers_vertex_ref_; + } VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref() const { return descriptor_set_layout_textures_pixel_ref_; } + VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref() const { + return descriptor_set_layout_samplers_pixel_ref_; + } private: VkPipelineLayout pipeline_layout_; VkDescriptorSetLayout 
descriptor_set_layout_textures_vertex_ref_; + VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref_; VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref_; + VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref_; + }; + + struct UsedSingleTransientDescriptor { + uint64_t frame; + SingleTransientDescriptorLayout layout; + VkDescriptorSet set; + }; + + struct UsedTextureTransientDescriptorSet { + uint64_t frame; + TextureDescriptorSetLayoutKey layout; + VkDescriptorSet set; }; // BeginSubmission and EndSubmission may be called at any time. If there's an @@ -272,6 +327,8 @@ class VulkanCommandProcessor : public CommandProcessor { return !submission_open_ && submissions_in_flight_fences_.empty(); } + void ClearTransientDescriptorPools(); + void SplitPendingBarrier(); void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info, @@ -284,10 +341,20 @@ class VulkanCommandProcessor : public CommandProcessor { // Allocates a descriptor, space in the uniform buffer pool, and fills the // VkWriteDescriptorSet structure and VkDescriptorBufferInfo referenced by it. // Returns null in case of failure. - uint8_t* WriteUniformBufferBinding( - size_t size, VkDescriptorSetLayout descriptor_set_layout, + uint8_t* WriteTransientUniformBufferBinding( + size_t size, SingleTransientDescriptorLayout transient_descriptor_layout, VkDescriptorBufferInfo& descriptor_buffer_info_out, VkWriteDescriptorSet& write_descriptor_set_out); + // Allocates a descriptor set and fills the VkWriteDescriptorSet structure. + // The descriptor set layout must be the one for the given is_samplers, + // is_vertex, binding_count (from GetTextureDescriptorSetLayout - may be + // already available at the moment of the call, no need to locate it again). + // Returns whether the allocation was successful. 
+ bool WriteTransientTextureBindings( + bool is_samplers, bool is_vertex, uint32_t binding_count, + VkDescriptorSetLayout descriptor_set_layout, + const VkDescriptorImageInfo* image_info, + VkWriteDescriptorSet& write_descriptor_set_out); bool device_lost_ = false; @@ -333,22 +400,21 @@ class VulkanCommandProcessor : public CommandProcessor { std::vector sparse_buffer_bind_infos_temp_; VkPipelineStageFlags sparse_bind_wait_stage_mask_ = 0; - std::unique_ptr - transient_descriptor_pool_uniform_buffers_; + // Temporary storage with reusable memory for creating descriptor set layouts. + std::vector descriptor_set_layout_bindings_; + // Temporary storage with reusable memory for writing image and sampler + // descriptors. + std::vector descriptor_write_image_info_; + std::unique_ptr uniform_buffer_pool_; // Descriptor set layouts used by different shaders. VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE; - VkDescriptorSetLayout descriptor_set_layout_fetch_bool_loop_constants_ = - VK_NULL_HANDLE; - VkDescriptorSetLayout descriptor_set_layout_float_constants_vertex_ = - VK_NULL_HANDLE; - VkDescriptorSetLayout descriptor_set_layout_float_constants_pixel_ = - VK_NULL_HANDLE; - VkDescriptorSetLayout descriptor_set_layout_system_constants_ = - VK_NULL_HANDLE; VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ = VK_NULL_HANDLE; + std::array + descriptor_set_layouts_single_transient_{}; // Descriptor set layouts are referenced by pipeline_layouts_. 
std::unordered_map pipeline_layouts_; + ui::vulkan::SingleTypeDescriptorSetAllocator + transient_descriptor_allocator_uniform_buffer_; + ui::vulkan::SingleTypeDescriptorSetAllocator + transient_descriptor_allocator_storage_buffer_; + std::deque single_transient_descriptors_used_; + std::array, + size_t(SingleTransientDescriptorLayout::kCount)> + single_transient_descriptors_free_; + + ui::vulkan::SingleTypeDescriptorSetAllocator + transient_descriptor_allocator_sampled_image_; + ui::vulkan::SingleTypeDescriptorSetAllocator + transient_descriptor_allocator_sampler_; + std::deque + texture_transient_descriptor_sets_used_; + std::unordered_map, + TextureDescriptorSetLayoutKey::Hasher> + texture_transient_descriptor_sets_free_; + std::unique_ptr shared_memory_; std::unique_ptr primitive_processor_; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index b6e74a648..433c42aeb 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -82,6 +82,7 @@ void VulkanPipelineCache::ClearCache() { const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + // Destroy all pipelines. last_pipeline_ = nullptr; for (const auto& pipeline_pair : pipelines_) { if (pipeline_pair.second.pipeline != VK_NULL_HANDLE) { @@ -90,10 +91,13 @@ void VulkanPipelineCache::ClearCache() { } pipelines_.clear(); + // Destroy all shaders. for (auto it : shaders_) { delete it.second; } shaders_.clear(); + texture_binding_layout_map_.clear(); + texture_binding_layouts_.clear(); } VulkanShader* VulkanPipelineCache::LoadShader(xenos::ShaderType shader_type, @@ -241,7 +245,23 @@ bool VulkanPipelineCache::ConfigurePipeline( // Create the pipeline if not the latest and not already existing. const PipelineLayoutProvider* pipeline_layout = - command_processor_.GetPipelineLayout(0, 0); + command_processor_.GetPipelineLayout( + pixel_shader + ? 
static_cast(pixel_shader->shader()) + .GetTextureBindingsAfterTranslation() + .size() + : 0, + pixel_shader + ? static_cast(pixel_shader->shader()) + .GetSamplerBindingsAfterTranslation() + .size() + : 0, + static_cast(vertex_shader->shader()) + .GetTextureBindingsAfterTranslation() + .size(), + static_cast(vertex_shader->shader()) + .GetSamplerBindingsAfterTranslation() + .size()); if (!pipeline_layout) { return false; } @@ -277,14 +297,80 @@ bool VulkanPipelineCache::ConfigurePipeline( bool VulkanPipelineCache::TranslateAnalyzedShader( SpirvShaderTranslator& translator, VulkanShader::VulkanTranslation& translation) { + VulkanShader& shader = static_cast(translation.shader()); + // Perform translation. // If this fails the shader will be marked as invalid and ignored later. if (!translator.TranslateAnalyzedShader(translation)) { XELOGE("Shader {:016X} translation failed; marking as ignored", - translation.shader().ucode_data_hash()); + shader.ucode_data_hash()); return false; } - return translation.GetOrCreateShaderModule() != VK_NULL_HANDLE; + if (translation.GetOrCreateShaderModule() == VK_NULL_HANDLE) { + return false; + } + + // TODO(Triang3l): Log that the shader has been successfully translated in + // common code. + + // Set up the texture binding layout. + if (shader.EnterBindingLayoutUserUIDSetup()) { + // Obtain the unique IDs of the binding layout if there are any texture + // bindings, for invalidation in the command processor. 
+ size_t texture_binding_layout_uid = kLayoutUIDEmpty; + const std::vector& texture_bindings = + shader.GetTextureBindingsAfterTranslation(); + size_t texture_binding_count = texture_bindings.size(); + if (texture_binding_count) { + size_t texture_binding_layout_bytes = + texture_binding_count * sizeof(*texture_bindings.data()); + uint64_t texture_binding_layout_hash = + XXH3_64bits(texture_bindings.data(), texture_binding_layout_bytes); + auto found_range = + texture_binding_layout_map_.equal_range(texture_binding_layout_hash); + for (auto it = found_range.first; it != found_range.second; ++it) { + if (it->second.vector_span_length == texture_binding_count && + !std::memcmp( + texture_binding_layouts_.data() + it->second.vector_span_offset, + texture_bindings.data(), texture_binding_layout_bytes)) { + texture_binding_layout_uid = it->second.uid; + break; + } + } + if (texture_binding_layout_uid == kLayoutUIDEmpty) { + static_assert( + kLayoutUIDEmpty == 0, + "Layout UID is size + 1 because it's assumed that 0 is the UID for " + "an empty layout"); + texture_binding_layout_uid = texture_binding_layout_map_.size() + 1; + LayoutUID new_uid; + new_uid.uid = texture_binding_layout_uid; + new_uid.vector_span_offset = texture_binding_layouts_.size(); + new_uid.vector_span_length = texture_binding_count; + texture_binding_layouts_.resize(new_uid.vector_span_offset + + texture_binding_count); + std::memcpy( + texture_binding_layouts_.data() + new_uid.vector_span_offset, + texture_bindings.data(), texture_binding_layout_bytes); + texture_binding_layout_map_.emplace(texture_binding_layout_hash, + new_uid); + } + } + shader.SetTextureBindingLayoutUserUID(texture_binding_layout_uid); + + // Use the sampler count for samplers because it's the only thing that must + // be the same for layouts to be compatible in this case + // (instruction-specified parameters are used as overrides for creating + // actual samplers). 
+ static_assert( + kLayoutUIDEmpty == 0, + "Empty layout UID is assumed to be 0 because for bindful samplers, the " + "UID is their count"); + shader.SetSamplerBindingLayoutUserUID( + shader.GetSamplerBindingsAfterTranslation().size()); + } + + return true; } void VulkanPipelineCache::WritePipelineRenderTargetDescription( diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h index 6be73d43c..58f53cff4 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h @@ -39,6 +39,8 @@ class VulkanCommandProcessor; // implementations. class VulkanPipelineCache { public: + static constexpr size_t kLayoutUIDEmpty = 0; + class PipelineLayoutProvider { public: virtual ~PipelineLayoutProvider() {} @@ -278,6 +280,21 @@ class VulkanPipelineCache { // Reusable shader translator on the command processor thread. std::unique_ptr shader_translator_; + struct LayoutUID { + size_t uid; + size_t vector_span_offset; + size_t vector_span_length; + }; + std::mutex layouts_mutex_; + // Texture binding layouts of different shaders, for obtaining layout UIDs. + std::vector texture_binding_layouts_; + // Map of texture binding layouts used by shaders, for obtaining UIDs. Keys + // are XXH3 hashes of layouts, values need manual collision resolution using + // layout_vector_offset:layout_length of texture_binding_layouts_. + std::unordered_multimap> + texture_binding_layout_map_; + // Ucode hash -> shader. std::unordered_map> diff --git a/src/xenia/gpu/vulkan/vulkan_shader.cc b/src/xenia/gpu/vulkan/vulkan_shader.cc index 9ebfc41a8..1ff7734ff 100644 --- a/src/xenia/gpu/vulkan/vulkan_shader.cc +++ b/src/xenia/gpu/vulkan/vulkan_shader.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. 
All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -11,6 +11,7 @@ #include +#include "xenia/base/logging.h" #include "xenia/ui/vulkan/vulkan_provider.h" namespace xe { @@ -45,6 +46,10 @@ VkShaderModule VulkanShader::VulkanTranslation::GetOrCreateShaderModule() { if (provider.dfn().vkCreateShaderModule(provider.device(), &shader_module_create_info, nullptr, &shader_module_) != VK_SUCCESS) { + XELOGE( + "VulkanShader::VulkanTranslation: Failed to create a Vulkan shader " + "module for shader {:016X} modification {:016X}", + shader().ucode_data_hash(), modification()); MakeInvalid(); return VK_NULL_HANDLE; } @@ -57,8 +62,8 @@ VulkanShader::VulkanShader(const ui::vulkan::VulkanProvider& provider, const uint32_t* ucode_dwords, size_t ucode_dword_count, std::endian ucode_source_endian) - : Shader(shader_type, ucode_data_hash, ucode_dwords, ucode_dword_count, - ucode_source_endian), + : SpirvShader(shader_type, ucode_data_hash, ucode_dwords, ucode_dword_count, + ucode_source_endian), provider_(provider) {} Shader::Translation* VulkanShader::CreateTranslationInstance( diff --git a/src/xenia/gpu/vulkan/vulkan_shader.h b/src/xenia/gpu/vulkan/vulkan_shader.h index b8e2a55f2..7e78ac3b6 100644 --- a/src/xenia/gpu/vulkan/vulkan_shader.h +++ b/src/xenia/gpu/vulkan/vulkan_shader.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ @@ -12,7 +12,7 @@ #include -#include "xenia/gpu/shader.h" +#include "xenia/gpu/spirv_shader.h" #include "xenia/gpu/xenos.h" #include "xenia/ui/vulkan/vulkan_provider.h" @@ -20,12 +20,12 @@ namespace xe { namespace gpu { namespace vulkan { -class VulkanShader : public Shader { +class VulkanShader : public SpirvShader { public: - class VulkanTranslation : public Translation { + class VulkanTranslation : public SpirvTranslation { public: - VulkanTranslation(VulkanShader& shader, uint64_t modification) - : Translation(shader, modification) {} + explicit VulkanTranslation(VulkanShader& shader, uint64_t modification) + : SpirvTranslation(shader, modification) {} ~VulkanTranslation() override; VkShaderModule GetOrCreateShaderModule(); @@ -35,16 +35,43 @@ class VulkanShader : public Shader { VkShaderModule shader_module_ = VK_NULL_HANDLE; }; - VulkanShader(const ui::vulkan::VulkanProvider& provider, - xenos::ShaderType shader_type, uint64_t ucode_data_hash, - const uint32_t* ucode_dwords, size_t ucode_dword_count, - std::endian ucode_source_endian = std::endian::big); + explicit VulkanShader(const ui::vulkan::VulkanProvider& provider, + xenos::ShaderType shader_type, uint64_t ucode_data_hash, + const uint32_t* ucode_dwords, size_t ucode_dword_count, + std::endian ucode_source_endian = std::endian::big); + + // For owning subsystem like the pipeline cache, accessors for unique + // identifiers (used instead of hashes to make sure collisions can't happen) + // of binding layouts used by the shader, for invalidation if a shader with an + // incompatible layout has been bound. + size_t GetTextureBindingLayoutUserUID() const { + return texture_binding_layout_user_uid_; + } + size_t GetSamplerBindingLayoutUserUID() const { + return sampler_binding_layout_user_uid_; + } + // Modifications of the same shader can be translated on different threads. 
+ // The "set" function must only be called if "enter" returned true - these are + // set up only once. + bool EnterBindingLayoutUserUIDSetup() { + return !binding_layout_user_uids_set_up_.test_and_set(); + } + void SetTextureBindingLayoutUserUID(size_t uid) { + texture_binding_layout_user_uid_ = uid; + } + void SetSamplerBindingLayoutUserUID(size_t uid) { + sampler_binding_layout_user_uid_ = uid; + } protected: Translation* CreateTranslationInstance(uint64_t modification) override; private: const ui::vulkan::VulkanProvider& provider_; + + std::atomic_flag binding_layout_user_uids_set_up_ = ATOMIC_FLAG_INIT; + size_t texture_binding_layout_user_uid_ = 0; + size_t sampler_binding_layout_user_uid_ = 0; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.h b/src/xenia/gpu/vulkan/vulkan_texture_cache.h index 69f3965d5..ee4f8b9ee 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.h @@ -45,6 +45,17 @@ class VulkanTextureCache final : public TextureCache { void BeginSubmission(uint64_t new_submission_index) override; + VkImageView GetNullImageView(xenos::FetchOpDimension dimension) const { + switch (dimension) { + case xenos::FetchOpDimension::k3DOrStacked: + return null_image_view_3d_; + case xenos::FetchOpDimension::kCube: + return null_image_view_cube_; + default: + return null_image_view_2d_array_; + } + } + protected: uint32_t GetHostFormatSwizzle(TextureKey key) const override; diff --git a/src/xenia/ui/vulkan/single_layout_descriptor_set_pool.cc b/src/xenia/ui/vulkan/single_layout_descriptor_set_pool.cc index 8dfff2a3f..5b07c0673 100644 --- a/src/xenia/ui/vulkan/single_layout_descriptor_set_pool.cc +++ b/src/xenia/ui/vulkan/single_layout_descriptor_set_pool.cc @@ -94,8 +94,7 @@ size_t SingleLayoutDescriptorSetPool::Allocate() { if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, &descriptor_set) != VK_SUCCESS) { XELOGE( - "SingleLayoutDescriptorSetPool: Failed to 
allocate a descriptor " - "layout"); + "SingleLayoutDescriptorSetPool: Failed to allocate a descriptor set"); if (current_pool_sets_remaining_ >= pool_set_count_) { // Failed to allocate in a new pool - something completely wrong, don't // store empty pools as full. diff --git a/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.cc b/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.cc new file mode 100644 index 000000000..62621bb49 --- /dev/null +++ b/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.cc @@ -0,0 +1,216 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/ui/vulkan/single_type_descriptor_set_allocator.h" + +#include "xenia/base/logging.h" +#include "xenia/ui/vulkan/vulkan_util.h" + +namespace xe { +namespace ui { +namespace vulkan { + +void SingleTypeDescriptorSetAllocator::Reset() { + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device, + page_usable_latest_.pool); + for (const std::pair& page_pair : pages_usable_) { + dfn.vkDestroyDescriptorPool(device, page_pair.second.pool, nullptr); + } + pages_usable_.clear(); + for (VkDescriptorPool pool : pages_full_) { + dfn.vkDestroyDescriptorPool(device, pool, nullptr); + } + pages_full_.clear(); +} + +VkDescriptorSet SingleTypeDescriptorSetAllocator::Allocate( + VkDescriptorSetLayout descriptor_set_layout, uint32_t descriptor_count) { + assert_not_zero(descriptor_count); + if (descriptor_count == 0) { + return VK_NULL_HANDLE; + } + + const 
ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + + VkDescriptorSetAllocateInfo descriptor_set_allocate_info; + descriptor_set_allocate_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + descriptor_set_allocate_info.pNext = nullptr; + descriptor_set_allocate_info.descriptorSetCount = 1; + descriptor_set_allocate_info.pSetLayouts = &descriptor_set_layout; + VkDescriptorSet descriptor_set; + + if (descriptor_count > descriptor_pool_size_.descriptorCount) { + // Can't allocate in the pool, need a dedicated allocation. + VkDescriptorPoolSize dedicated_descriptor_pool_size; + dedicated_descriptor_pool_size.type = descriptor_pool_size_.type; + dedicated_descriptor_pool_size.descriptorCount = descriptor_count; + VkDescriptorPoolCreateInfo dedicated_descriptor_pool_create_info; + dedicated_descriptor_pool_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + dedicated_descriptor_pool_create_info.pNext = nullptr; + dedicated_descriptor_pool_create_info.flags = 0; + dedicated_descriptor_pool_create_info.maxSets = 1; + dedicated_descriptor_pool_create_info.poolSizeCount = 1; + dedicated_descriptor_pool_create_info.pPoolSizes = + &dedicated_descriptor_pool_size; + VkDescriptorPool dedicated_descriptor_pool; + if (dfn.vkCreateDescriptorPool( + device, &dedicated_descriptor_pool_create_info, nullptr, + &dedicated_descriptor_pool) != VK_SUCCESS) { + XELOGE( + "SingleTypeDescriptorSetAllocator: Failed to create a dedicated pool " + "for {} descriptors", + dedicated_descriptor_pool_size.descriptorCount); + return VK_NULL_HANDLE; + } + descriptor_set_allocate_info.descriptorPool = dedicated_descriptor_pool; + if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, + &descriptor_set) != VK_SUCCESS) { + XELOGE( + "SingleTypeDescriptorSetAllocator: Failed to allocate {} descriptors " + "in a dedicated pool", + descriptor_count); + dfn.vkDestroyDescriptorPool(device, 
dedicated_descriptor_pool, nullptr); + return VK_NULL_HANDLE; + } + pages_full_.push_back(dedicated_descriptor_pool); + return descriptor_set; + } + + // Try allocating from the latest page an allocation has happened from, to + // avoid detaching from the map and re-attaching for every allocation. + if (page_usable_latest_.pool != VK_NULL_HANDLE) { + assert_not_zero(page_usable_latest_.descriptors_remaining); + assert_not_zero(page_usable_latest_.descriptor_sets_remaining); + if (page_usable_latest_.descriptors_remaining >= descriptor_count) { + descriptor_set_allocate_info.descriptorPool = page_usable_latest_.pool; + if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, + &descriptor_set) == VK_SUCCESS) { + page_usable_latest_.descriptors_remaining -= descriptor_count; + --page_usable_latest_.descriptor_sets_remaining; + if (!page_usable_latest_.descriptors_remaining || + !page_usable_latest_.descriptor_sets_remaining) { + pages_full_.push_back(page_usable_latest_.pool); + page_usable_latest_.pool = VK_NULL_HANDLE; + } + return descriptor_set; + } + // Failed to allocate internally even though there should be enough space, + // don't try to allocate from this pool again at all. + pages_full_.push_back(page_usable_latest_.pool); + page_usable_latest_.pool = VK_NULL_HANDLE; + } + } + + // If allocating from the latest pool wasn't possible, pick any that has free + // space. Prefer filling pages that have the most free space as they can more + // likely be used for more allocations later. + while (!pages_usable_.empty()) { + auto page_usable_last_it = std::prev(pages_usable_.cend()); + if (page_usable_last_it->second.descriptors_remaining < descriptor_count) { + // All other pages_usable_ entries have fewer free descriptors too (the + // remaining count is the map key). 
+ break; + } + // Remove the page from the map unconditionally - in case of a successful + // allocation, it will have a different number of free descriptors, thus a + // new map key (but it will also become page_usable_latest_ instead even), + // or will become full, and in case of a failure to allocate internally even + // though there still should be enough space, it should never be allocated + // from again. + Page map_page = pages_usable_.crend()->second; + pages_usable_.erase(page_usable_last_it); + descriptor_set_allocate_info.descriptorPool = map_page.pool; + if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, + &descriptor_set) != VK_SUCCESS) { + pages_full_.push_back(map_page.pool); + continue; + } + map_page.descriptors_remaining -= descriptor_count; + --map_page.descriptor_sets_remaining; + if (!map_page.descriptors_remaining || + !map_page.descriptor_sets_remaining) { + pages_full_.push_back(map_page.pool); + } else { + if (page_usable_latest_.pool != VK_NULL_HANDLE) { + // Make the page with more free descriptors the next to allocate from. + if (map_page.descriptors_remaining > + page_usable_latest_.descriptors_remaining) { + pages_usable_.emplace(page_usable_latest_.descriptors_remaining, + page_usable_latest_); + page_usable_latest_ = map_page; + } else { + pages_usable_.emplace(map_page.descriptors_remaining, map_page); + } + } else { + page_usable_latest_ = map_page; + } + } + return descriptor_set; + } + + // Try allocating from a new page. 
+ VkDescriptorPoolCreateInfo new_descriptor_pool_create_info; + new_descriptor_pool_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + new_descriptor_pool_create_info.pNext = nullptr; + new_descriptor_pool_create_info.flags = 0; + new_descriptor_pool_create_info.maxSets = descriptor_sets_per_page_; + new_descriptor_pool_create_info.poolSizeCount = 1; + new_descriptor_pool_create_info.pPoolSizes = &descriptor_pool_size_; + VkDescriptorPool new_descriptor_pool; + if (dfn.vkCreateDescriptorPool(device, &new_descriptor_pool_create_info, + nullptr, &new_descriptor_pool) != VK_SUCCESS) { + XELOGE( + "SingleTypeDescriptorSetAllocator: Failed to create a pool for {} sets " + "with {} descriptors", + descriptor_sets_per_page_, descriptor_pool_size_.descriptorCount); + return VK_NULL_HANDLE; + } + descriptor_set_allocate_info.descriptorPool = new_descriptor_pool; + if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, + &descriptor_set) != VK_SUCCESS) { + XELOGE( + "SingleTypeDescriptorSetAllocator: Failed to allocate {} descriptors", + descriptor_count); + dfn.vkDestroyDescriptorPool(device, new_descriptor_pool, nullptr); + return VK_NULL_HANDLE; + } + Page new_page; + new_page.pool = new_descriptor_pool; + new_page.descriptors_remaining = + descriptor_pool_size_.descriptorCount - descriptor_count; + new_page.descriptor_sets_remaining = descriptor_sets_per_page_ - 1; + if (!new_page.descriptors_remaining || !new_page.descriptor_sets_remaining) { + pages_full_.push_back(new_page.pool); + } else { + if (page_usable_latest_.pool != VK_NULL_HANDLE) { + // Make the page with more free descriptors the next to allocate from. 
+ if (new_page.descriptors_remaining > + page_usable_latest_.descriptors_remaining) { + pages_usable_.emplace(page_usable_latest_.descriptors_remaining, + page_usable_latest_); + page_usable_latest_ = new_page; + } else { + pages_usable_.emplace(new_page.descriptors_remaining, new_page); + } + } else { + page_usable_latest_ = new_page; + } + } + return descriptor_set; +} + +} // namespace vulkan +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.h b/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.h new file mode 100644 index 000000000..7a21f6f35 --- /dev/null +++ b/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.h @@ -0,0 +1,84 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_ +#define XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_ + +#include +#include +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace ui { +namespace vulkan { + +// Allocates multiple descriptors of a single type in descriptor set layouts +// consisting of descriptors of only that type. There's no way to free these +// descriptors within the SingleTypeDescriptorSetAllocator, per-layout free +// lists should be used externally. 
+class SingleTypeDescriptorSetAllocator { + public: + explicit SingleTypeDescriptorSetAllocator( + const ui::vulkan::VulkanProvider& provider, + VkDescriptorType descriptor_type, uint32_t descriptors_per_page, + uint32_t descriptor_sets_per_page) + : provider_(provider), + descriptor_sets_per_page_(descriptor_sets_per_page) { + assert_not_zero(descriptor_sets_per_page_); + descriptor_pool_size_.type = descriptor_type; + // Not allocating sets with 0 descriptors using the allocator - pointless to + // have the descriptor count below the set count. + descriptor_pool_size_.descriptorCount = + std::max(descriptors_per_page, descriptor_sets_per_page); + } + SingleTypeDescriptorSetAllocator( + const SingleTypeDescriptorSetAllocator& allocator) = delete; + SingleTypeDescriptorSetAllocator& operator=( + const SingleTypeDescriptorSetAllocator& allocator) = delete; + ~SingleTypeDescriptorSetAllocator() { Reset(); } + + void Reset(); + + VkDescriptorSet Allocate(VkDescriptorSetLayout descriptor_set_layout, + uint32_t descriptor_count); + + private: + struct Page { + VkDescriptorPool pool; + uint32_t descriptors_remaining; + uint32_t descriptor_sets_remaining; + }; + + const ui::vulkan::VulkanProvider& provider_; + + VkDescriptorPoolSize descriptor_pool_size_; + uint32_t descriptor_sets_per_page_; + + std::vector pages_full_; + // Because allocations must be contiguous, overflow may happen even if a page + // still has free descriptors, so multiple pages may have free space. + // To avoid removing and re-adding the page to the map that keeps them sorted + // (the key is the number of free descriptors remaining, and it changes at + // every allocation from a page), instead of always looking for a free space + // in the map, maintaining one page outside the map, and allocation attempts + // will be made from that page first. + std::multimap pages_usable_; + // Doesn't exist if page_usable_latest_.pool == VK_NULL_HANDLE. 
+ Page page_usable_latest_ = {}; +}; + +} // namespace vulkan +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_ diff --git a/src/xenia/ui/vulkan/transient_descriptor_pool.cc b/src/xenia/ui/vulkan/transient_descriptor_pool.cc deleted file mode 100644 index 963738d80..000000000 --- a/src/xenia/ui/vulkan/transient_descriptor_pool.cc +++ /dev/null @@ -1,162 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/ui/vulkan/transient_descriptor_pool.h" - -#include - -#include "xenia/base/assert.h" -#include "xenia/base/logging.h" - -namespace xe { -namespace ui { -namespace vulkan { - -TransientDescriptorPool::TransientDescriptorPool( - const VulkanProvider& provider, VkDescriptorType descriptor_type, - uint32_t page_descriptor_set_count, uint32_t page_descriptor_count) - : provider_(provider), - descriptor_type_(descriptor_type), - page_descriptor_set_count_(page_descriptor_set_count), - page_descriptor_count_(page_descriptor_count) { - assert_not_zero(page_descriptor_set_count); - assert_true(page_descriptor_set_count <= page_descriptor_count); -} - -TransientDescriptorPool::~TransientDescriptorPool() { ClearCache(); } - -void TransientDescriptorPool::Reclaim(uint64_t completed_submission_index) { - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - while (!pages_submitted_.empty()) { - const auto& descriptor_pool_pair = pages_submitted_.front(); - if (descriptor_pool_pair.second > completed_submission_index) { - break; - } - dfn.vkResetDescriptorPool(device, 
descriptor_pool_pair.first, 0); - pages_writable_.push_back(descriptor_pool_pair.first); - pages_submitted_.pop_front(); - } -} - -void TransientDescriptorPool::ClearCache() { - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - for (const auto& descriptor_pool_pair : pages_submitted_) { - dfn.vkDestroyDescriptorPool(device, descriptor_pool_pair.first, nullptr); - } - pages_submitted_.clear(); - page_current_descriptors_used_ = 0; - page_current_descriptor_sets_used_ = 0; - page_current_last_submission_ = 0; - for (VkDescriptorPool descriptor_pool : pages_writable_) { - dfn.vkDestroyDescriptorPool(device, descriptor_pool, nullptr); - } - pages_writable_.clear(); -} - -VkDescriptorSet TransientDescriptorPool::Request( - uint64_t submission_index, VkDescriptorSetLayout layout, - uint32_t layout_descriptor_count) { - assert_true(submission_index >= page_current_last_submission_); - assert_not_zero(layout_descriptor_count); - assert_true(layout_descriptor_count <= page_descriptor_count_); - - const VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); - VkDevice device = provider_.device(); - - VkDescriptorSetAllocateInfo descriptor_set_allocate_info; - descriptor_set_allocate_info.sType = - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - descriptor_set_allocate_info.pNext = nullptr; - descriptor_set_allocate_info.descriptorSetCount = 1; - descriptor_set_allocate_info.pSetLayouts = &layout; - VkDescriptorSet descriptor_set; - - // Try to allocate as normal. - // TODO(Triang3l): Investigate the possibility of reuse of descriptor sets, as - // vkAllocateDescriptorSets may be implemented suboptimally. 
- if (!pages_writable_.empty()) { - if (page_current_descriptor_sets_used_ < page_descriptor_set_count_ && - page_current_descriptors_used_ + layout_descriptor_count <= - page_descriptor_count_) { - descriptor_set_allocate_info.descriptorPool = pages_writable_.front(); - switch (dfn.vkAllocateDescriptorSets( - device, &descriptor_set_allocate_info, &descriptor_set)) { - case VK_SUCCESS: - page_current_last_submission_ = submission_index; - ++page_current_descriptor_sets_used_; - page_current_descriptors_used_ += layout_descriptor_count; - return descriptor_set; - case VK_ERROR_FRAGMENTED_POOL: - case VK_ERROR_OUT_OF_POOL_MEMORY: - // Need to create a new pool. - break; - default: - XELOGE( - "Failed to allocate a transient Vulkan descriptor set with {} " - "descriptors of type {}", - layout_descriptor_count, uint32_t(descriptor_type_)); - return VK_NULL_HANDLE; - } - } - - // Overflow - go to the next pool. - pages_submitted_.emplace_back(pages_writable_.front(), - page_current_last_submission_); - pages_writable_.front() = pages_writable_.back(); - pages_writable_.pop_back(); - page_current_descriptor_sets_used_ = 0; - page_current_descriptors_used_ = 0; - } - - if (pages_writable_.empty()) { - VkDescriptorPoolSize descriptor_pool_size; - descriptor_pool_size.type = descriptor_type_; - descriptor_pool_size.descriptorCount = page_descriptor_count_; - VkDescriptorPoolCreateInfo descriptor_pool_create_info; - descriptor_pool_create_info.sType = - VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - descriptor_pool_create_info.pNext = nullptr; - descriptor_pool_create_info.flags = 0; - descriptor_pool_create_info.maxSets = page_descriptor_set_count_; - descriptor_pool_create_info.poolSizeCount = 1; - descriptor_pool_create_info.pPoolSizes = &descriptor_pool_size; - VkDescriptorPool descriptor_pool; - if (dfn.vkCreateDescriptorPool(device, &descriptor_pool_create_info, - nullptr, &descriptor_pool) != VK_SUCCESS) { - XELOGE( - "Failed to create a transient Vulkan descriptor 
pool for {} sets of " - "up to {} descriptors of type {}", - page_descriptor_set_count_, page_descriptor_count_, - uint32_t(descriptor_type_)); - return VK_NULL_HANDLE; - } - pages_writable_.push_back(descriptor_pool); - } - - // Try to allocate after handling overflow. - descriptor_set_allocate_info.descriptorPool = pages_writable_.front(); - if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, - &descriptor_set) != VK_SUCCESS) { - XELOGE( - "Failed to allocate a transient Vulkan descriptor set with {} " - "descriptors of type {}", - layout_descriptor_count, uint32_t(descriptor_type_)); - return VK_NULL_HANDLE; - } - page_current_last_submission_ = submission_index; - ++page_current_descriptor_sets_used_; - page_current_descriptors_used_ += layout_descriptor_count; - return descriptor_set; -} - -} // namespace vulkan -} // namespace ui -} // namespace xe diff --git a/src/xenia/ui/vulkan/transient_descriptor_pool.h b/src/xenia/ui/vulkan/transient_descriptor_pool.h deleted file mode 100644 index 07760aff0..000000000 --- a/src/xenia/ui/vulkan/transient_descriptor_pool.h +++ /dev/null @@ -1,61 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_UI_VULKAN_TRANSIENT_DESCRIPTOR_POOL_H_ -#define XENIA_UI_VULKAN_TRANSIENT_DESCRIPTOR_POOL_H_ - -#include -#include -#include -#include - -#include "xenia/ui/vulkan/vulkan_provider.h" - -namespace xe { -namespace ui { -namespace vulkan { - -// A pool of descriptor pools for single-submission use. 
For simplicity of -// tracking when overflow happens, only allocating descriptors for sets -// containing descriptors of a single type. -class TransientDescriptorPool { - public: - TransientDescriptorPool(const VulkanProvider& provider, - VkDescriptorType descriptor_type, - uint32_t page_descriptor_set_count, - uint32_t page_descriptor_count); - ~TransientDescriptorPool(); - - void Reclaim(uint64_t completed_submission_index); - void ClearCache(); - - // Returns the allocated set, or VK_NULL_HANDLE if failed to allocate. - VkDescriptorSet Request(uint64_t submission_index, - VkDescriptorSetLayout layout, - uint32_t layout_descriptor_count); - - private: - const VulkanProvider& provider_; - - VkDescriptorType descriptor_type_; - uint32_t page_descriptor_set_count_; - uint32_t page_descriptor_count_; - - std::vector pages_writable_; - uint64_t page_current_last_submission_ = 0; - uint32_t page_current_descriptor_sets_used_ = 0; - uint32_t page_current_descriptors_used_ = 0; - std::deque> pages_submitted_; -}; - -} // namespace vulkan -} // namespace ui -} // namespace xe - -#endif // XENIA_UI_VULKAN_TRANSIENT_DESCRIPTOR_POOL_H_ From 08769de68b8e67040d60cc5661db6989274d9381 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 19 May 2022 21:56:15 +0300 Subject: [PATCH 086/123] [Vulkan] Texture object and view creation --- .../gpu/vulkan/vulkan_command_processor.cc | 36 +- src/xenia/gpu/vulkan/vulkan_texture_cache.cc | 1033 ++++++++++++++++- src/xenia/gpu/vulkan/vulkan_texture_cache.h | 209 +++- .../ui/vulkan/functions/instance_1_0.inc | 1 + 4 files changed, 1251 insertions(+), 28 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index d9b81f9a8..6dd91def7 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -738,6 +738,10 @@ void VulkanCommandProcessor::WriteRegister(uint32_t index, uint32_t value) { index <= 
XE_GPU_REG_SHADER_CONSTANT_FETCH_31_5) { current_graphics_descriptor_set_values_up_to_date_ &= ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants); + if (texture_cache_) { + texture_cache_->TextureFetchConstantWritten( + (index - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6); + } } } @@ -1548,13 +1552,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, *pixel_shader, normalized_color_mask) : SpirvShaderTranslator::Modification(0); - // Set up the render targets - this may perform dispatches and draws. - if (!render_target_cache_->Update(is_rasterization_done, - normalized_depth_control, - normalized_color_mask, *vertex_shader)) { - return false; - } - // Translate the shaders. VulkanShader::VulkanTranslation* vertex_shader_translation = static_cast( @@ -1566,6 +1563,23 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, pixel_shader_modification.value)) : nullptr; + // Update the textures before other work in the submission because samplers + // depend on this (and in case of sampler overflow in a submission, + // submissions must be split) - may perform dispatches. + uint32_t used_texture_mask = + vertex_shader->GetUsedTextureMaskAfterTranslation() | + (pixel_shader != nullptr + ? pixel_shader->GetUsedTextureMaskAfterTranslation() + : 0); + texture_cache_->RequestTextures(used_texture_mask); + + // Set up the render targets - this may perform dispatches and draws. + if (!render_target_cache_->Update(is_rasterization_done, + normalized_depth_control, + normalized_color_mask, *vertex_shader)) { + return false; + } + // Update the graphics pipeline, and if the new graphics pipeline has a // different layout, invalidate incompatible descriptor sets before updating // current_guest_graphics_pipeline_layout_. 
@@ -2758,7 +2772,9 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, VkDescriptorImageInfo& descriptor_image_info = descriptor_write_image_info_.emplace_back(); descriptor_image_info.imageView = - texture_cache_->GetNullImageView(texture_binding.dimension); + texture_cache_->GetActiveBindingOrNullImageView( + texture_binding.fetch_constant, texture_binding.dimension, + bool(texture_binding.is_signed)); descriptor_image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } @@ -2782,7 +2798,9 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, VkDescriptorImageInfo& descriptor_image_info = descriptor_write_image_info_.emplace_back(); descriptor_image_info.imageView = - texture_cache_->GetNullImageView(texture_binding.dimension); + texture_cache_->GetActiveBindingOrNullImageView( + texture_binding.fetch_constant, texture_binding.dimension, + bool(texture_binding.is_signed)); descriptor_image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index c9b8f8e58..348c31318 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -14,6 +14,8 @@ #include "xenia/base/assert.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" +#include "xenia/base/profiling.h" +#include "xenia/gpu/texture_info.h" #include "xenia/gpu/vulkan/vulkan_command_processor.h" #include "xenia/ui/vulkan/vulkan_util.h" @@ -75,6 +77,322 @@ namespace shaders { #include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rgba16_unorm_float_scaled_cs.h" } // namespace shaders +const VulkanTextureCache::HostFormatPair + VulkanTextureCache::kBestHostFormats[64] = { + // k_1_REVERSE + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + // k_1 + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + 
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + // k_8 + {{LoadMode::k8bpb, VK_FORMAT_R8_UNORM}, + {LoadMode::k8bpb, VK_FORMAT_R8_SNORM}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR, + true}, + // k_1_5_5_5 + // Red and blue swapped in the load shader for simplicity. + {{LoadMode::kR5G5B5A1ToB5G5R5A1, VK_FORMAT_A1R5G5B5_UNORM_PACK16}, + {}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + // k_5_6_5 + // Red and blue swapped in the load shader for simplicity. + {{LoadMode::kR5G6B5ToB5G6R5, VK_FORMAT_R5G6B5_UNORM_PACK16}, + {}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, + // k_6_5_5 + // On the host, green bits in blue, blue bits in green. + {{LoadMode::kR5G5B6ToB5G6R5WithRBGASwizzle, + VK_FORMAT_R5G6B5_UNORM_PACK16}, + {}, + XE_GPU_MAKE_TEXTURE_SWIZZLE(R, B, G, G)}, + // k_8_8_8_8 + {{LoadMode::k32bpb, VK_FORMAT_R8G8B8A8_UNORM}, + {LoadMode::k32bpb, VK_FORMAT_R8G8B8A8_SNORM}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA, + true}, + // k_2_10_10_10 + // VK_FORMAT_A2B10G10R10_SNORM_PACK32 is optional. + {{LoadMode::k32bpb, VK_FORMAT_A2B10G10R10_UNORM_PACK32}, + {LoadMode::k32bpb, VK_FORMAT_A2B10G10R10_SNORM_PACK32}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA, + true}, + // k_8_A + {{LoadMode::k8bpb, VK_FORMAT_R8_UNORM}, + {LoadMode::k8bpb, VK_FORMAT_R8_SNORM}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR, + true}, + // k_8_B + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + // k_8_8 + {{LoadMode::k16bpb, VK_FORMAT_R8G8_UNORM}, + {LoadMode::k16bpb, VK_FORMAT_R8G8_SNORM}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG, + true}, + // k_Cr_Y1_Cb_Y0_REP + // VK_FORMAT_G8B8G8R8_422_UNORM_KHR (added in + // VK_KHR_sampler_ycbcr_conversion and promoted to Vulkan 1.1) is + // optional. + {{LoadMode::k32bpb, VK_FORMAT_G8B8G8R8_422_UNORM_KHR, 1, 0}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, + // k_Y1_Cr_Y0_Cb_REP + // VK_FORMAT_B8G8R8G8_422_UNORM_KHR (added in + // VK_KHR_sampler_ycbcr_conversion and promoted to Vulkan 1.1) is + // optional. 
+ {{LoadMode::k32bpb, VK_FORMAT_B8G8R8G8_422_UNORM_KHR, 1, 0}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, + // k_16_16_EDRAM + // Not usable as a texture, also has -32...32 range. + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + // k_8_8_8_8_A + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + // k_4_4_4_4 + // Components swapped in the load shader for simplicity. + {{LoadMode::kRGBA4ToARGB4, VK_FORMAT_B4G4R4A4_UNORM_PACK16}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + // k_10_11_11 + // TODO(Triang3l): 16_UNORM/SNORM are optional, convert to float16 + // instead. + {{LoadMode::kR11G11B10ToRGBA16, VK_FORMAT_R16G16B16A16_UNORM}, + {LoadMode::kR11G11B10ToRGBA16SNorm, VK_FORMAT_R16G16B16A16_SNORM}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, + // k_11_11_10 + // TODO(Triang3l): 16_UNORM/SNORM are optional, convert to float16 + // instead. + {{LoadMode::kR10G11B11ToRGBA16, VK_FORMAT_R16G16B16A16_UNORM}, + {LoadMode::kR10G11B11ToRGBA16SNorm, VK_FORMAT_R16G16B16A16_SNORM}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, + // k_DXT1 + // VK_FORMAT_BC1_RGBA_UNORM_BLOCK is optional. + {{LoadMode::k64bpb, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, 2, 2}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + // k_DXT2_3 + // VK_FORMAT_BC2_UNORM_BLOCK is optional. + {{LoadMode::k128bpb, VK_FORMAT_BC2_UNORM_BLOCK, 2, 2}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + // k_DXT4_5 + // VK_FORMAT_BC3_UNORM_BLOCK is optional. + {{LoadMode::k128bpb, VK_FORMAT_BC3_UNORM_BLOCK, 2, 2}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + // k_16_16_16_16_EDRAM + // Not usable as a texture, also has -32...32 range. 
+ {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + // k_24_8 + {{LoadMode::kDepthUnorm, VK_FORMAT_R32_SFLOAT}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + // k_24_8_FLOAT + {{LoadMode::kDepthFloat, VK_FORMAT_R32_SFLOAT}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + // k_16 + // VK_FORMAT_R16_UNORM and VK_FORMAT_R16_SNORM are optional. + {{LoadMode::k16bpb, VK_FORMAT_R16_UNORM}, + {LoadMode::k16bpb, VK_FORMAT_R16_SNORM}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR, + true}, + // k_16_16 + // VK_FORMAT_R16G16_UNORM and VK_FORMAT_R16G16_SNORM are optional. + {{LoadMode::k32bpb, VK_FORMAT_R16G16_UNORM}, + {LoadMode::k32bpb, VK_FORMAT_R16G16_SNORM}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG, + true}, + // k_16_16_16_16 + // VK_FORMAT_R16G16B16A16_UNORM and VK_FORMAT_R16G16B16A16_SNORM are + // optional. + {{LoadMode::k64bpb, VK_FORMAT_R16G16B16A16_UNORM}, + {LoadMode::k64bpb, VK_FORMAT_R16G16B16A16_SNORM}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA, + true}, + // k_16_EXPAND + {{LoadMode::k16bpb, VK_FORMAT_R16_SFLOAT}, + {LoadMode::k16bpb, VK_FORMAT_R16_SFLOAT}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR, + true}, + // k_16_16_EXPAND + {{LoadMode::k32bpb, VK_FORMAT_R16G16_SFLOAT}, + {LoadMode::k32bpb, VK_FORMAT_R16G16_SFLOAT}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG, + true}, + // k_16_16_16_16_EXPAND + {{LoadMode::k64bpb, VK_FORMAT_R16G16B16A16_SFLOAT}, + {LoadMode::k64bpb, VK_FORMAT_R16G16B16A16_SFLOAT}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA, + true}, + // k_16_FLOAT + {{LoadMode::k16bpb, VK_FORMAT_R16_SFLOAT}, + {LoadMode::k16bpb, VK_FORMAT_R16_SFLOAT}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR, + true}, + // k_16_16_FLOAT + {{LoadMode::k32bpb, VK_FORMAT_R16G16_SFLOAT}, + {LoadMode::k32bpb, VK_FORMAT_R16G16_SFLOAT}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG, + true}, + // k_16_16_16_16_FLOAT + {{LoadMode::k64bpb, VK_FORMAT_R16G16B16A16_SFLOAT}, + {LoadMode::k64bpb, VK_FORMAT_R16G16B16A16_SFLOAT}, + 
xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA, + true}, + // k_32 + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + // k_32_32 + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + // k_32_32_32_32 + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + // k_32_FLOAT + {{LoadMode::k32bpb, VK_FORMAT_R32_SFLOAT}, + {LoadMode::k32bpb, VK_FORMAT_R32_SFLOAT}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR, + true}, + // k_32_32_FLOAT + {{LoadMode::k64bpb, VK_FORMAT_R32G32_SFLOAT}, + {LoadMode::k64bpb, VK_FORMAT_R32G32_SFLOAT}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG, + true}, + // k_32_32_32_32_FLOAT + {{LoadMode::k128bpb, VK_FORMAT_R32G32B32A32_SFLOAT}, + {LoadMode::k128bpb, VK_FORMAT_R32G32B32A32_SFLOAT}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA, + true}, + // k_32_AS_8 + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + // k_32_AS_8_8 + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + // k_16_MPEG + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + // k_16_16_MPEG + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + // k_8_INTERLACED + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + // k_32_AS_8_INTERLACED + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + // k_32_AS_8_8_INTERLACED + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + // k_16_INTERLACED + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + // k_16_MPEG_INTERLACED + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + // k_16_16_MPEG_INTERLACED + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + // k_DXN + // VK_FORMAT_BC5_UNORM_BLOCK is optional. 
+ {{LoadMode::k128bpb, VK_FORMAT_BC5_UNORM_BLOCK, 2, 2}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + // k_8_8_8_8_AS_16_16_16_16 + {{LoadMode::k32bpb, VK_FORMAT_R8G8B8A8_UNORM}, + {LoadMode::k32bpb, VK_FORMAT_R8G8B8A8_SNORM}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA, + true}, + // k_DXT1_AS_16_16_16_16 + // VK_FORMAT_BC1_RGBA_UNORM_BLOCK is optional. + {{LoadMode::k64bpb, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, 2, 2}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + // k_DXT2_3_AS_16_16_16_16 + // VK_FORMAT_BC2_UNORM_BLOCK is optional. + {{LoadMode::k128bpb, VK_FORMAT_BC2_UNORM_BLOCK, 2, 2}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + // k_DXT4_5_AS_16_16_16_16 + // VK_FORMAT_BC3_UNORM_BLOCK is optional. + {{LoadMode::k128bpb, VK_FORMAT_BC3_UNORM_BLOCK, 2, 2}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + // k_2_10_10_10_AS_16_16_16_16 + // VK_FORMAT_A2B10G10R10_SNORM_PACK32 is optional. + {{LoadMode::k32bpb, VK_FORMAT_A2B10G10R10_UNORM_PACK32}, + {LoadMode::k32bpb, VK_FORMAT_A2B10G10R10_SNORM_PACK32}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA, + true}, + // k_10_11_11_AS_16_16_16_16 + // TODO(Triang3l): 16_UNORM/SNORM are optional, convert to float16 + // instead. + {{LoadMode::kR11G11B10ToRGBA16, VK_FORMAT_R16G16B16A16_UNORM}, + {LoadMode::kR11G11B10ToRGBA16SNorm, VK_FORMAT_R16G16B16A16_SNORM}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, + // k_11_11_10_AS_16_16_16_16 + // TODO(Triang3l): 16_UNORM/SNORM are optional, convert to float16 + // instead. + {{LoadMode::kR10G11B11ToRGBA16, VK_FORMAT_R16G16B16A16_UNORM}, + {LoadMode::kR10G11B11ToRGBA16SNorm, VK_FORMAT_R16G16B16A16_SNORM}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, + // k_32_32_32_FLOAT + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, + // k_DXT3A + {{LoadMode::kDXT3A, VK_FORMAT_R8_UNORM}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + // k_DXT5A + // VK_FORMAT_BC4_UNORM_BLOCK is optional. 
+ {{LoadMode::k64bpb, VK_FORMAT_BC4_UNORM_BLOCK, 2, 2}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + // k_CTX1 + {{LoadMode::kCTX1, VK_FORMAT_R8G8_UNORM}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, + // k_DXT3A_AS_1_1_1_1 + {{LoadMode::kDXT3AAs1111ToARGB4, VK_FORMAT_B4G4R4A4_UNORM_PACK16}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + // k_8_8_8_8_GAMMA_EDRAM + // Not usable as a texture. + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, + // k_2_10_10_10_FLOAT_EDRAM + // Not usable as a texture. + {{LoadMode::kUnknown}, + {LoadMode::kUnknown}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, +}; + VulkanTextureCache::~VulkanTextureCache() { const ui::vulkan::VulkanProvider& provider = command_processor_.GetVulkanProvider(); @@ -114,7 +432,8 @@ void VulkanTextureCache::BeginSubmission(uint64_t new_submission_index) { command_processor_.PushImageMemoryBarrier( null_images[i], null_image_subresource_range, 0, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, VK_ACCESS_TRANSFER_WRITE_BIT, - VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, false); } command_processor_.SubmitBarriers(true); DeferredCommandBuffer& command_buffer = @@ -137,15 +456,113 @@ void VulkanTextureCache::BeginSubmission(uint64_t new_submission_index) { VK_PIPELINE_STAGE_TRANSFER_BIT, guest_shader_pipeline_stages_, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, false); } null_images_cleared_ = true; } } +void VulkanTextureCache::RequestTextures(uint32_t used_texture_mask) { +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + + 
TextureCache::RequestTextures(used_texture_mask); + + // Transition the textures into the needed usage. + VkPipelineStageFlags dst_stage_mask; + VkAccessFlags dst_access_mask; + VkImageLayout new_layout; + GetTextureUsageMasks(VulkanTexture::Usage::kGuestShaderSampled, + dst_stage_mask, dst_access_mask, new_layout); + uint32_t textures_remaining = used_texture_mask; + uint32_t index; + while (xe::bit_scan_forward(textures_remaining, &index)) { + textures_remaining &= ~(uint32_t(1) << index); + const TextureBinding* binding = GetValidTextureBinding(index); + if (!binding) { + continue; + } + VulkanTexture* binding_texture = + static_cast(binding->texture); + if (binding_texture != nullptr) { + // Will be referenced by the command buffer, so mark as used. + binding_texture->MarkAsUsed(); + VulkanTexture::Usage old_usage = + binding_texture->SetUsage(VulkanTexture::Usage::kGuestShaderSampled); + if (old_usage != VulkanTexture::Usage::kGuestShaderSampled) { + VkPipelineStageFlags src_stage_mask; + VkAccessFlags src_access_mask; + VkImageLayout old_layout; + GetTextureUsageMasks(old_usage, src_stage_mask, src_access_mask, + old_layout); + command_processor_.PushImageMemoryBarrier( + binding_texture->image(), + ui::vulkan::util::InitializeSubresourceRange(), src_stage_mask, + dst_stage_mask, src_access_mask, dst_access_mask, old_layout, + new_layout); + } + } + VulkanTexture* binding_texture_signed = + static_cast(binding->texture_signed); + if (binding_texture_signed != nullptr) { + binding_texture_signed->MarkAsUsed(); + VulkanTexture::Usage old_usage = binding_texture_signed->SetUsage( + VulkanTexture::Usage::kGuestShaderSampled); + if (old_usage != VulkanTexture::Usage::kGuestShaderSampled) { + VkPipelineStageFlags src_stage_mask; + VkAccessFlags src_access_mask; + VkImageLayout old_layout; + GetTextureUsageMasks(old_usage, src_stage_mask, src_access_mask, + old_layout); + command_processor_.PushImageMemoryBarrier( + binding_texture_signed->image(), + 
ui::vulkan::util::InitializeSubresourceRange(), src_stage_mask, + dst_stage_mask, src_access_mask, dst_access_mask, old_layout, + new_layout); + } + } + } +} + +VkImageView VulkanTextureCache::GetActiveBindingOrNullImageView( + uint32_t fetch_constant_index, xenos::FetchOpDimension dimension, + bool is_signed) const { + VkImageView image_view = VK_NULL_HANDLE; + const TextureBinding* binding = GetValidTextureBinding(fetch_constant_index); + if (binding && AreDimensionsCompatible(dimension, binding->key.dimension)) { + const VulkanTextureBinding& vulkan_binding = + vulkan_texture_bindings_[fetch_constant_index]; + image_view = is_signed ? vulkan_binding.image_view_signed + : vulkan_binding.image_view_unsigned; + } + if (image_view != VK_NULL_HANDLE) { + return image_view; + } + switch (dimension) { + case xenos::FetchOpDimension::k3DOrStacked: + return null_image_view_3d_; + case xenos::FetchOpDimension::kCube: + return null_image_view_cube_; + default: + return null_image_view_2d_array_; + } +} + +bool VulkanTextureCache::IsSignedVersionSeparateForFormat( + TextureKey key) const { + const HostFormatPair& host_format_pair = host_formats_[uint32_t(key.format)]; + if (host_format_pair.format_unsigned.format == VK_FORMAT_UNDEFINED || + host_format_pair.format_signed.format == VK_FORMAT_UNDEFINED) { + // Just one signedness. + return false; + } + return !host_format_pair.unsigned_signed_compatible; +} + uint32_t VulkanTextureCache::GetHostFormatSwizzle(TextureKey key) const { - // TODO(Triang3l): Implement GetHostFormatSwizzle. - return xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA; + return host_formats_[uint32_t(key.format)].swizzle; } uint32_t VulkanTextureCache::GetMaxHostTextureWidthHeight( @@ -194,8 +611,100 @@ uint32_t VulkanTextureCache::GetMaxHostTextureDepthOrArraySize( std::unique_ptr VulkanTextureCache::CreateTexture( TextureKey key) { - // TODO(Triang3l): Implement CreateTexture. 
- return std::unique_ptr(new VulkanTexture(*this, key)); + VkFormat formats[] = {VK_FORMAT_UNDEFINED, VK_FORMAT_UNDEFINED}; + const HostFormatPair& host_format = host_formats_[uint32_t(key.format)]; + if (host_format.format_signed.format == VK_FORMAT_UNDEFINED) { + // Only the unsigned format may be available, if at all. + formats[0] = host_format.format_unsigned.format; + } else if (host_format.format_unsigned.format == VK_FORMAT_UNDEFINED) { + // Only the signed format may be available, if at all. + formats[0] = host_format.format_signed.format; + } else { + // Both unsigned and signed formats are available. + if (IsSignedVersionSeparateForFormat(key)) { + formats[0] = key.signed_separate ? host_format.format_signed.format + : host_format.format_unsigned.format; + } else { + // Same format for unsigned and signed, or compatible formats. + formats[0] = host_format.format_unsigned.format; + if (host_format.format_signed.format != + host_format.format_unsigned.format) { + assert_not_zero(host_format.unsigned_signed_compatible); + formats[1] = host_format.format_signed.format; + } + } + } + if (formats[0] == VK_FORMAT_UNDEFINED) { + // TODO(Triang3l): If there's no best format, set that a format unsupported + // by the emulator completely is used to report at the end of the frame. 
+ return nullptr; + } + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + bool is_3d = key.dimension == xenos::DataDimension::k3D; + uint32_t depth_or_array_size = key.GetDepthOrArraySize(); + + VkImageCreateInfo image_create_info; + VkImageCreateInfo* image_create_info_last = &image_create_info; + image_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + image_create_info.pNext = nullptr; + image_create_info.flags = 0; + if (formats[1] != VK_FORMAT_UNDEFINED) { + image_create_info.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + } + if (key.dimension == xenos::DataDimension::kCube) { + image_create_info.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; + } + image_create_info.imageType = is_3d ? VK_IMAGE_TYPE_3D : VK_IMAGE_TYPE_2D; + image_create_info.format = formats[0]; + image_create_info.extent.width = key.GetWidth(); + image_create_info.extent.height = key.GetHeight(); + if (key.scaled_resolve) { + image_create_info.extent.width *= draw_resolution_scale_x(); + image_create_info.extent.height *= draw_resolution_scale_y(); + } + image_create_info.extent.depth = is_3d ? depth_or_array_size : 1; + image_create_info.mipLevels = key.mip_max_level + 1; + image_create_info.arrayLayers = is_3d ? 
1 : depth_or_array_size; + image_create_info.samples = VK_SAMPLE_COUNT_1_BIT; + image_create_info.tiling = VK_IMAGE_TILING_OPTIMAL; + image_create_info.usage = + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + image_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + image_create_info.queueFamilyIndexCount = 0; + image_create_info.pQueueFamilyIndices = nullptr; + image_create_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + VkImageFormatListCreateInfoKHR image_format_list_create_info; + if (formats[1] != VK_FORMAT_UNDEFINED && + provider.device_extensions().khr_image_format_list) { + image_create_info_last->pNext = &image_format_list_create_info; + image_create_info_last = + reinterpret_cast(&image_format_list_create_info); + image_format_list_create_info.sType = + VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR; + image_format_list_create_info.pNext = nullptr; + image_format_list_create_info.viewFormatCount = 2; + image_format_list_create_info.pViewFormats = formats; + } + // TODO(Triang3l): Suballocate due to the low memory allocation count limit on + // Windows (use VMA or a custom allocator, possibly based on two-level + // segregated fit just like VMA). 
+ VkImage image; + VkDeviceMemory memory; + VkDeviceSize memory_size; + if (!ui::vulkan::util::CreateDedicatedAllocationImage( + provider, image_create_info, + ui::vulkan::util::MemoryPurpose::kDeviceLocal, image, memory, nullptr, + &memory_size)) { + return nullptr; + } + + return std::unique_ptr( + new VulkanTexture(*this, key, image, memory, memory_size)); } bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, @@ -205,9 +714,148 @@ bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, return true; } +void VulkanTextureCache::UpdateTextureBindingsImpl( + uint32_t fetch_constant_mask) { + uint32_t bindings_remaining = fetch_constant_mask; + uint32_t binding_index; + while (xe::bit_scan_forward(bindings_remaining, &binding_index)) { + bindings_remaining &= ~(UINT32_C(1) << binding_index); + VulkanTextureBinding& vulkan_binding = + vulkan_texture_bindings_[binding_index]; + vulkan_binding.Reset(); + const TextureBinding* binding = GetValidTextureBinding(binding_index); + if (!binding) { + continue; + } + if (IsSignedVersionSeparateForFormat(binding->key)) { + if (binding->texture && + texture_util::IsAnySignNotSigned(binding->swizzled_signs)) { + vulkan_binding.image_view_unsigned = + static_cast(binding->texture) + ->GetView(false, binding->host_swizzle); + } + if (binding->texture_signed && + texture_util::IsAnySignSigned(binding->swizzled_signs)) { + vulkan_binding.image_view_signed = + static_cast(binding->texture_signed) + ->GetView(true, binding->host_swizzle); + } + } else { + VulkanTexture* texture = static_cast(binding->texture); + if (texture) { + if (texture_util::IsAnySignNotSigned(binding->swizzled_signs)) { + vulkan_binding.image_view_unsigned = + texture->GetView(false, binding->host_swizzle); + } + if (texture_util::IsAnySignSigned(binding->swizzled_signs)) { + vulkan_binding.image_view_signed = + texture->GetView(true, binding->host_swizzle); + } + } + } + } +} + 
VulkanTextureCache::VulkanTexture::VulkanTexture( - VulkanTextureCache& texture_cache, const TextureKey& key) - : Texture(texture_cache, key) {} + VulkanTextureCache& texture_cache, const TextureKey& key, VkImage image, + VkDeviceMemory memory, VkDeviceSize memory_size) + : Texture(texture_cache, key), image_(image), memory_(memory) { + SetHostMemoryUsage(uint64_t(memory_size)); +} + +VulkanTextureCache::VulkanTexture::~VulkanTexture() { + const VulkanTextureCache& vulkan_texture_cache = + static_cast(texture_cache()); + const ui::vulkan::VulkanProvider& provider = + vulkan_texture_cache.command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + for (const auto& view_pair : views_) { + dfn.vkDestroyImageView(device, view_pair.second, nullptr); + } + dfn.vkDestroyImage(device, image_, nullptr); + dfn.vkFreeMemory(device, memory_, nullptr); +} + +VkImageView VulkanTextureCache::VulkanTexture::GetView(bool is_signed, + uint32_t host_swizzle) { + const VulkanTextureCache& vulkan_texture_cache = + static_cast(texture_cache()); + + ViewKey view_key; + + const HostFormatPair& host_format_pair = + vulkan_texture_cache.host_formats_[uint32_t(key().format)]; + VkFormat format = (is_signed ? host_format_pair.format_signed + : host_format_pair.format_unsigned) + .format; + if (format == VK_FORMAT_UNDEFINED) { + return VK_NULL_HANDLE; + } + // If not distinguishing between unsigned and signed formats for the same + // image, don't create two views. As this happens within an image, no need to + // care about whether unsigned and signed images are separate - if they are + // (or if there are only unsigned or only signed images), this image will have + // either all views unsigned or all views signed. 
+ view_key.is_signed_separate_view = + is_signed && (host_format_pair.format_signed.format != + host_format_pair.format_unsigned.format); + + const ui::vulkan::VulkanProvider& provider = + vulkan_texture_cache.command_processor_.GetVulkanProvider(); + const VkPhysicalDevicePortabilitySubsetFeaturesKHR* + device_portability_subset_features = + provider.device_portability_subset_features(); + if (device_portability_subset_features && + !device_portability_subset_features->imageViewFormatSwizzle) { + host_swizzle = xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA; + } + view_key.host_swizzle = host_swizzle; + + // Try to find an existing view. + auto it = views_.find(view_key); + if (it != views_.end()) { + return it->second; + } + + // Create a new view. + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + VkImageViewCreateInfo view_create_info; + view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view_create_info.pNext = nullptr; + view_create_info.flags = 0; + view_create_info.image = image(); + switch (key().dimension) { + case xenos::DataDimension::k3D: + view_create_info.viewType = VK_IMAGE_VIEW_TYPE_3D; + break; + case xenos::DataDimension::kCube: + view_create_info.viewType = VK_IMAGE_VIEW_TYPE_CUBE; + break; + default: + view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + break; + } + view_create_info.format = format; + view_create_info.components.r = GetComponentSwizzle(host_swizzle, 0); + view_create_info.components.g = GetComponentSwizzle(host_swizzle, 1); + view_create_info.components.b = GetComponentSwizzle(host_swizzle, 2); + view_create_info.components.a = GetComponentSwizzle(host_swizzle, 3); + view_create_info.subresourceRange = + ui::vulkan::util::InitializeSubresourceRange(); + VkImageView view; + if (dfn.vkCreateImageView(device, &view_create_info, nullptr, &view) != + VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to create an image view for Vulkan format " + "{} 
({}signed) with swizzle 0x{:3X}", + uint32_t(format), is_signed ? "" : "un", host_swizzle); + return VK_NULL_HANDLE; + } + views_.emplace(view_key, view); + return view; +} VulkanTextureCache::VulkanTextureCache( const RegisterFile& register_file, VulkanSharedMemory& shared_memory, @@ -225,14 +873,353 @@ VulkanTextureCache::VulkanTextureCache( bool VulkanTextureCache::Initialize() { const ui::vulkan::VulkanProvider& provider = command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::InstanceFunctions& ifn = provider.ifn(); + VkPhysicalDevice physical_device = provider.physical_device(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); const VkPhysicalDevicePortabilitySubsetFeaturesKHR* device_portability_subset_features = provider.device_portability_subset_features(); + // Image formats. + + // Initialize to the best formats. + for (size_t i = 0; i < 64; ++i) { + host_formats_[i] = kBestHostFormats[i]; + } + + // Check format support and switch to fallbacks if needed. 
+ VkFormatProperties r16_unorm_properties; + ifn.vkGetPhysicalDeviceFormatProperties(physical_device, VK_FORMAT_R16_UNORM, + &r16_unorm_properties); + VkFormatProperties r16_snorm_properties; + ifn.vkGetPhysicalDeviceFormatProperties(physical_device, VK_FORMAT_R16_SNORM, + &r16_snorm_properties); + VkFormatProperties r16g16_unorm_properties; + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, VK_FORMAT_R16G16_UNORM, &r16g16_unorm_properties); + VkFormatProperties r16g16_snorm_properties; + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, VK_FORMAT_R16G16_SNORM, &r16g16_snorm_properties); + VkFormatProperties r16g16b16a16_unorm_properties; + ifn.vkGetPhysicalDeviceFormatProperties(physical_device, + VK_FORMAT_R16G16B16A16_UNORM, + &r16g16b16a16_unorm_properties); + VkFormatProperties r16g16b16a16_snorm_properties; + ifn.vkGetPhysicalDeviceFormatProperties(physical_device, + VK_FORMAT_R16G16B16A16_SNORM, + &r16g16b16a16_snorm_properties); + VkFormatProperties format_properties; + // TODO(Triang3l): k_2_10_10_10 signed -> filterable R16G16B16A16_SFLOAT + // (enough storage precision, possibly unwanted filtering precision change). + // TODO(Triang3l): k_Cr_Y1_Cb_Y0_REP -> R8G8B8A8_UNORM. + // TODO(Triang3l): k_Y1_Cr_Y0_Cb_REP -> R8G8B8A8_UNORM. + // TODO(Triang3l): k_10_11_11 -> filterable R16G16B16A16_SFLOAT (enough + // storage precision, possibly unwanted filtering precision change). + // TODO(Triang3l): k_11_11_10 -> filterable R16G16B16A16_SFLOAT (enough + // storage precision, possibly unwanted filtering precision change). + // S3TC. + // Not checking the textureCompressionBC feature because its availability + // means that all BC formats are supported, however, the device may expose + // some BC formats without this feature. Xenia doesn't use BC6H and BC7 at + // all, and has fallbacks for each used format. + // TODO(Triang3l): Raise the host texture memory usage limit if S3TC has to be + // decompressed. 
+ // TODO(Triang3l): S3TC -> 5551 or 4444 as an option. + // TODO(Triang3l): S3TC -> ETC2 / EAC (a huge research topic). + HostFormatPair& host_format_dxt1 = + host_formats_[uint32_t(xenos::TextureFormat::k_DXT1)]; + assert_true(host_format_dxt1.format_unsigned.format == + VK_FORMAT_BC1_RGBA_UNORM_BLOCK); + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, &format_properties); + if (!(format_properties.optimalTilingFeatures & + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + host_format_dxt1.format_unsigned.load_mode = LoadMode::kDXT1ToRGBA8; + host_format_dxt1.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; + host_format_dxt1.format_unsigned.block_width_log2 = 0; + host_format_dxt1.format_unsigned.block_height_log2 = 0; + host_formats_[uint32_t(xenos::TextureFormat::k_DXT1_AS_16_16_16_16)] = + host_format_dxt1; + } + HostFormatPair& host_format_dxt2_3 = + host_formats_[uint32_t(xenos::TextureFormat::k_DXT2_3)]; + assert_true(host_format_dxt2_3.format_unsigned.format == + VK_FORMAT_BC2_UNORM_BLOCK); + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, VK_FORMAT_BC2_UNORM_BLOCK, &format_properties); + if (!(format_properties.optimalTilingFeatures & + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + host_format_dxt2_3.format_unsigned.load_mode = LoadMode::kDXT3ToRGBA8; + host_format_dxt2_3.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; + host_format_dxt2_3.format_unsigned.block_width_log2 = 0; + host_format_dxt2_3.format_unsigned.block_height_log2 = 0; + host_formats_[uint32_t(xenos::TextureFormat::k_DXT2_3_AS_16_16_16_16)] = + host_format_dxt2_3; + } + HostFormatPair& host_format_dxt4_5 = + host_formats_[uint32_t(xenos::TextureFormat::k_DXT4_5)]; + assert_true(host_format_dxt4_5.format_unsigned.format == + VK_FORMAT_BC3_UNORM_BLOCK); + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, VK_FORMAT_BC3_UNORM_BLOCK, &format_properties); + if (!(format_properties.optimalTilingFeatures & + 
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + host_format_dxt4_5.format_unsigned.load_mode = LoadMode::kDXT5ToRGBA8; + host_format_dxt4_5.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; + host_format_dxt4_5.format_unsigned.block_width_log2 = 0; + host_format_dxt4_5.format_unsigned.block_height_log2 = 0; + host_formats_[uint32_t(xenos::TextureFormat::k_DXT4_5_AS_16_16_16_16)] = + host_format_dxt4_5; + } + HostFormatPair& host_format_dxn = + host_formats_[uint32_t(xenos::TextureFormat::k_DXN)]; + assert_true(host_format_dxn.format_unsigned.format == + VK_FORMAT_BC5_UNORM_BLOCK); + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, VK_FORMAT_BC5_UNORM_BLOCK, &format_properties); + if (!(format_properties.optimalTilingFeatures & + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + host_format_dxn.format_unsigned.load_mode = LoadMode::kDXNToRG8; + host_format_dxn.format_unsigned.format = VK_FORMAT_R8G8_UNORM; + host_format_dxn.format_unsigned.block_width_log2 = 0; + host_format_dxn.format_unsigned.block_height_log2 = 0; + } + HostFormatPair& host_format_dxt5a = + host_formats_[uint32_t(xenos::TextureFormat::k_DXT5A)]; + assert_true(host_format_dxt5a.format_unsigned.format == + VK_FORMAT_BC4_UNORM_BLOCK); + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, VK_FORMAT_BC4_UNORM_BLOCK, &format_properties); + if (!(format_properties.optimalTilingFeatures & + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + host_format_dxt5a.format_unsigned.load_mode = LoadMode::kDXT5AToR8; + host_format_dxt5a.format_unsigned.format = VK_FORMAT_R8_UNORM; + host_format_dxt5a.format_unsigned.block_width_log2 = 0; + host_format_dxt5a.format_unsigned.block_height_log2 = 0; + } + // k_16, k_16_16, k_16_16_16_16 - UNORM / SNORM are optional, fall back to + // SFLOAT, which is mandatory and is always filterable (the guest 16-bit + // format is filterable, 16-bit fixed-point is the full texture filtering + // precision on the Xenos overall). 
Let the user choose what's more important, + // precision (use host UNORM / SNORM if available even if they're not + // filterable) or filterability (use host UNORM / SNORM only if they're + // available and filterable). + // TODO(Triang3l): Expose a cvar for selecting the preference (filterability + // or precision). + VkFormatFeatureFlags norm16_required_features = + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + HostFormatPair& host_format_16 = + host_formats_[uint32_t(xenos::TextureFormat::k_16)]; + assert_true(host_format_16.format_unsigned.format == VK_FORMAT_R16_UNORM); + if ((r16_unorm_properties.optimalTilingFeatures & norm16_required_features) != + norm16_required_features) { + host_format_16.format_unsigned.load_mode = LoadMode::kR16UNormToFloat; + host_format_16.format_unsigned.format = VK_FORMAT_R16_SFLOAT; + } + assert_true(host_format_16.format_signed.format == VK_FORMAT_R16_SNORM); + if ((r16_snorm_properties.optimalTilingFeatures & norm16_required_features) != + norm16_required_features) { + host_format_16.format_signed.load_mode = LoadMode::kR16SNormToFloat; + host_format_16.format_signed.format = VK_FORMAT_R16_SFLOAT; + } + host_format_16.unsigned_signed_compatible = + (host_format_16.format_unsigned.format == VK_FORMAT_R16_UNORM && + host_format_16.format_signed.format == VK_FORMAT_R16_SNORM) || + (host_format_16.format_unsigned.format == VK_FORMAT_R16_SFLOAT && + host_format_16.format_signed.format == VK_FORMAT_R16_SFLOAT); + HostFormatPair& host_format_16_16 = + host_formats_[uint32_t(xenos::TextureFormat::k_16_16)]; + assert_true(host_format_16_16.format_unsigned.format == + VK_FORMAT_R16G16_UNORM); + if ((r16g16_unorm_properties.optimalTilingFeatures & + norm16_required_features) != norm16_required_features) { + host_format_16_16.format_unsigned.load_mode = LoadMode::kRG16UNormToFloat; + host_format_16_16.format_unsigned.format = VK_FORMAT_R16G16_SFLOAT; + } + assert_true(host_format_16_16.format_signed.format == VK_FORMAT_R16G16_SNORM); + if 
((r16g16_snorm_properties.optimalTilingFeatures & + norm16_required_features) != norm16_required_features) { + host_format_16_16.format_signed.load_mode = LoadMode::kRG16SNormToFloat; + host_format_16_16.format_signed.format = VK_FORMAT_R16G16_SFLOAT; + } + host_format_16_16.unsigned_signed_compatible = + (host_format_16_16.format_unsigned.format == VK_FORMAT_R16G16_UNORM && + host_format_16_16.format_signed.format == VK_FORMAT_R16G16_SNORM) || + (host_format_16_16.format_unsigned.format == VK_FORMAT_R16G16_SFLOAT && + host_format_16_16.format_signed.format == VK_FORMAT_R16G16_SFLOAT); + HostFormatPair& host_format_16_16_16_16 = + host_formats_[uint32_t(xenos::TextureFormat::k_16_16_16_16)]; + assert_true(host_format_16_16_16_16.format_unsigned.format == + VK_FORMAT_R16G16B16A16_UNORM); + if ((r16g16b16a16_unorm_properties.optimalTilingFeatures & + norm16_required_features) != norm16_required_features) { + host_format_16_16_16_16.format_unsigned.load_mode = + LoadMode::kRGBA16UNormToFloat; + host_format_16_16_16_16.format_unsigned.format = + VK_FORMAT_R16G16B16A16_SFLOAT; + } + assert_true(host_format_16_16_16_16.format_signed.format == + VK_FORMAT_R16G16B16A16_SNORM); + if ((r16g16b16a16_snorm_properties.optimalTilingFeatures & + norm16_required_features) != norm16_required_features) { + host_format_16_16_16_16.format_signed.load_mode = + LoadMode::kRGBA16SNormToFloat; + host_format_16_16_16_16.format_signed.format = + VK_FORMAT_R16G16B16A16_SFLOAT; + } + host_format_16_16_16_16.unsigned_signed_compatible = + (host_format_16_16_16_16.format_unsigned.format == + VK_FORMAT_R16G16B16A16_UNORM && + host_format_16_16_16_16.format_signed.format == + VK_FORMAT_R16G16B16A16_SNORM) || + (host_format_16_16_16_16.format_unsigned.format == + VK_FORMAT_R16G16B16A16_SFLOAT && + host_format_16_16_16_16.format_signed.format == + VK_FORMAT_R16G16B16A16_SFLOAT); + + // Normalize format information structures. 
+ for (size_t i = 0; i < 64; ++i) { + HostFormatPair& host_format = host_formats_[i]; + // LoadMode is left uninitialized for the tail (non-existent formats), + // kUnknown may be non-zero, and format support may be disabled by setting + // the format to VK_FORMAT_UNDEFINED. + if (host_format.format_unsigned.format == VK_FORMAT_UNDEFINED) { + host_format.format_unsigned.load_mode = LoadMode::kUnknown; + } + assert_false(host_format.format_unsigned.load_mode == LoadMode::kUnknown && + host_format.format_unsigned.format != VK_FORMAT_UNDEFINED); + if (host_format.format_unsigned.load_mode == LoadMode::kUnknown) { + host_format.format_unsigned.format = VK_FORMAT_UNDEFINED; + // Surely known it's unsupported with these two conditions. + host_format.format_unsigned.linear_filterable = false; + } + if (host_format.format_signed.format == VK_FORMAT_UNDEFINED) { + host_format.format_signed.load_mode = LoadMode::kUnknown; + } + assert_false(host_format.format_signed.load_mode == LoadMode::kUnknown && + host_format.format_signed.format != VK_FORMAT_UNDEFINED); + if (host_format.format_signed.load_mode == LoadMode::kUnknown) { + host_format.format_signed.format = VK_FORMAT_UNDEFINED; + // Surely known it's unsupported with these two conditions. + host_format.format_signed.linear_filterable = false; + } + + // Check if the formats are supported and are linear-filterable. 
+ if (host_format.format_unsigned.format != VK_FORMAT_UNDEFINED) { + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, host_format.format_unsigned.format, + &format_properties); + if (format_properties.optimalTilingFeatures & + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) { + host_format.format_unsigned.linear_filterable = + (format_properties.optimalTilingFeatures & + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT) != 0; + } else { + host_format.format_unsigned.format = VK_FORMAT_UNDEFINED; + host_format.format_unsigned.load_mode = LoadMode::kUnknown; + host_format.format_unsigned.linear_filterable = false; + } + } + if (host_format.format_signed.format != VK_FORMAT_UNDEFINED) { + ifn.vkGetPhysicalDeviceFormatProperties(physical_device, + host_format.format_signed.format, + &format_properties); + if (format_properties.optimalTilingFeatures & + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) { + host_format.format_signed.linear_filterable = + (format_properties.optimalTilingFeatures & + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT) != 0; + } else { + host_format.format_signed.format = VK_FORMAT_UNDEFINED; + host_format.format_signed.load_mode = LoadMode::kUnknown; + host_format.format_signed.linear_filterable = false; + } + } + + // Log which formats are not supported or supported via fallbacks. 
+ const HostFormatPair& best_host_format = kBestHostFormats[i]; + const char* guest_format_name = + FormatInfo::Get(xenos::TextureFormat(i))->name; + if (best_host_format.format_unsigned.format != VK_FORMAT_UNDEFINED) { + assert_not_null(guest_format_name); + if (host_format.format_unsigned.format != VK_FORMAT_UNDEFINED) { + if (host_format.format_unsigned.format != + best_host_format.format_unsigned.format) { + XELOGGPU( + "VulkanTextureCache: Format {} (unsigned) is supported via a " + "fallback format (using the Vulkan format {} instead of the " + "preferred {})", + guest_format_name, uint32_t(host_format.format_unsigned.format), + uint32_t(best_host_format.format_unsigned.format)); + } + } else { + XELOGGPU( + "VulkanTextureCache: Format {} (unsigned) is not supported by the " + "device (preferred Vulkan format is {})", + guest_format_name, + uint32_t(best_host_format.format_unsigned.format)); + } + } + if (best_host_format.format_signed.format != VK_FORMAT_UNDEFINED) { + assert_not_null(guest_format_name); + if (host_format.format_signed.format != VK_FORMAT_UNDEFINED) { + if (host_format.format_signed.format != + best_host_format.format_signed.format) { + XELOGGPU( + "VulkanTextureCache: Format {} (signed) is supported via a " + "fallback format (using the Vulkan format {} instead of the " + "preferred {})", + guest_format_name, uint32_t(host_format.format_signed.format), + uint32_t(best_host_format.format_signed.format)); + } + } else { + XELOGGPU( + "VulkanTextureCache: Format {} (signed) is not supported by the " + "device (preferred Vulkan format is {})", + guest_format_name, uint32_t(best_host_format.format_signed.format)); + } + } + + // Signednesses with different load modes must have the data loaded + // differently, therefore can't share the image even if the format is the + // same. Also, if there's only one version, simplify the logic - there can't + // be compatibility between two formats when one of them is undefined. 
+ if (host_format.format_unsigned.format != VK_FORMAT_UNDEFINED && + host_format.format_signed.format != VK_FORMAT_UNDEFINED) { + if (host_format.format_unsigned.load_mode == + host_format.format_signed.load_mode) { + if (host_format.format_unsigned.format == + host_format.format_signed.format) { + // Same format after all the fallbacks - force compatibility. + host_format.unsigned_signed_compatible = true; + } + } else { + host_format.unsigned_signed_compatible = false; + } + // Formats within the same compatibility class must have the same block + // size, though the fallbacks are configured incorrectly if that's not the + // case (since such formats just can't be in one compatibility class). + assert_false(host_format.unsigned_signed_compatible && + (host_format.format_unsigned.block_width_log2 != + host_format.format_signed.block_width_log2 || + host_format.format_unsigned.block_height_log2 != + host_format.format_signed.block_height_log2)); + if (host_format.unsigned_signed_compatible && + (host_format.format_unsigned.block_width_log2 != + host_format.format_signed.block_width_log2 || + host_format.format_unsigned.block_height_log2 != + host_format.format_signed.block_height_log2)) { + host_format.unsigned_signed_compatible = false; + } + } else { + host_format.unsigned_signed_compatible = false; + } + } + // Null images as a replacement for unneeded bindings and for bindings for + // which the real image hasn't been created. + // TODO(Triang3l): Use VK_EXT_robustness2 null descriptors. 
VkImageCreateInfo null_image_create_info; null_image_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; @@ -447,6 +1434,36 @@ bool VulkanTextureCache::Initialize() { return true; } +void VulkanTextureCache::GetTextureUsageMasks(VulkanTexture::Usage usage, + VkPipelineStageFlags& stage_mask, + VkAccessFlags& access_mask, + VkImageLayout& layout) { + stage_mask = 0; + access_mask = 0; + layout = VK_IMAGE_LAYOUT_UNDEFINED; + switch (usage) { + case VulkanTexture::Usage::kUndefined: + break; + case VulkanTexture::Usage::kTransferDestination: + stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; + access_mask = VK_ACCESS_TRANSFER_WRITE_BIT; + layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + break; + case VulkanTexture::Usage::kGuestShaderSampled: + stage_mask = guest_shader_pipeline_stages_; + access_mask = VK_ACCESS_SHADER_READ_BIT; + layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + break; + case VulkanTexture::Usage::kSwapSampled: + // The swap texture is likely to be used only for the presentation compute + // shader, and not during emulation, where it'd be used in other stages. 
+ stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + access_mask = VK_ACCESS_SHADER_READ_BIT; + layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + break; + } +} + } // namespace vulkan } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.h b/src/xenia/gpu/vulkan/vulkan_texture_cache.h index ee4f8b9ee..0be513cdf 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.h @@ -12,8 +12,10 @@ #include #include +#include #include +#include "xenia/base/hash.h" #include "xenia/gpu/texture_cache.h" #include "xenia/gpu/vulkan/vulkan_shared_memory.h" #include "xenia/ui/vulkan/vulkan_provider.h" @@ -45,18 +47,19 @@ class VulkanTextureCache final : public TextureCache { void BeginSubmission(uint64_t new_submission_index) override; - VkImageView GetNullImageView(xenos::FetchOpDimension dimension) const { - switch (dimension) { - case xenos::FetchOpDimension::k3DOrStacked: - return null_image_view_3d_; - case xenos::FetchOpDimension::kCube: - return null_image_view_cube_; - default: - return null_image_view_2d_array_; - } - } + // Must be called within a frame - creates and untiles textures needed by + // shaders, and enqueues transitioning them into the sampled usage. This may + // bind compute pipelines (notifying the command processor about that), and + // also since it may insert deferred barriers, before flushing the barriers + // preceding host GPU work. 
+ void RequestTextures(uint32_t used_texture_mask) override; + + VkImageView GetActiveBindingOrNullImageView(uint32_t fetch_constant_index, + xenos::FetchOpDimension dimension, + bool is_signed) const; protected: + bool IsSignedVersionSeparateForFormat(TextureKey key) const override; uint32_t GetHostFormatSwizzle(TextureKey key) const override; uint32_t GetMaxHostTextureWidthHeight( @@ -69,13 +72,187 @@ class VulkanTextureCache final : public TextureCache { bool LoadTextureDataFromResidentMemoryImpl(Texture& texture, bool load_base, bool load_mips) override; + void UpdateTextureBindingsImpl(uint32_t fetch_constant_mask) override; + private: + enum class LoadMode { + k8bpb, + k16bpb, + k32bpb, + k64bpb, + k128bpb, + kR5G5B5A1ToB5G5R5A1, + kR5G6B5ToB5G6R5, + kR5G5B6ToB5G6R5WithRBGASwizzle, + kRGBA4ToARGB4, + kR10G11B11ToRGBA16, + kR10G11B11ToRGBA16SNorm, + kR11G11B10ToRGBA16, + kR11G11B10ToRGBA16SNorm, + kR16UNormToFloat, + kR16SNormToFloat, + kRG16UNormToFloat, + kRG16SNormToFloat, + kRGBA16UNormToFloat, + kRGBA16SNormToFloat, + kDXT1ToRGBA8, + kDXT3ToRGBA8, + kDXT5ToRGBA8, + kDXNToRG8, + kDXT3A, + kDXT3AAs1111ToARGB4, + kDXT5AToR8, + kCTX1, + kDepthUnorm, + kDepthFloat, + + kCount, + + kUnknown = kCount + }; + + struct HostFormat { + LoadMode load_mode; + // Do NOT add integer formats to this - they are not filterable, can only be + // read with ImageFetch, not ImageSample! If any game is seen using + // num_format 1 for fixed-point formats (for floating-point, it's normally + // set to 1 though), add a constant buffer containing multipliers for the + // textures and multiplication to the tfetch implementation. + VkFormat format; + uint32_t block_width_log2; + uint32_t block_height_log2; + + // Set up dynamically based on what's supported by the device. + bool linear_filterable; + }; + + struct HostFormatPair { + HostFormat format_unsigned; + HostFormat format_signed; + // Mapping of Xenos swizzle components to Vulkan format components. 
+ uint32_t swizzle; + // Whether the unsigned and the signed formats are compatible for one image + // and the same image data (on a portability subset device, this should also + // take imageViewFormatReinterpretation into account). + bool unsigned_signed_compatible; + }; + class VulkanTexture final : public Texture { public: + enum class Usage { + kUndefined, + kTransferDestination, + kGuestShaderSampled, + kSwapSampled, + }; + + // Takes ownership of the image and its memory. explicit VulkanTexture(VulkanTextureCache& texture_cache, - const TextureKey& key); + const TextureKey& key, VkImage image, + VkDeviceMemory memory, VkDeviceSize memory_size); + ~VulkanTexture(); + + VkImage image() const { return image_; } + + // Doesn't transition (the caller must insert the barrier). + Usage SetUsage(Usage new_usage) { + Usage old_usage = usage_; + usage_ = new_usage; + return old_usage; + } + + VkImageView GetView(bool is_signed, uint32_t host_swizzle); + + private: + union ViewKey { + uint32_t key; + struct { + uint32_t is_signed_separate_view : 1; + uint32_t host_swizzle : 12; + }; + + ViewKey() : key(0) { static_assert_size(*this, sizeof(key)); } + + struct Hasher { + size_t operator()(const ViewKey& key) const { + return std::hash{}(key.key); + } + }; + bool operator==(const ViewKey& other_key) const { + return key == other_key.key; + } + bool operator!=(const ViewKey& other_key) const { + return !(*this == other_key); + } + }; + + static constexpr VkComponentSwizzle GetComponentSwizzle( + uint32_t texture_swizzle, uint32_t component_index) { + xenos::XE_GPU_TEXTURE_SWIZZLE texture_component_swizzle = + xenos::XE_GPU_TEXTURE_SWIZZLE( + (texture_swizzle >> (3 * component_index)) & 0b111); + if (texture_component_swizzle == + xenos::XE_GPU_TEXTURE_SWIZZLE(component_index)) { + // The portability subset requires all swizzles to be IDENTITY, return + // IDENTITY specifically, not R, G, B, A. 
+ return VK_COMPONENT_SWIZZLE_IDENTITY; + } + switch (texture_component_swizzle) { + case xenos::XE_GPU_TEXTURE_SWIZZLE_R: + return VK_COMPONENT_SWIZZLE_R; + case xenos::XE_GPU_TEXTURE_SWIZZLE_G: + return VK_COMPONENT_SWIZZLE_G; + case xenos::XE_GPU_TEXTURE_SWIZZLE_B: + return VK_COMPONENT_SWIZZLE_B; + case xenos::XE_GPU_TEXTURE_SWIZZLE_A: + return VK_COMPONENT_SWIZZLE_A; + case xenos::XE_GPU_TEXTURE_SWIZZLE_0: + return VK_COMPONENT_SWIZZLE_ZERO; + case xenos::XE_GPU_TEXTURE_SWIZZLE_1: + return VK_COMPONENT_SWIZZLE_ONE; + default: + // An invalid value. + return VK_COMPONENT_SWIZZLE_IDENTITY; + } + } + + VkImage image_; + VkDeviceMemory memory_; + + Usage usage_ = Usage::kUndefined; + + std::unordered_map views_; }; + struct VulkanTextureBinding { + VkImageView image_view_unsigned; + VkImageView image_view_signed; + + VulkanTextureBinding() { Reset(); } + + void Reset() { + image_view_unsigned = VK_NULL_HANDLE; + image_view_signed = VK_NULL_HANDLE; + } + }; + + static constexpr bool AreDimensionsCompatible( + xenos::FetchOpDimension binding_dimension, + xenos::DataDimension resource_dimension) { + switch (binding_dimension) { + case xenos::FetchOpDimension::k1D: + case xenos::FetchOpDimension::k2D: + return resource_dimension == xenos::DataDimension::k1D || + resource_dimension == xenos::DataDimension::k2DOrStacked; + case xenos::FetchOpDimension::k3DOrStacked: + return resource_dimension == xenos::DataDimension::k3D; + case xenos::FetchOpDimension::kCube: + return resource_dimension == xenos::DataDimension::kCube; + default: + return false; + } + } + explicit VulkanTextureCache( const RegisterFile& register_file, VulkanSharedMemory& shared_memory, uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y, @@ -84,9 +261,16 @@ class VulkanTextureCache final : public TextureCache { bool Initialize(); + void GetTextureUsageMasks(VulkanTexture::Usage usage, + VkPipelineStageFlags& stage_mask, + VkAccessFlags& access_mask, VkImageLayout& layout); + 
VulkanCommandProcessor& command_processor_; VkPipelineStageFlags guest_shader_pipeline_stages_; + static const HostFormatPair kBestHostFormats[64]; + HostFormatPair host_formats_[64]; + // If both images can be placed in the same allocation, it's one allocation, // otherwise it's two separate. std::array null_images_memory_{}; @@ -96,6 +280,9 @@ class VulkanTextureCache final : public TextureCache { VkImageView null_image_view_cube_ = VK_NULL_HANDLE; VkImageView null_image_view_3d_ = VK_NULL_HANDLE; bool null_images_cleared_ = false; + + std::array + vulkan_texture_bindings_; }; } // namespace vulkan diff --git a/src/xenia/ui/vulkan/functions/instance_1_0.inc b/src/xenia/ui/vulkan/functions/instance_1_0.inc index cdeb97209..b4ad9344a 100644 --- a/src/xenia/ui/vulkan/functions/instance_1_0.inc +++ b/src/xenia/ui/vulkan/functions/instance_1_0.inc @@ -5,6 +5,7 @@ XE_UI_VULKAN_FUNCTION(vkEnumerateDeviceExtensionProperties) XE_UI_VULKAN_FUNCTION(vkEnumeratePhysicalDevices) XE_UI_VULKAN_FUNCTION(vkGetDeviceProcAddr) XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceFeatures) +XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceFormatProperties) XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceMemoryProperties) XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceProperties) XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceQueueFamilyProperties) From 91c4e02e96fcf39287fb1e2e8a0cdaf964742499 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 22 May 2022 15:05:15 +0300 Subject: [PATCH 087/123] [Vulkan] Implement ClearCaches and don't do it for pipelines --- .../gpu/vulkan/vulkan_command_processor.cc | 20 +++++------- .../gpu/vulkan/vulkan_command_processor.h | 2 ++ src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 31 +++++++------------ src/xenia/gpu/vulkan/vulkan_pipeline_cache.h | 4 +-- 4 files changed, 23 insertions(+), 34 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 6dd91def7..ce0ee1576 100644 --- 
a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -70,6 +70,11 @@ VulkanCommandProcessor::VulkanCommandProcessor( VulkanCommandProcessor::~VulkanCommandProcessor() = default; +void VulkanCommandProcessor::ClearCaches() { + CommandProcessor::ClearCaches(); + cache_clear_requested_ = true; +} + void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) { shared_memory_->MemoryInvalidationCallback(base_ptr, length, true); @@ -2295,21 +2300,10 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { texture_cache_->ClearCache(); - pipeline_cache_->ClearCache(); - render_target_cache_->ClearCache(); - for (const auto& pipeline_layout_pair : pipeline_layouts_) { - dfn.vkDestroyPipelineLayout( - device, pipeline_layout_pair.second.GetPipelineLayout(), nullptr); - } - pipeline_layouts_.clear(); - for (const auto& descriptor_set_layout_pair : - descriptor_set_layouts_textures_) { - dfn.vkDestroyDescriptorSetLayout( - device, descriptor_set_layout_pair.second, nullptr); - } - descriptor_set_layouts_textures_.clear(); + // Not clearing the pipeline layouts and the descriptor set layouts as + // they're referenced by pipelines, which are not destroyed. 
primitive_processor_->ClearCache(); diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 3158db0b6..8390b67e2 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -62,6 +62,8 @@ class VulkanCommandProcessor : public CommandProcessor { kernel::KernelState* kernel_state); ~VulkanCommandProcessor(); + void ClearCaches() override; + void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override; void RestoreEdramSnapshot(const void* snapshot) override; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 433c42aeb..0a8a88f62 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -64,24 +64,6 @@ void VulkanPipelineCache::Shutdown() { const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - ClearCache(); - - for (const auto& geometry_shader_pair : geometry_shaders_) { - if (geometry_shader_pair.second != VK_NULL_HANDLE) { - dfn.vkDestroyShaderModule(device, geometry_shader_pair.second, nullptr); - } - } - geometry_shaders_.clear(); - - shader_translator_.reset(); -} - -void VulkanPipelineCache::ClearCache() { - const ui::vulkan::VulkanProvider& provider = - command_processor_.GetVulkanProvider(); - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); - VkDevice device = provider.device(); - // Destroy all pipelines. last_pipeline_ = nullptr; for (const auto& pipeline_pair : pipelines_) { @@ -91,13 +73,24 @@ void VulkanPipelineCache::ClearCache() { } pipelines_.clear(); - // Destroy all shaders. + // Destroy all internal shaders. 
+ for (const auto& geometry_shader_pair : geometry_shaders_) { + if (geometry_shader_pair.second != VK_NULL_HANDLE) { + dfn.vkDestroyShaderModule(device, geometry_shader_pair.second, nullptr); + } + } + geometry_shaders_.clear(); + + // Destroy all translated shaders. for (auto it : shaders_) { delete it.second; } shaders_.clear(); texture_binding_layout_map_.clear(); texture_binding_layouts_.clear(); + + // Shut down shader translation. + shader_translator_.reset(); } VulkanShader* VulkanPipelineCache::LoadShader(xenos::ShaderType shader_type, diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h index 58f53cff4..819bd6e16 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h @@ -58,7 +58,6 @@ class VulkanPipelineCache { bool Initialize(); void Shutdown(); - void ClearCache(); VulkanShader* LoadShader(xenos::ShaderType shader_type, const uint32_t* host_address, uint32_t dword_count); @@ -198,7 +197,8 @@ class VulkanPipelineCache { struct Pipeline { VkPipeline pipeline = VK_NULL_HANDLE; - // Owned by VulkanCommandProcessor, valid until ClearCache. + // The layouts are owned by the VulkanCommandProcessor, and must not be + // destroyed by it while the pipeline cache is active. 
const PipelineLayoutProvider* pipeline_layout; Pipeline(const PipelineLayoutProvider* pipeline_layout_provider) : pipeline_layout(pipeline_layout_provider) {} From f7b0edee6b8f820053da5eb01c0356c6b1a97bbc Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 23 May 2022 13:18:47 +0300 Subject: [PATCH 088/123] [Vulkan] GBGR/BGRG decompression --- src/xenia/gpu/vulkan/vulkan_texture_cache.cc | 62 +++++++++++++++----- src/xenia/gpu/vulkan/vulkan_texture_cache.h | 2 + 2 files changed, 50 insertions(+), 14 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index 348c31318..f1e2d3dbc 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -35,6 +35,7 @@ namespace shaders { #include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_64bpb_scaled_cs.h" #include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_8bpb_cs.h" #include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_8bpb_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_bgrg8_rgb8_cs.h" #include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_ctx1_cs.h" #include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_depth_float_cs.h" #include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_depth_float_scaled_cs.h" @@ -47,6 +48,7 @@ namespace shaders { #include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_dxt3aas1111_argb4_cs.h" #include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_dxt5_rgba8_cs.h" #include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_dxt5a_r8_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_gbgr8_rgb8_cs.h" #include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r10g11b11_rgba16_cs.h" #include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r10g11b11_rgba16_scaled_cs.h" #include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_r10g11b11_rgba16_snorm_cs.h" @@ 
-138,14 +140,14 @@ const VulkanTextureCache::HostFormatPair // VK_KHR_sampler_ycbcr_conversion and promoted to Vulkan 1.1) is // optional. {{LoadMode::k32bpb, VK_FORMAT_G8B8G8R8_422_UNORM_KHR, 1, 0}, - {LoadMode::kUnknown}, + {LoadMode::kGBGR8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_Y1_Cr_Y0_Cb_REP // VK_FORMAT_B8G8R8G8_422_UNORM_KHR (added in // VK_KHR_sampler_ycbcr_conversion and promoted to Vulkan 1.1) is // optional. {{LoadMode::k32bpb, VK_FORMAT_B8G8R8G8_422_UNORM_KHR, 1, 0}, - {LoadMode::kUnknown}, + {LoadMode::kBGRG8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_16_16_EDRAM // Not usable as a texture, also has -32...32 range. @@ -889,6 +891,9 @@ bool VulkanTextureCache::Initialize() { } // Check format support and switch to fallbacks if needed. + constexpr VkFormatFeatureFlags kLinearFilterFeatures = + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; VkFormatProperties r16_unorm_properties; ifn.vkGetPhysicalDeviceFormatProperties(physical_device, VK_FORMAT_R16_UNORM, &r16_unorm_properties); @@ -912,8 +917,37 @@ bool VulkanTextureCache::Initialize() { VkFormatProperties format_properties; // TODO(Triang3l): k_2_10_10_10 signed -> filterable R16G16B16A16_SFLOAT // (enough storage precision, possibly unwanted filtering precision change). - // TODO(Triang3l): k_Cr_Y1_Cb_Y0_REP -> R8G8B8A8_UNORM. - // TODO(Triang3l): k_Y1_Cr_Y0_Cb_REP -> R8G8B8A8_UNORM. + // k_Cr_Y1_Cb_Y0_REP, k_Y1_Cr_Y0_Cb_REP. 
+ HostFormatPair& host_format_gbgr = + host_formats_[uint32_t(xenos::TextureFormat::k_Cr_Y1_Cb_Y0_REP)]; + assert_true(host_format_gbgr.format_unsigned.format == + VK_FORMAT_G8B8G8R8_422_UNORM_KHR); + assert_true(host_format_gbgr.format_signed.format == + VK_FORMAT_R8G8B8A8_SNORM); + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, VK_FORMAT_G8B8G8R8_422_UNORM_KHR, &format_properties); + if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != + kLinearFilterFeatures) { + host_format_gbgr.format_unsigned.load_mode = LoadMode::kGBGR8ToRGB8; + host_format_gbgr.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; + host_format_gbgr.format_unsigned.block_width_log2 = 0; + host_format_gbgr.unsigned_signed_compatible = true; + } + HostFormatPair& host_format_bgrg = + host_formats_[uint32_t(xenos::TextureFormat::k_Y1_Cr_Y0_Cb_REP)]; + assert_true(host_format_bgrg.format_unsigned.format == + VK_FORMAT_B8G8R8G8_422_UNORM_KHR); + assert_true(host_format_bgrg.format_signed.format == + VK_FORMAT_R8G8B8A8_SNORM); + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, VK_FORMAT_B8G8R8G8_422_UNORM_KHR, &format_properties); + if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != + kLinearFilterFeatures) { + host_format_bgrg.format_unsigned.load_mode = LoadMode::kBGRG8ToRGB8; + host_format_bgrg.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; + host_format_bgrg.format_unsigned.block_width_log2 = 0; + host_format_bgrg.unsigned_signed_compatible = true; + } // TODO(Triang3l): k_10_11_11 -> filterable R16G16B16A16_SFLOAT (enough // storage precision, possibly unwanted filtering precision change). 
// TODO(Triang3l): k_11_11_10 -> filterable R16G16B16A16_SFLOAT (enough @@ -933,8 +967,8 @@ bool VulkanTextureCache::Initialize() { VK_FORMAT_BC1_RGBA_UNORM_BLOCK); ifn.vkGetPhysicalDeviceFormatProperties( physical_device, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, &format_properties); - if (!(format_properties.optimalTilingFeatures & - VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != + kLinearFilterFeatures) { host_format_dxt1.format_unsigned.load_mode = LoadMode::kDXT1ToRGBA8; host_format_dxt1.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; host_format_dxt1.format_unsigned.block_width_log2 = 0; @@ -948,8 +982,8 @@ bool VulkanTextureCache::Initialize() { VK_FORMAT_BC2_UNORM_BLOCK); ifn.vkGetPhysicalDeviceFormatProperties( physical_device, VK_FORMAT_BC2_UNORM_BLOCK, &format_properties); - if (!(format_properties.optimalTilingFeatures & - VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != + kLinearFilterFeatures) { host_format_dxt2_3.format_unsigned.load_mode = LoadMode::kDXT3ToRGBA8; host_format_dxt2_3.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; host_format_dxt2_3.format_unsigned.block_width_log2 = 0; @@ -963,8 +997,8 @@ bool VulkanTextureCache::Initialize() { VK_FORMAT_BC3_UNORM_BLOCK); ifn.vkGetPhysicalDeviceFormatProperties( physical_device, VK_FORMAT_BC3_UNORM_BLOCK, &format_properties); - if (!(format_properties.optimalTilingFeatures & - VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != + kLinearFilterFeatures) { host_format_dxt4_5.format_unsigned.load_mode = LoadMode::kDXT5ToRGBA8; host_format_dxt4_5.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; host_format_dxt4_5.format_unsigned.block_width_log2 = 0; @@ -978,8 +1012,8 @@ bool VulkanTextureCache::Initialize() { VK_FORMAT_BC5_UNORM_BLOCK); ifn.vkGetPhysicalDeviceFormatProperties( physical_device, 
VK_FORMAT_BC5_UNORM_BLOCK, &format_properties); - if (!(format_properties.optimalTilingFeatures & - VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != + kLinearFilterFeatures) { host_format_dxn.format_unsigned.load_mode = LoadMode::kDXNToRG8; host_format_dxn.format_unsigned.format = VK_FORMAT_R8G8_UNORM; host_format_dxn.format_unsigned.block_width_log2 = 0; @@ -991,8 +1025,8 @@ bool VulkanTextureCache::Initialize() { VK_FORMAT_BC4_UNORM_BLOCK); ifn.vkGetPhysicalDeviceFormatProperties( physical_device, VK_FORMAT_BC4_UNORM_BLOCK, &format_properties); - if (!(format_properties.optimalTilingFeatures & - VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != + kLinearFilterFeatures) { host_format_dxt5a.format_unsigned.load_mode = LoadMode::kDXT5AToR8; host_format_dxt5a.format_unsigned.format = VK_FORMAT_R8_UNORM; host_format_dxt5a.format_unsigned.block_width_log2 = 0; diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.h b/src/xenia/gpu/vulkan/vulkan_texture_cache.h index 0be513cdf..8209ccc60 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.h @@ -85,6 +85,8 @@ class VulkanTextureCache final : public TextureCache { kR5G6B5ToB5G6R5, kR5G5B6ToB5G6R5WithRBGASwizzle, kRGBA4ToARGB4, + kGBGR8ToRGB8, + kBGRG8ToRGB8, kR10G11B11ToRGBA16, kR10G11B11ToRGBA16SNorm, kR11G11B10ToRGBA16, From f994d3ebb3fa85711877c544067c198d8b6c3d96 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 23 May 2022 13:27:43 +0300 Subject: [PATCH 089/123] [Vulkan] Single block-compressed flag for host texture formats, not block sizes --- src/xenia/gpu/vulkan/vulkan_texture_cache.cc | 51 ++++++++------------ src/xenia/gpu/vulkan/vulkan_texture_cache.h | 6 ++- 2 files changed, 25 insertions(+), 32 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index 
f1e2d3dbc..411b55eea 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -139,14 +139,14 @@ const VulkanTextureCache::HostFormatPair // VK_FORMAT_G8B8G8R8_422_UNORM_KHR (added in // VK_KHR_sampler_ycbcr_conversion and promoted to Vulkan 1.1) is // optional. - {{LoadMode::k32bpb, VK_FORMAT_G8B8G8R8_422_UNORM_KHR, 1, 0}, + {{LoadMode::k32bpb, VK_FORMAT_G8B8G8R8_422_UNORM_KHR, true}, {LoadMode::kGBGR8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_Y1_Cr_Y0_Cb_REP // VK_FORMAT_B8G8R8G8_422_UNORM_KHR (added in // VK_KHR_sampler_ycbcr_conversion and promoted to Vulkan 1.1) is // optional. - {{LoadMode::k32bpb, VK_FORMAT_B8G8R8G8_422_UNORM_KHR, 1, 0}, + {{LoadMode::k32bpb, VK_FORMAT_B8G8R8G8_422_UNORM_KHR, true}, {LoadMode::kBGRG8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_16_16_EDRAM @@ -177,17 +177,17 @@ const VulkanTextureCache::HostFormatPair xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_DXT1 // VK_FORMAT_BC1_RGBA_UNORM_BLOCK is optional. - {{LoadMode::k64bpb, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, 2, 2}, + {{LoadMode::k64bpb, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, true}, {LoadMode::kUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_DXT2_3 // VK_FORMAT_BC2_UNORM_BLOCK is optional. - {{LoadMode::k128bpb, VK_FORMAT_BC2_UNORM_BLOCK, 2, 2}, + {{LoadMode::k128bpb, VK_FORMAT_BC2_UNORM_BLOCK, true}, {LoadMode::kUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_DXT4_5 // VK_FORMAT_BC3_UNORM_BLOCK is optional. - {{LoadMode::k128bpb, VK_FORMAT_BC3_UNORM_BLOCK, 2, 2}, + {{LoadMode::k128bpb, VK_FORMAT_BC3_UNORM_BLOCK, true}, {LoadMode::kUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_16_16_16_16_EDRAM @@ -321,7 +321,7 @@ const VulkanTextureCache::HostFormatPair xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_DXN // VK_FORMAT_BC5_UNORM_BLOCK is optional. 
- {{LoadMode::k128bpb, VK_FORMAT_BC5_UNORM_BLOCK, 2, 2}, + {{LoadMode::k128bpb, VK_FORMAT_BC5_UNORM_BLOCK, true}, {LoadMode::kUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_8_8_8_8_AS_16_16_16_16 @@ -331,17 +331,17 @@ const VulkanTextureCache::HostFormatPair true}, // k_DXT1_AS_16_16_16_16 // VK_FORMAT_BC1_RGBA_UNORM_BLOCK is optional. - {{LoadMode::k64bpb, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, 2, 2}, + {{LoadMode::k64bpb, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, true}, {LoadMode::kUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_DXT2_3_AS_16_16_16_16 // VK_FORMAT_BC2_UNORM_BLOCK is optional. - {{LoadMode::k128bpb, VK_FORMAT_BC2_UNORM_BLOCK, 2, 2}, + {{LoadMode::k128bpb, VK_FORMAT_BC2_UNORM_BLOCK, true}, {LoadMode::kUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_DXT4_5_AS_16_16_16_16 // VK_FORMAT_BC3_UNORM_BLOCK is optional. - {{LoadMode::k128bpb, VK_FORMAT_BC3_UNORM_BLOCK, 2, 2}, + {{LoadMode::k128bpb, VK_FORMAT_BC3_UNORM_BLOCK, true}, {LoadMode::kUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_2_10_10_10_AS_16_16_16_16 @@ -372,7 +372,7 @@ const VulkanTextureCache::HostFormatPair xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_DXT5A // VK_FORMAT_BC4_UNORM_BLOCK is optional. 
- {{LoadMode::k64bpb, VK_FORMAT_BC4_UNORM_BLOCK, 2, 2}, + {{LoadMode::k64bpb, VK_FORMAT_BC4_UNORM_BLOCK, true}, {LoadMode::kUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_CTX1 @@ -930,7 +930,7 @@ bool VulkanTextureCache::Initialize() { kLinearFilterFeatures) { host_format_gbgr.format_unsigned.load_mode = LoadMode::kGBGR8ToRGB8; host_format_gbgr.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; - host_format_gbgr.format_unsigned.block_width_log2 = 0; + host_format_gbgr.format_unsigned.block_compressed = false; host_format_gbgr.unsigned_signed_compatible = true; } HostFormatPair& host_format_bgrg = @@ -945,7 +945,7 @@ bool VulkanTextureCache::Initialize() { kLinearFilterFeatures) { host_format_bgrg.format_unsigned.load_mode = LoadMode::kBGRG8ToRGB8; host_format_bgrg.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; - host_format_bgrg.format_unsigned.block_width_log2 = 0; + host_format_bgrg.format_unsigned.block_compressed = false; host_format_bgrg.unsigned_signed_compatible = true; } // TODO(Triang3l): k_10_11_11 -> filterable R16G16B16A16_SFLOAT (enough @@ -971,8 +971,7 @@ bool VulkanTextureCache::Initialize() { kLinearFilterFeatures) { host_format_dxt1.format_unsigned.load_mode = LoadMode::kDXT1ToRGBA8; host_format_dxt1.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; - host_format_dxt1.format_unsigned.block_width_log2 = 0; - host_format_dxt1.format_unsigned.block_height_log2 = 0; + host_format_dxt1.format_unsigned.block_compressed = false; host_formats_[uint32_t(xenos::TextureFormat::k_DXT1_AS_16_16_16_16)] = host_format_dxt1; } @@ -986,8 +985,7 @@ bool VulkanTextureCache::Initialize() { kLinearFilterFeatures) { host_format_dxt2_3.format_unsigned.load_mode = LoadMode::kDXT3ToRGBA8; host_format_dxt2_3.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; - host_format_dxt2_3.format_unsigned.block_width_log2 = 0; - host_format_dxt2_3.format_unsigned.block_height_log2 = 0; + host_format_dxt2_3.format_unsigned.block_compressed = false; 
host_formats_[uint32_t(xenos::TextureFormat::k_DXT2_3_AS_16_16_16_16)] = host_format_dxt2_3; } @@ -1001,8 +999,7 @@ bool VulkanTextureCache::Initialize() { kLinearFilterFeatures) { host_format_dxt4_5.format_unsigned.load_mode = LoadMode::kDXT5ToRGBA8; host_format_dxt4_5.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; - host_format_dxt4_5.format_unsigned.block_width_log2 = 0; - host_format_dxt4_5.format_unsigned.block_height_log2 = 0; + host_format_dxt4_5.format_unsigned.block_compressed = false; host_formats_[uint32_t(xenos::TextureFormat::k_DXT4_5_AS_16_16_16_16)] = host_format_dxt4_5; } @@ -1016,8 +1013,7 @@ bool VulkanTextureCache::Initialize() { kLinearFilterFeatures) { host_format_dxn.format_unsigned.load_mode = LoadMode::kDXNToRG8; host_format_dxn.format_unsigned.format = VK_FORMAT_R8G8_UNORM; - host_format_dxn.format_unsigned.block_width_log2 = 0; - host_format_dxn.format_unsigned.block_height_log2 = 0; + host_format_dxn.format_unsigned.block_compressed = false; } HostFormatPair& host_format_dxt5a = host_formats_[uint32_t(xenos::TextureFormat::k_DXT5A)]; @@ -1029,8 +1025,7 @@ bool VulkanTextureCache::Initialize() { kLinearFilterFeatures) { host_format_dxt5a.format_unsigned.load_mode = LoadMode::kDXT5AToR8; host_format_dxt5a.format_unsigned.format = VK_FORMAT_R8_UNORM; - host_format_dxt5a.format_unsigned.block_width_log2 = 0; - host_format_dxt5a.format_unsigned.block_height_log2 = 0; + host_format_dxt5a.format_unsigned.block_compressed = false; } // k_16, k_16_16, k_16_16_16_16 - UNORM / SNORM are optional, fall back to // SFLOAT, which is mandatory and is always filterable (the guest 16-bit @@ -1235,15 +1230,11 @@ bool VulkanTextureCache::Initialize() { // size, though the fallbacks are configured incorrectly if that's not the // case (since such formats just can't be in one compatibility class). 
assert_false(host_format.unsigned_signed_compatible && - (host_format.format_unsigned.block_width_log2 != - host_format.format_signed.block_width_log2 || - host_format.format_unsigned.block_height_log2 != - host_format.format_signed.block_height_log2)); + host_format.format_unsigned.block_compressed != + host_format.format_signed.block_compressed); if (host_format.unsigned_signed_compatible && - (host_format.format_unsigned.block_width_log2 != - host_format.format_signed.block_width_log2 || - host_format.format_unsigned.block_height_log2 != - host_format.format_signed.block_height_log2)) { + host_format.format_unsigned.block_compressed != + host_format.format_signed.block_compressed) { host_format.unsigned_signed_compatible = false; } } else { diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.h b/src/xenia/gpu/vulkan/vulkan_texture_cache.h index 8209ccc60..d7499f9a1 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.h @@ -121,8 +121,10 @@ class VulkanTextureCache final : public TextureCache { // set to 1 though), add a constant buffer containing multipliers for the // textures and multiplication to the tfetch implementation. VkFormat format; - uint32_t block_width_log2; - uint32_t block_height_log2; + // Whether the format is block-compressed on the host (the host block size + // matches the guest format block size in this case), and isn't decompressed + // on load. + bool block_compressed; // Set up dynamically based on what's supported by the device. 
bool linear_filterable; From 9c445d397b252e29761e8c785f43589efca66e17 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 24 May 2022 22:37:49 +0300 Subject: [PATCH 090/123] [Vulkan] Fix single-type descriptor pool reuse --- src/xenia/ui/vulkan/single_type_descriptor_set_allocator.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.cc b/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.cc index 62621bb49..44a3d31fe 100644 --- a/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.cc +++ b/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.cc @@ -128,7 +128,7 @@ VkDescriptorSet SingleTypeDescriptorSetAllocator::Allocate( // or will become full, and in case of a failure to allocate internally even // though there still should be enough space, it should never be allocated // from again. - Page map_page = pages_usable_.crend()->second; + Page map_page = page_usable_last_it->second; pages_usable_.erase(page_usable_last_it); descriptor_set_allocate_info.descriptorPool = map_page.pool; if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, From 6c9a06b2da021c31e31c857765fea72dcf1ec6fa Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 24 May 2022 22:42:22 +0300 Subject: [PATCH 091/123] [Vulkan] Texture loading --- .../gpu/vulkan/deferred_command_buffer.cc | 11 + .../gpu/vulkan/deferred_command_buffer.h | 35 + .../gpu/vulkan/vulkan_command_processor.cc | 151 ++- .../gpu/vulkan/vulkan_command_processor.h | 117 +- src/xenia/gpu/vulkan/vulkan_texture_cache.cc | 1038 ++++++++++++++--- src/xenia/gpu/vulkan/vulkan_texture_cache.h | 49 +- 6 files changed, 1186 insertions(+), 215 deletions(-) diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.cc b/src/xenia/gpu/vulkan/deferred_command_buffer.cc index fef52b9f4..65c80cf23 100644 --- a/src/xenia/gpu/vulkan/deferred_command_buffer.cc +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.cc @@ -154,6 +154,17 @@ void 
DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy)))); } break; + case Command::kVkCopyBufferToImage: { + auto& args = *reinterpret_cast<const ArgsVkCopyBufferToImage*>(stream); + dfn.vkCmdCopyBufferToImage( + command_buffer, args.src_buffer, args.dst_image, + args.dst_image_layout, args.region_count, + reinterpret_cast<const VkBufferImageCopy*>( + reinterpret_cast<const uint8_t*>(stream) + + xe::align(sizeof(ArgsVkCopyBufferToImage), + alignof(VkBufferImageCopy)))); + } break; + case Command::kVkDispatch: { auto& args = *reinterpret_cast<const ArgsVkDispatch*>(stream); dfn.vkCmdDispatch(command_buffer, args.group_count_x, diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.h b/src/xenia/gpu/vulkan/deferred_command_buffer.h index 60b216354..186639c86 100644 --- a/src/xenia/gpu/vulkan/deferred_command_buffer.h +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.h @@ -206,6 +206,31 @@ class DeferredCommandBuffer { regions, sizeof(VkBufferCopy) * region_count); } + VkBufferImageCopy* CmdCopyBufferToImageEmplace(VkBuffer src_buffer, + VkImage dst_image, + VkImageLayout dst_image_layout, + uint32_t region_count) { + const size_t header_size = + xe::align(sizeof(ArgsVkCopyBufferToImage), alignof(VkBufferImageCopy)); + uint8_t* args_ptr = reinterpret_cast<uint8_t*>( + WriteCommand(Command::kVkCopyBufferToImage, + header_size + sizeof(VkBufferImageCopy) * region_count)); + auto& args = *reinterpret_cast<ArgsVkCopyBufferToImage*>(args_ptr); + args.src_buffer = src_buffer; + args.dst_image = dst_image; + args.dst_image_layout = dst_image_layout; + args.region_count = region_count; + return reinterpret_cast<VkBufferImageCopy*>(args_ptr + header_size); + } + void CmdVkCopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, + VkImageLayout dst_image_layout, + uint32_t region_count, + const VkBufferImageCopy* regions) { + std::memcpy(CmdCopyBufferToImageEmplace(src_buffer, dst_image, + dst_image_layout, region_count), + regions, sizeof(VkBufferImageCopy) * region_count); + } + void CmdVkDispatch(uint32_t group_count_x, uint32_t group_count_y, uint32_t
group_count_z) { auto& args = *reinterpret_cast<ArgsVkDispatch*>( @@ -342,6 +367,7 @@ class DeferredCommandBuffer { kVkClearAttachments, kVkClearColorImage, kVkCopyBuffer, + kVkCopyBufferToImage, kVkDispatch, kVkDraw, kVkDrawIndexed, @@ -428,6 +454,15 @@ class DeferredCommandBuffer { static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t)); }; + struct ArgsVkCopyBufferToImage { + VkBuffer src_buffer; + VkImage dst_image; + VkImageLayout dst_image_layout; + uint32_t region_count; + // Followed by aligned VkBufferImageCopy[]. + static_assert(alignof(VkBufferImageCopy) <= alignof(uintmax_t)); + }; + struct ArgsVkDispatch { uint32_t group_count_x; uint32_t group_count_y; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index ce0ee1576..3f4faef23 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -306,6 +306,7 @@ bool VulkanCommandProcessor::SetupContext() { return false; } + // Requires the transient descriptor set layouts. + // TODO(Triang3l): Actual draw resolution scale.
texture_cache_ = VulkanTextureCache::Create(*register_file_, *shared_memory_, 1, 1, *this, @@ -603,10 +604,11 @@ void VulkanCommandProcessor::ShutdownContext() { const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - for (const auto& framebuffer_pair : swap_framebuffers_outdated_) { - dfn.vkDestroyFramebuffer(device, framebuffer_pair.second, nullptr); - } - swap_framebuffers_outdated_.clear(); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, + scratch_buffer_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, + scratch_buffer_memory_); + for (SwapFramebuffer& swap_framebuffer : swap_framebuffers_) { ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyFramebuffer, device, swap_framebuffer.framebuffer); @@ -675,6 +677,19 @@ void VulkanCommandProcessor::ShutdownContext() { } command_buffers_writable_.clear(); + for (const auto& destroy_pair : destroy_framebuffers_) { + dfn.vkDestroyFramebuffer(device, destroy_pair.second, nullptr); + } + destroy_framebuffers_.clear(); + for (const auto& destroy_pair : destroy_buffers_) { + dfn.vkDestroyBuffer(device, destroy_pair.second, nullptr); + } + destroy_buffers_.clear(); + for (const auto& destroy_pair : destroy_memory_) { + dfn.vkFreeMemory(device, destroy_pair.second, nullptr); + } + destroy_memory_.clear(); + std::memset(closed_frame_submissions_, 0, sizeof(closed_frame_submissions_)); frame_completed_ = 0; frame_current_ = 1; @@ -843,7 +858,7 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, dfn.vkDestroyFramebuffer(device, new_swap_framebuffer.framebuffer, nullptr); } else { - swap_framebuffers_outdated_.emplace_back( + destroy_framebuffers_.emplace_back( new_swap_framebuffer.last_submission, new_swap_framebuffer.framebuffer); } @@ -1387,6 +1402,83 @@ VulkanCommandProcessor::GetPipelineLayout(size_t texture_count_pixel, return &emplaced_pair.first->second; } +VulkanCommandProcessor::ScratchBufferAcquisition 
+VulkanCommandProcessor::AcquireScratchGpuBuffer( + VkDeviceSize size, VkPipelineStageFlags initial_stage_mask, + VkAccessFlags initial_access_mask) { + assert_true(submission_open_); + assert_false(scratch_buffer_used_); + if (!submission_open_ || scratch_buffer_used_ || !size) { + return ScratchBufferAcquisition(); + } + + uint64_t submission_current = GetCurrentSubmission(); + + if (scratch_buffer_ != VK_NULL_HANDLE && size <= scratch_buffer_size_) { + // Already used previously - transition. + PushBufferMemoryBarrier(scratch_buffer_, 0, VK_WHOLE_SIZE, + scratch_buffer_last_stage_mask_, initial_stage_mask, + scratch_buffer_last_access_mask_, + initial_access_mask); + scratch_buffer_last_stage_mask_ = initial_stage_mask; + scratch_buffer_last_access_mask_ = initial_access_mask; + scratch_buffer_last_usage_submission_ = submission_current; + scratch_buffer_used_ = true; + return ScratchBufferAcquisition(*this, scratch_buffer_, initial_stage_mask, + initial_access_mask); + } + + size = xe::align(size, kScratchBufferSizeIncrement); + + const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); + + VkDeviceMemory new_scratch_buffer_memory; + VkBuffer new_scratch_buffer; + // VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT for + // texture loading. 
+ if (!ui::vulkan::util::CreateDedicatedAllocationBuffer( + provider, size, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + ui::vulkan::util::MemoryPurpose::kDeviceLocal, new_scratch_buffer, + new_scratch_buffer_memory)) { + XELOGE( + "VulkanCommandProcessor: Failed to create a {} MB scratch GPU buffer", + size >> 20); + return ScratchBufferAcquisition(); + } + + if (submission_completed_ >= scratch_buffer_last_usage_submission_) { + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + if (scratch_buffer_ != VK_NULL_HANDLE) { + dfn.vkDestroyBuffer(device, scratch_buffer_, nullptr); + } + if (scratch_buffer_memory_ != VK_NULL_HANDLE) { + dfn.vkFreeMemory(device, scratch_buffer_memory_, nullptr); + } + } else { + if (scratch_buffer_ != VK_NULL_HANDLE) { + destroy_buffers_.emplace_back(scratch_buffer_last_usage_submission_, + scratch_buffer_); + } + if (scratch_buffer_memory_ != VK_NULL_HANDLE) { + destroy_memory_.emplace_back(scratch_buffer_last_usage_submission_, + scratch_buffer_memory_); + } + } + + scratch_buffer_memory_ = new_scratch_buffer_memory; + scratch_buffer_ = new_scratch_buffer; + scratch_buffer_size_ = size; + // Not used yet, no need for a barrier. + scratch_buffer_last_stage_mask_ = initial_stage_mask; + scratch_buffer_last_access_mask_ = initial_access_mask; + scratch_buffer_last_usage_submission_ = submission_current; + scratch_buffer_used_ = true; + return ScratchBufferAcquisition(*this, new_scratch_buffer, initial_stage_mask, + initial_access_mask); +} + void VulkanCommandProcessor::BindExternalGraphicsPipeline( VkPipeline pipeline, bool keep_dynamic_depth_bias, bool keep_dynamic_blend_constants, bool keep_dynamic_stencil_mask_ref) { @@ -1915,14 +2007,30 @@ void VulkanCommandProcessor::CheckSubmissionFenceAndDeviceLoss( texture_cache_->CompletedSubmissionUpdated(submission_completed_);
- while (!swap_framebuffers_outdated_.empty()) { - const auto& framebuffer_pair = swap_framebuffers_outdated_.front(); - if (framebuffer_pair.first > submission_completed_) { + // Destroy objects scheduled for destruction. + while (!destroy_framebuffers_.empty()) { + const auto& destroy_pair = destroy_framebuffers_.front(); + if (destroy_pair.first > submission_completed_) { break; } - dfn.vkDestroyFramebuffer(device, framebuffer_pair.second, nullptr); - swap_framebuffers_outdated_.pop_front(); + dfn.vkDestroyFramebuffer(device, destroy_pair.second, nullptr); + destroy_framebuffers_.pop_front(); + } + while (!destroy_buffers_.empty()) { + const auto& destroy_pair = destroy_buffers_.front(); + if (destroy_pair.first > submission_completed_) { + break; + } + dfn.vkDestroyBuffer(device, destroy_pair.second, nullptr); + destroy_buffers_.pop_front(); + } + while (!destroy_memory_.empty()) { + const auto& destroy_pair = destroy_memory_.front(); + if (destroy_pair.first > submission_completed_) { + break; + } + dfn.vkFreeMemory(device, destroy_pair.second, nullptr); + destroy_memory_.pop_front(); } } @@ -2136,6 +2244,8 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { } if (submission_open_) { + assert_false(scratch_buffer_used_); + EndRenderPass(); render_target_cache_->EndSubmission(); @@ -3120,6 +3230,25 @@ uint8_t* VulkanCommandProcessor::WriteTransientUniformBufferBinding( return mapping; } +uint8_t* VulkanCommandProcessor::WriteTransientUniformBufferBinding( + size_t size, SingleTransientDescriptorLayout transient_descriptor_layout, + VkDescriptorSet& descriptor_set_out) { + VkDescriptorBufferInfo write_descriptor_buffer_info; + VkWriteDescriptorSet write_descriptor_set; + uint8_t* mapping = WriteTransientUniformBufferBinding( + size, transient_descriptor_layout, write_descriptor_buffer_info, + write_descriptor_set); + if (!mapping) { + return nullptr; + } + const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); + const 
ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + dfn.vkUpdateDescriptorSets(device, 1, &write_descriptor_set, 0, nullptr); + descriptor_set_out = write_descriptor_set.dstSet; + return mapping; +} + bool VulkanCommandProcessor::WriteTransientTextureBindings( bool is_samplers, bool is_vertex, uint32_t binding_count, VkDescriptorSetLayout descriptor_set_layout, diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 8390b67e2..fa2acbb45 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -58,6 +58,84 @@ class VulkanCommandProcessor : public CommandProcessor { kCount, }; + class ScratchBufferAcquisition { + public: + explicit ScratchBufferAcquisition() = default; + explicit ScratchBufferAcquisition(VulkanCommandProcessor& command_processor, + VkBuffer buffer, + VkPipelineStageFlags stage_mask, + VkAccessFlags access_mask) + : command_processor_(&command_processor), + buffer_(buffer), + stage_mask_(stage_mask), + access_mask_(access_mask) {} + + ScratchBufferAcquisition(const ScratchBufferAcquisition& acquisition) = + delete; + ScratchBufferAcquisition& operator=( + const ScratchBufferAcquisition& acquisition) = delete; + + ScratchBufferAcquisition(ScratchBufferAcquisition&& acquisition) { + command_processor_ = acquisition.command_processor_; + buffer_ = acquisition.buffer_; + stage_mask_ = acquisition.stage_mask_; + access_mask_ = acquisition.access_mask_; + acquisition.command_processor_ = nullptr; + acquisition.buffer_ = VK_NULL_HANDLE; + acquisition.stage_mask_ = 0; + acquisition.access_mask_ = 0; + } + ScratchBufferAcquisition& operator=( + ScratchBufferAcquisition&& acquisition) { + if (this == &acquisition) { + return *this; + } + command_processor_ = acquisition.command_processor_; + buffer_ = acquisition.buffer_; + stage_mask_ = acquisition.stage_mask_; + access_mask_ = 
acquisition.access_mask_; + acquisition.command_processor_ = nullptr; + acquisition.buffer_ = VK_NULL_HANDLE; + acquisition.stage_mask_ = 0; + acquisition.access_mask_ = 0; + return *this; + } + + ~ScratchBufferAcquisition() { + if (buffer_ != VK_NULL_HANDLE) { + assert_true(command_processor_->scratch_buffer_used_); + assert_true(command_processor_->scratch_buffer_ == buffer_); + command_processor_->scratch_buffer_last_stage_mask_ = stage_mask_; + command_processor_->scratch_buffer_last_access_mask_ = access_mask_; + command_processor_->scratch_buffer_last_usage_submission_ = + command_processor_->GetCurrentSubmission(); + command_processor_->scratch_buffer_used_ = false; + } + } + + // VK_NULL_HANDLE if failed to acquire or if moved. + VkBuffer buffer() const { return buffer_; } + + VkPipelineStageFlags GetStageMask() const { return stage_mask_; } + VkPipelineStageFlags SetStageMask(VkPipelineStageFlags new_stage_mask) { + VkPipelineStageFlags old_stage_mask = stage_mask_; + stage_mask_ = new_stage_mask; + return old_stage_mask; + } + VkAccessFlags GetAccessMask() const { return access_mask_; } + VkAccessFlags SetAccessMask(VkAccessFlags new_access_mask) { + VkAccessFlags old_access_mask = access_mask_; + access_mask_ = new_access_mask; + return old_access_mask; + } + + private: + VulkanCommandProcessor* command_processor_ = nullptr; + VkBuffer buffer_ = VK_NULL_HANDLE; + VkPipelineStageFlags stage_mask_ = 0; + VkAccessFlags access_mask_ = 0; + }; + VulkanCommandProcessor(VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state); ~VulkanCommandProcessor(); @@ -140,6 +218,16 @@ class VulkanCommandProcessor : public CommandProcessor { // A frame must be open. VkDescriptorSet AllocateSingleTransientDescriptor( SingleTransientDescriptorLayout transient_descriptor_layout); + // Allocates a descriptor, space in the uniform buffer pool, and fills the + // VkWriteDescriptorSet structure and VkDescriptorBufferInfo referenced by it. 
+ // Returns null in case of failure. + uint8_t* WriteTransientUniformBufferBinding( + size_t size, SingleTransientDescriptorLayout transient_descriptor_layout, + VkDescriptorBufferInfo& descriptor_buffer_info_out, + VkWriteDescriptorSet& write_descriptor_set_out); + uint8_t* WriteTransientUniformBufferBinding( + size_t size, SingleTransientDescriptorLayout transient_descriptor_layout, + VkDescriptorSet& descriptor_set_out); // The returned reference is valid until a cache clear. VkDescriptorSetLayout GetTextureDescriptorSetLayout(bool is_samplers, @@ -150,6 +238,13 @@ class VulkanCommandProcessor : public CommandProcessor { size_t texture_count_pixel, size_t sampler_count_pixel, size_t texture_count_vertex, size_t sampler_count_vertex); + // Returns a single temporary GPU-side buffer within a submission for tasks + // like texture untiling and resolving. May push a buffer memory barrier into + // the initial usage. Submission must be open. + ScratchBufferAcquisition AcquireScratchGpuBuffer( + VkDeviceSize size, VkPipelineStageFlags initial_stage_mask, + VkAccessFlags initial_access_mask); + // Binds a graphics pipeline for host-specific purposes, invalidating the // affected state. keep_dynamic_* must be false (to invalidate the dynamic // state after binding the pipeline with the same state being static, or if @@ -340,13 +435,6 @@ class VulkanCommandProcessor : public CommandProcessor { const draw_util::ViewportInfo& viewport_info); bool UpdateBindings(const VulkanShader* vertex_shader, const VulkanShader* pixel_shader); - // Allocates a descriptor, space in the uniform buffer pool, and fills the - // VkWriteDescriptorSet structure and VkDescriptorBufferInfo referenced by it. - // Returns null in case of failure. 
- uint8_t* WriteTransientUniformBufferBinding( - size_t size, SingleTransientDescriptorLayout transient_descriptor_layout, - VkDescriptorBufferInfo& descriptor_buffer_info_out, - VkWriteDescriptorSet& write_descriptor_set_out); // Allocates a descriptor set and fills the VkWriteDescriptorSet structure. // The descriptor set layout must be the one for the given is_samplers, // is_vertex, binding_count (from GetTextureDescriptorSetLayout - may be @@ -390,6 +478,11 @@ class VulkanCommandProcessor : public CommandProcessor { // Submission indices of frames that have already been submitted. uint64_t closed_frame_submissions_[kMaxFramesInFlight] = {}; + // , sorted by the submission number. + std::deque> destroy_memory_; + std::deque> destroy_buffers_; + std::deque> destroy_framebuffers_; + std::vector command_buffers_writable_; std::deque> command_buffers_submitted_; DeferredCommandBuffer deferred_command_buffer_; @@ -491,6 +584,16 @@ class VulkanCommandProcessor : public CommandProcessor { std::vector pending_barriers_; PendingBarrier current_pending_barrier_; + // GPU-local scratch buffer. + static constexpr VkDeviceSize kScratchBufferSizeIncrement = 16 * 1024 * 1024; + VkDeviceMemory scratch_buffer_memory_ = VK_NULL_HANDLE; + VkBuffer scratch_buffer_ = VK_NULL_HANDLE; + VkDeviceSize scratch_buffer_size_ = 0; + VkPipelineStageFlags scratch_buffer_last_stage_mask_ = 0; + VkAccessFlags scratch_buffer_last_access_mask_ = 0; + uint64_t scratch_buffer_last_usage_submission_ = 0; + bool scratch_buffer_used_ = false; + // The current dynamic state of the graphics pipeline bind point. 
Note that // binding any pipeline to the bind point with static state (even if it's // unused, like depth bias being disabled, but the values themselves still not diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index 411b55eea..b7bd5fd9b 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -10,12 +10,15 @@ #include "xenia/gpu/vulkan/vulkan_texture_cache.h" #include +#include +#include #include "xenia/base/assert.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/base/profiling.h" #include "xenia/gpu/texture_info.h" +#include "xenia/gpu/vulkan/deferred_command_buffer.h" #include "xenia/gpu/vulkan/vulkan_command_processor.h" #include "xenia/ui/vulkan/vulkan_util.h" @@ -79,319 +82,322 @@ namespace shaders { #include "xenia/gpu/shaders/bytecode/vulkan_spirv/texture_load_rgba16_unorm_float_scaled_cs.h" } // namespace shaders +static_assert(VK_FORMAT_UNDEFINED == VkFormat(0), + "Assuming that skipping a VkFormat in an initializer results in " + "VK_FORMAT_UNDEFINED"); const VulkanTextureCache::HostFormatPair VulkanTextureCache::kBestHostFormats[64] = { // k_1_REVERSE - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_1 - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_8 - {{LoadMode::k8bpb, VK_FORMAT_R8_UNORM}, - {LoadMode::k8bpb, VK_FORMAT_R8_SNORM}, + {{kLoadShaderIndex8bpb, VK_FORMAT_R8_UNORM}, + {kLoadShaderIndex8bpb, VK_FORMAT_R8_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR, true}, // k_1_5_5_5 // Red and blue swapped in the load shader for simplicity. 
- {{LoadMode::kR5G5B5A1ToB5G5R5A1, VK_FORMAT_A1R5G5B5_UNORM_PACK16}, - {}, + {{kLoadShaderIndexR5G5B5A1ToB5G5R5A1, VK_FORMAT_A1R5G5B5_UNORM_PACK16}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_5_6_5 // Red and blue swapped in the load shader for simplicity. - {{LoadMode::kR5G6B5ToB5G6R5, VK_FORMAT_R5G6B5_UNORM_PACK16}, - {}, + {{kLoadShaderIndexR5G6B5ToB5G6R5, VK_FORMAT_R5G6B5_UNORM_PACK16}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_6_5_5 // On the host, green bits in blue, blue bits in green. - {{LoadMode::kR5G5B6ToB5G6R5WithRBGASwizzle, + {{kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle, VK_FORMAT_R5G6B5_UNORM_PACK16}, - {}, + {kLoadShaderIndexUnknown}, XE_GPU_MAKE_TEXTURE_SWIZZLE(R, B, G, G)}, // k_8_8_8_8 - {{LoadMode::k32bpb, VK_FORMAT_R8G8B8A8_UNORM}, - {LoadMode::k32bpb, VK_FORMAT_R8G8B8A8_SNORM}, + {{kLoadShaderIndex32bpb, VK_FORMAT_R8G8B8A8_UNORM}, + {kLoadShaderIndex32bpb, VK_FORMAT_R8G8B8A8_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA, true}, // k_2_10_10_10 // VK_FORMAT_A2B10G10R10_SNORM_PACK32 is optional. 
- {{LoadMode::k32bpb, VK_FORMAT_A2B10G10R10_UNORM_PACK32}, - {LoadMode::k32bpb, VK_FORMAT_A2B10G10R10_SNORM_PACK32}, + {{kLoadShaderIndex32bpb, VK_FORMAT_A2B10G10R10_UNORM_PACK32}, + {kLoadShaderIndex32bpb, VK_FORMAT_A2B10G10R10_SNORM_PACK32}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA, true}, // k_8_A - {{LoadMode::k8bpb, VK_FORMAT_R8_UNORM}, - {LoadMode::k8bpb, VK_FORMAT_R8_SNORM}, + {{kLoadShaderIndex8bpb, VK_FORMAT_R8_UNORM}, + {kLoadShaderIndex8bpb, VK_FORMAT_R8_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR, true}, // k_8_B - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_8_8 - {{LoadMode::k16bpb, VK_FORMAT_R8G8_UNORM}, - {LoadMode::k16bpb, VK_FORMAT_R8G8_SNORM}, + {{kLoadShaderIndex16bpb, VK_FORMAT_R8G8_UNORM}, + {kLoadShaderIndex16bpb, VK_FORMAT_R8G8_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG, true}, // k_Cr_Y1_Cb_Y0_REP // VK_FORMAT_G8B8G8R8_422_UNORM_KHR (added in // VK_KHR_sampler_ycbcr_conversion and promoted to Vulkan 1.1) is // optional. - {{LoadMode::k32bpb, VK_FORMAT_G8B8G8R8_422_UNORM_KHR, true}, - {LoadMode::kGBGR8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM}, + {{kLoadShaderIndex32bpb, VK_FORMAT_G8B8G8R8_422_UNORM_KHR, true}, + {kLoadShaderIndexGBGR8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_Y1_Cr_Y0_Cb_REP // VK_FORMAT_B8G8R8G8_422_UNORM_KHR (added in // VK_KHR_sampler_ycbcr_conversion and promoted to Vulkan 1.1) is // optional. - {{LoadMode::k32bpb, VK_FORMAT_B8G8R8G8_422_UNORM_KHR, true}, - {LoadMode::kBGRG8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM}, + {{kLoadShaderIndex32bpb, VK_FORMAT_B8G8R8G8_422_UNORM_KHR, true}, + {kLoadShaderIndexBGRG8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_16_16_EDRAM // Not usable as a texture, also has -32...32 range. 
- {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_8_8_8_8_A - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_4_4_4_4 // Components swapped in the load shader for simplicity. - {{LoadMode::kRGBA4ToARGB4, VK_FORMAT_B4G4R4A4_UNORM_PACK16}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexRGBA4ToARGB4, VK_FORMAT_B4G4R4A4_UNORM_PACK16}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_10_11_11 // TODO(Triang3l): 16_UNORM/SNORM are optional, convert to float16 // instead. - {{LoadMode::kR11G11B10ToRGBA16, VK_FORMAT_R16G16B16A16_UNORM}, - {LoadMode::kR11G11B10ToRGBA16SNorm, VK_FORMAT_R16G16B16A16_SNORM}, + {{kLoadShaderIndexR11G11B10ToRGBA16, VK_FORMAT_R16G16B16A16_UNORM}, + {kLoadShaderIndexR11G11B10ToRGBA16SNorm, VK_FORMAT_R16G16B16A16_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_11_11_10 // TODO(Triang3l): 16_UNORM/SNORM are optional, convert to float16 // instead. - {{LoadMode::kR10G11B11ToRGBA16, VK_FORMAT_R16G16B16A16_UNORM}, - {LoadMode::kR10G11B11ToRGBA16SNorm, VK_FORMAT_R16G16B16A16_SNORM}, + {{kLoadShaderIndexR10G11B11ToRGBA16, VK_FORMAT_R16G16B16A16_UNORM}, + {kLoadShaderIndexR10G11B11ToRGBA16SNorm, VK_FORMAT_R16G16B16A16_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_DXT1 // VK_FORMAT_BC1_RGBA_UNORM_BLOCK is optional. - {{LoadMode::k64bpb, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, true}, - {LoadMode::kUnknown}, + {{kLoadShaderIndex64bpb, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, true}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_DXT2_3 // VK_FORMAT_BC2_UNORM_BLOCK is optional. 
- {{LoadMode::k128bpb, VK_FORMAT_BC2_UNORM_BLOCK, true}, - {LoadMode::kUnknown}, + {{kLoadShaderIndex128bpb, VK_FORMAT_BC2_UNORM_BLOCK, true}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_DXT4_5 // VK_FORMAT_BC3_UNORM_BLOCK is optional. - {{LoadMode::k128bpb, VK_FORMAT_BC3_UNORM_BLOCK, true}, - {LoadMode::kUnknown}, + {{kLoadShaderIndex128bpb, VK_FORMAT_BC3_UNORM_BLOCK, true}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_16_16_16_16_EDRAM // Not usable as a texture, also has -32...32 range. - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_24_8 - {{LoadMode::kDepthUnorm, VK_FORMAT_R32_SFLOAT}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexDepthUnorm, VK_FORMAT_R32_SFLOAT}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_24_8_FLOAT - {{LoadMode::kDepthFloat, VK_FORMAT_R32_SFLOAT}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexDepthFloat, VK_FORMAT_R32_SFLOAT}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_16 // VK_FORMAT_R16_UNORM and VK_FORMAT_R16_SNORM are optional. - {{LoadMode::k16bpb, VK_FORMAT_R16_UNORM}, - {LoadMode::k16bpb, VK_FORMAT_R16_SNORM}, + {{kLoadShaderIndex16bpb, VK_FORMAT_R16_UNORM}, + {kLoadShaderIndex16bpb, VK_FORMAT_R16_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR, true}, // k_16_16 // VK_FORMAT_R16G16_UNORM and VK_FORMAT_R16G16_SNORM are optional. - {{LoadMode::k32bpb, VK_FORMAT_R16G16_UNORM}, - {LoadMode::k32bpb, VK_FORMAT_R16G16_SNORM}, + {{kLoadShaderIndex32bpb, VK_FORMAT_R16G16_UNORM}, + {kLoadShaderIndex32bpb, VK_FORMAT_R16G16_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG, true}, // k_16_16_16_16 // VK_FORMAT_R16G16B16A16_UNORM and VK_FORMAT_R16G16B16A16_SNORM are // optional. 
- {{LoadMode::k64bpb, VK_FORMAT_R16G16B16A16_UNORM}, - {LoadMode::k64bpb, VK_FORMAT_R16G16B16A16_SNORM}, + {{kLoadShaderIndex64bpb, VK_FORMAT_R16G16B16A16_UNORM}, + {kLoadShaderIndex64bpb, VK_FORMAT_R16G16B16A16_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA, true}, // k_16_EXPAND - {{LoadMode::k16bpb, VK_FORMAT_R16_SFLOAT}, - {LoadMode::k16bpb, VK_FORMAT_R16_SFLOAT}, + {{kLoadShaderIndex16bpb, VK_FORMAT_R16_SFLOAT}, + {kLoadShaderIndex16bpb, VK_FORMAT_R16_SFLOAT}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR, true}, // k_16_16_EXPAND - {{LoadMode::k32bpb, VK_FORMAT_R16G16_SFLOAT}, - {LoadMode::k32bpb, VK_FORMAT_R16G16_SFLOAT}, + {{kLoadShaderIndex32bpb, VK_FORMAT_R16G16_SFLOAT}, + {kLoadShaderIndex32bpb, VK_FORMAT_R16G16_SFLOAT}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG, true}, // k_16_16_16_16_EXPAND - {{LoadMode::k64bpb, VK_FORMAT_R16G16B16A16_SFLOAT}, - {LoadMode::k64bpb, VK_FORMAT_R16G16B16A16_SFLOAT}, + {{kLoadShaderIndex64bpb, VK_FORMAT_R16G16B16A16_SFLOAT}, + {kLoadShaderIndex64bpb, VK_FORMAT_R16G16B16A16_SFLOAT}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA, true}, // k_16_FLOAT - {{LoadMode::k16bpb, VK_FORMAT_R16_SFLOAT}, - {LoadMode::k16bpb, VK_FORMAT_R16_SFLOAT}, + {{kLoadShaderIndex16bpb, VK_FORMAT_R16_SFLOAT}, + {kLoadShaderIndex16bpb, VK_FORMAT_R16_SFLOAT}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR, true}, // k_16_16_FLOAT - {{LoadMode::k32bpb, VK_FORMAT_R16G16_SFLOAT}, - {LoadMode::k32bpb, VK_FORMAT_R16G16_SFLOAT}, + {{kLoadShaderIndex32bpb, VK_FORMAT_R16G16_SFLOAT}, + {kLoadShaderIndex32bpb, VK_FORMAT_R16G16_SFLOAT}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG, true}, // k_16_16_16_16_FLOAT - {{LoadMode::k64bpb, VK_FORMAT_R16G16B16A16_SFLOAT}, - {LoadMode::k64bpb, VK_FORMAT_R16G16B16A16_SFLOAT}, + {{kLoadShaderIndex64bpb, VK_FORMAT_R16G16B16A16_SFLOAT}, + {kLoadShaderIndex64bpb, VK_FORMAT_R16G16B16A16_SFLOAT}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA, true}, // k_32 - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, 
xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_32_32 - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_32_32_32_32 - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_32_FLOAT - {{LoadMode::k32bpb, VK_FORMAT_R32_SFLOAT}, - {LoadMode::k32bpb, VK_FORMAT_R32_SFLOAT}, + {{kLoadShaderIndex32bpb, VK_FORMAT_R32_SFLOAT}, + {kLoadShaderIndex32bpb, VK_FORMAT_R32_SFLOAT}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR, true}, // k_32_32_FLOAT - {{LoadMode::k64bpb, VK_FORMAT_R32G32_SFLOAT}, - {LoadMode::k64bpb, VK_FORMAT_R32G32_SFLOAT}, + {{kLoadShaderIndex64bpb, VK_FORMAT_R32G32_SFLOAT}, + {kLoadShaderIndex64bpb, VK_FORMAT_R32G32_SFLOAT}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG, true}, // k_32_32_32_32_FLOAT - {{LoadMode::k128bpb, VK_FORMAT_R32G32B32A32_SFLOAT}, - {LoadMode::k128bpb, VK_FORMAT_R32G32B32A32_SFLOAT}, + {{kLoadShaderIndex128bpb, VK_FORMAT_R32G32B32A32_SFLOAT}, + {kLoadShaderIndex128bpb, VK_FORMAT_R32G32B32A32_SFLOAT}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA, true}, // k_32_AS_8 - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_32_AS_8_8 - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_16_MPEG - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_16_16_MPEG - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_8_INTERLACED - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_32_AS_8_INTERLACED - 
{{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_32_AS_8_8_INTERLACED - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_16_INTERLACED - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_16_MPEG_INTERLACED - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_16_16_MPEG_INTERLACED - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_DXN // VK_FORMAT_BC5_UNORM_BLOCK is optional. - {{LoadMode::k128bpb, VK_FORMAT_BC5_UNORM_BLOCK, true}, - {LoadMode::kUnknown}, + {{kLoadShaderIndex128bpb, VK_FORMAT_BC5_UNORM_BLOCK, true}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_8_8_8_8_AS_16_16_16_16 - {{LoadMode::k32bpb, VK_FORMAT_R8G8B8A8_UNORM}, - {LoadMode::k32bpb, VK_FORMAT_R8G8B8A8_SNORM}, + {{kLoadShaderIndex32bpb, VK_FORMAT_R8G8B8A8_UNORM}, + {kLoadShaderIndex32bpb, VK_FORMAT_R8G8B8A8_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA, true}, // k_DXT1_AS_16_16_16_16 // VK_FORMAT_BC1_RGBA_UNORM_BLOCK is optional. - {{LoadMode::k64bpb, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, true}, - {LoadMode::kUnknown}, + {{kLoadShaderIndex64bpb, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, true}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_DXT2_3_AS_16_16_16_16 // VK_FORMAT_BC2_UNORM_BLOCK is optional. - {{LoadMode::k128bpb, VK_FORMAT_BC2_UNORM_BLOCK, true}, - {LoadMode::kUnknown}, + {{kLoadShaderIndex128bpb, VK_FORMAT_BC2_UNORM_BLOCK, true}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_DXT4_5_AS_16_16_16_16 // VK_FORMAT_BC3_UNORM_BLOCK is optional. 
- {{LoadMode::k128bpb, VK_FORMAT_BC3_UNORM_BLOCK, true}, - {LoadMode::kUnknown}, + {{kLoadShaderIndex128bpb, VK_FORMAT_BC3_UNORM_BLOCK, true}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_2_10_10_10_AS_16_16_16_16 // VK_FORMAT_A2B10G10R10_SNORM_PACK32 is optional. - {{LoadMode::k32bpb, VK_FORMAT_A2B10G10R10_UNORM_PACK32}, - {LoadMode::k32bpb, VK_FORMAT_A2B10G10R10_SNORM_PACK32}, + {{kLoadShaderIndex32bpb, VK_FORMAT_A2B10G10R10_UNORM_PACK32}, + {kLoadShaderIndex32bpb, VK_FORMAT_A2B10G10R10_SNORM_PACK32}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA, true}, // k_10_11_11_AS_16_16_16_16 // TODO(Triang3l): 16_UNORM/SNORM are optional, convert to float16 // instead. - {{LoadMode::kR11G11B10ToRGBA16, VK_FORMAT_R16G16B16A16_UNORM}, - {LoadMode::kR11G11B10ToRGBA16SNorm, VK_FORMAT_R16G16B16A16_SNORM}, + {{kLoadShaderIndexR11G11B10ToRGBA16, VK_FORMAT_R16G16B16A16_UNORM}, + {kLoadShaderIndexR11G11B10ToRGBA16SNorm, VK_FORMAT_R16G16B16A16_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_11_11_10_AS_16_16_16_16 // TODO(Triang3l): 16_UNORM/SNORM are optional, convert to float16 // instead. - {{LoadMode::kR10G11B11ToRGBA16, VK_FORMAT_R16G16B16A16_UNORM}, - {LoadMode::kR10G11B11ToRGBA16SNorm, VK_FORMAT_R16G16B16A16_SNORM}, + {{kLoadShaderIndexR10G11B11ToRGBA16, VK_FORMAT_R16G16B16A16_UNORM}, + {kLoadShaderIndexR10G11B11ToRGBA16SNorm, VK_FORMAT_R16G16B16A16_SNORM}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_32_32_32_FLOAT - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB}, // k_DXT3A - {{LoadMode::kDXT3A, VK_FORMAT_R8_UNORM}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexDXT3A, VK_FORMAT_R8_UNORM}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_DXT5A // VK_FORMAT_BC4_UNORM_BLOCK is optional. 
- {{LoadMode::k64bpb, VK_FORMAT_BC4_UNORM_BLOCK, true}, - {LoadMode::kUnknown}, + {{kLoadShaderIndex64bpb, VK_FORMAT_BC4_UNORM_BLOCK, true}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_CTX1 - {{LoadMode::kCTX1, VK_FORMAT_R8G8_UNORM}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexCTX1, VK_FORMAT_R8G8_UNORM}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_DXT3A_AS_1_1_1_1 - {{LoadMode::kDXT3AAs1111ToARGB4, VK_FORMAT_B4G4R4A4_UNORM_PACK16}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexDXT3AAs1111ToARGB4, VK_FORMAT_B4G4R4A4_UNORM_PACK16}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_8_8_8_8_GAMMA_EDRAM // Not usable as a texture. - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, // k_2_10_10_10_FLOAT_EDRAM // Not usable as a texture. - {{LoadMode::kUnknown}, - {LoadMode::kUnknown}, + {{kLoadShaderIndexUnknown}, + {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, }; @@ -421,6 +427,19 @@ VulkanTextureCache::~VulkanTextureCache() { dfn.vkFreeMemory(device, null_images_memory, nullptr); } } + for (VkPipeline load_pipeline : load_pipelines_scaled_) { + if (load_pipeline != VK_NULL_HANDLE) { + dfn.vkDestroyPipeline(device, load_pipeline, nullptr); + } + } + for (VkPipeline load_pipeline : load_pipelines_) { + if (load_pipeline != VK_NULL_HANDLE) { + dfn.vkDestroyPipeline(device, load_pipeline, nullptr); + } + } + if (load_pipeline_layout_ != VK_NULL_HANDLE) { + dfn.vkDestroyPipelineLayout(device, load_pipeline_layout_, nullptr); + } } void VulkanTextureCache::BeginSubmission(uint64_t new_submission_index) { @@ -712,7 +731,454 @@ std::unique_ptr VulkanTextureCache::CreateTexture( bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, bool load_base, bool load_mips) { - // TODO(Triang3l): Implement LoadTextureDataFromResidentMemoryImpl. 
+ VulkanTexture& vulkan_texture = static_cast(texture); + TextureKey texture_key = vulkan_texture.key(); + + // Get the pipeline. + const HostFormatPair& host_format_pair = + host_formats_[uint32_t(texture_key.format)]; + bool host_format_is_signed; + if (IsSignedVersionSeparateForFormat(texture_key)) { + host_format_is_signed = bool(texture_key.signed_separate); + } else { + host_format_is_signed = + host_format_pair.format_unsigned.load_shader == kLoadShaderIndexUnknown; + } + const HostFormat& host_format = host_format_is_signed + ? host_format_pair.format_signed + : host_format_pair.format_unsigned; + LoadShaderIndex load_shader = host_format.load_shader; + if (load_shader == kLoadShaderIndexUnknown) { + return false; + } + VkPipeline pipeline = texture_key.scaled_resolve + ? load_pipelines_scaled_[load_shader] + : load_pipelines_[load_shader]; + if (pipeline == VK_NULL_HANDLE) { + return false; + } + const LoadShaderInfo& load_shader_info = GetLoadShaderInfo(load_shader); + + // Get the guest layout. + const texture_util::TextureGuestLayout& guest_layout = + vulkan_texture.guest_layout(); + xenos::DataDimension dimension = texture_key.dimension; + bool is_3d = dimension == xenos::DataDimension::k3D; + uint32_t width = texture_key.GetWidth(); + uint32_t height = texture_key.GetHeight(); + uint32_t depth_or_array_size = texture_key.GetDepthOrArraySize(); + uint32_t depth = is_3d ? depth_or_array_size : 1; + uint32_t array_size = is_3d ? 1 : depth_or_array_size; + xenos::TextureFormat guest_format = texture_key.format; + const FormatInfo* guest_format_info = FormatInfo::Get(guest_format); + uint32_t block_width = guest_format_info->block_width; + uint32_t block_height = guest_format_info->block_height; + uint32_t bytes_per_block = guest_format_info->bytes_per_block(); + uint32_t level_first = load_base ? 0 : 1; + uint32_t level_last = load_mips ? 
texture_key.mip_max_level : 0; + assert_true(level_first <= level_last); + uint32_t level_packed = guest_layout.packed_level; + uint32_t level_stored_first = std::min(level_first, level_packed); + uint32_t level_stored_last = std::min(level_last, level_packed); + uint32_t texture_resolution_scale_x = + texture_key.scaled_resolve ? draw_resolution_scale_x() : 1; + uint32_t texture_resolution_scale_y = + texture_key.scaled_resolve ? draw_resolution_scale_y() : 1; + + // The loop counter can mean two things depending on whether the packed mip + // tail is stored as mip 0, because in this case, it would be ambiguous since + // both the base and the mips would be on "level 0", but stored in separate + // places. + uint32_t loop_level_first, loop_level_last; + if (level_packed == 0) { + // Packed mip tail is the level 0 - may need to load mip tails for the base, + // the mips, or both. + // Loop iteration 0 - base packed mip tail. + // Loop iteration 1 - mips packed mip tail. + loop_level_first = uint32_t(level_first != 0); + loop_level_last = uint32_t(level_last != 0); + } else { + // Packed mip tail is not the level 0. + // Loop iteration is the actual level being loaded. + loop_level_first = level_stored_first; + loop_level_last = level_stored_last; + } + + // Get the host layout and the buffer. + uint32_t host_block_width = host_format.block_compressed ? block_width : 1; + uint32_t host_block_height = host_format.block_compressed ? block_height : 1; + uint32_t host_x_blocks_per_thread = + UINT32_C(1) << load_shader_info.guest_x_blocks_per_thread_log2; + if (!host_format.block_compressed) { + // Decompressing guest blocks. 
+ host_x_blocks_per_thread *= block_width; + } + VkDeviceSize host_buffer_size = 0; + struct HostLayout { + VkDeviceSize offset_bytes; + VkDeviceSize slice_size_bytes; + uint32_t x_pitch_blocks; + uint32_t y_pitch_blocks; + }; + HostLayout host_layout_base; + // Indexing is the same as for guest stored mips: + // 1...min(level_last, level_packed) if level_packed is not 0, or only 0 if + // level_packed == 0. + HostLayout host_layout_mips[xenos::kTextureMaxMips]; + for (uint32_t loop_level = loop_level_first; loop_level <= loop_level_last; + ++loop_level) { + bool is_base = loop_level == 0; + uint32_t level = (level_packed == 0) ? 0 : loop_level; + HostLayout& level_host_layout = + is_base ? host_layout_base : host_layout_mips[level]; + level_host_layout.offset_bytes = host_buffer_size; + uint32_t level_guest_x_extent_texels_unscaled; + uint32_t level_guest_y_extent_texels_unscaled; + uint32_t level_guest_z_extent_texels; + if (level == level_packed) { + // Loading the packed tail for the base or the mips - load the whole tail + // to copy regions out of it. + const texture_util::TextureGuestLayout::Level& guest_layout_packed = + is_base ? 
guest_layout.base : guest_layout.mips[level]; + level_guest_x_extent_texels_unscaled = + guest_layout_packed.x_extent_blocks * block_width; + level_guest_y_extent_texels_unscaled = + guest_layout_packed.y_extent_blocks * block_height; + level_guest_z_extent_texels = guest_layout_packed.z_extent; + } else { + level_guest_x_extent_texels_unscaled = + std::max(width >> level, UINT32_C(1)); + level_guest_y_extent_texels_unscaled = + std::max(height >> level, UINT32_C(1)); + level_guest_z_extent_texels = std::max(depth >> level, UINT32_C(1)); + } + level_host_layout.x_pitch_blocks = xe::round_up( + (level_guest_x_extent_texels_unscaled * texture_resolution_scale_x + + (host_block_width - 1)) / + host_block_width, + host_x_blocks_per_thread); + level_host_layout.y_pitch_blocks = + (level_guest_y_extent_texels_unscaled * texture_resolution_scale_y + + (host_block_height - 1)) / + host_block_height; + level_host_layout.slice_size_bytes = + VkDeviceSize(load_shader_info.bytes_per_host_block) * + level_host_layout.x_pitch_blocks * level_host_layout.y_pitch_blocks * + level_guest_z_extent_texels; + host_buffer_size += level_host_layout.slice_size_bytes * array_size; + } + VulkanCommandProcessor::ScratchBufferAcquisition scratch_buffer_acquisition( + command_processor_.AcquireScratchGpuBuffer( + host_buffer_size, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT)); + VkBuffer scratch_buffer = scratch_buffer_acquisition.buffer(); + if (scratch_buffer == VK_NULL_HANDLE) { + return false; + } + + // Begin loading. + // TODO(Triang3l): Going from one descriptor to another on per-array-layer + // or even per-8-depth-slices level to stay within maxStorageBufferRange. 
+ const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + VulkanSharedMemory& vulkan_shared_memory = + static_cast(shared_memory()); + std::array write_descriptor_sets; + uint32_t write_descriptor_set_count = 0; + VkDescriptorSet descriptor_set_dest = + command_processor_.AllocateSingleTransientDescriptor( + VulkanCommandProcessor::SingleTransientDescriptorLayout :: + kStorageBufferCompute); + if (!descriptor_set_dest) { + return false; + } + VkDescriptorBufferInfo write_descriptor_set_dest_buffer_info; + { + write_descriptor_set_dest_buffer_info.buffer = scratch_buffer; + write_descriptor_set_dest_buffer_info.offset = 0; + write_descriptor_set_dest_buffer_info.range = host_buffer_size; + VkWriteDescriptorSet& write_descriptor_set_dest = + write_descriptor_sets[write_descriptor_set_count++]; + write_descriptor_set_dest.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_descriptor_set_dest.pNext = nullptr; + write_descriptor_set_dest.dstSet = descriptor_set_dest; + write_descriptor_set_dest.dstBinding = 0; + write_descriptor_set_dest.dstArrayElement = 0; + write_descriptor_set_dest.descriptorCount = 1; + write_descriptor_set_dest.descriptorType = + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + write_descriptor_set_dest.pImageInfo = nullptr; + write_descriptor_set_dest.pBufferInfo = + &write_descriptor_set_dest_buffer_info; + write_descriptor_set_dest.pTexelBufferView = nullptr; + } + // TODO(Triang3l): Use a single 512 MB shared memory binding if possible. + // TODO(Triang3l): Scaled resolve buffer bindings. 
+ VkDescriptorSet descriptor_set_source_base = VK_NULL_HANDLE; + VkDescriptorSet descriptor_set_source_mips = VK_NULL_HANDLE; + VkDescriptorBufferInfo write_descriptor_set_source_base_buffer_info; + VkDescriptorBufferInfo write_descriptor_set_source_mips_buffer_info; + if (level_first == 0) { + descriptor_set_source_base = + command_processor_.AllocateSingleTransientDescriptor( + VulkanCommandProcessor::SingleTransientDescriptorLayout :: + kStorageBufferCompute); + if (!descriptor_set_source_base) { + return false; + } + write_descriptor_set_source_base_buffer_info.buffer = + vulkan_shared_memory.buffer(); + write_descriptor_set_source_base_buffer_info.offset = texture_key.base_page + << 12; + write_descriptor_set_source_base_buffer_info.range = + vulkan_texture.GetGuestBaseSize(); + VkWriteDescriptorSet& write_descriptor_set_source_base = + write_descriptor_sets[write_descriptor_set_count++]; + write_descriptor_set_source_base.sType = + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_descriptor_set_source_base.pNext = nullptr; + write_descriptor_set_source_base.dstSet = descriptor_set_source_base; + write_descriptor_set_source_base.dstBinding = 0; + write_descriptor_set_source_base.dstArrayElement = 0; + write_descriptor_set_source_base.descriptorCount = 1; + write_descriptor_set_source_base.descriptorType = + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + write_descriptor_set_source_base.pImageInfo = nullptr; + write_descriptor_set_source_base.pBufferInfo = + &write_descriptor_set_source_base_buffer_info; + write_descriptor_set_source_base.pTexelBufferView = nullptr; + } + if (level_last != 0) { + descriptor_set_source_mips = + command_processor_.AllocateSingleTransientDescriptor( + VulkanCommandProcessor::SingleTransientDescriptorLayout :: + kStorageBufferCompute); + if (!descriptor_set_source_mips) { + return false; + } + write_descriptor_set_source_mips_buffer_info.buffer = + vulkan_shared_memory.buffer(); + write_descriptor_set_source_mips_buffer_info.offset = 
texture_key.mip_page + << 12; + write_descriptor_set_source_mips_buffer_info.range = + vulkan_texture.GetGuestMipsSize(); + VkWriteDescriptorSet& write_descriptor_set_source_mips = + write_descriptor_sets[write_descriptor_set_count++]; + write_descriptor_set_source_mips.sType = + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_descriptor_set_source_mips.pNext = nullptr; + write_descriptor_set_source_mips.dstSet = descriptor_set_source_mips; + write_descriptor_set_source_mips.dstBinding = 0; + write_descriptor_set_source_mips.dstArrayElement = 0; + write_descriptor_set_source_mips.descriptorCount = 1; + write_descriptor_set_source_mips.descriptorType = + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + write_descriptor_set_source_mips.pImageInfo = nullptr; + write_descriptor_set_source_mips.pBufferInfo = + &write_descriptor_set_source_mips_buffer_info; + write_descriptor_set_source_mips.pTexelBufferView = nullptr; + } + if (write_descriptor_set_count) { + dfn.vkUpdateDescriptorSets(device, write_descriptor_set_count, + write_descriptor_sets.data(), 0, nullptr); + } + vulkan_shared_memory.Use(VulkanSharedMemory::Usage::kRead); + + // Submit the copy buffer population commands. 
+ + DeferredCommandBuffer& command_buffer = + command_processor_.deferred_command_buffer(); + + command_processor_.BindExternalComputePipeline(pipeline); + + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_COMPUTE, load_pipeline_layout_, + kLoadDescriptorSetIndexDestination, 1, &descriptor_set_dest, 0, nullptr); + + VkDescriptorSet descriptor_set_source_current = VK_NULL_HANDLE; + + LoadConstants load_constants; + load_constants.is_tiled_3d_endian_scale = + uint32_t(texture_key.tiled) | (uint32_t(is_3d) << 1) | + (uint32_t(texture_key.endianness) << 2) | + (texture_resolution_scale_x << 4) | (texture_resolution_scale_y << 6); + + uint32_t guest_x_blocks_per_group_log2 = + load_shader_info.GetGuestXBlocksPerGroupLog2(); + for (uint32_t loop_level = loop_level_first; loop_level <= loop_level_last; + ++loop_level) { + bool is_base = loop_level == 0; + uint32_t level = (level_packed == 0) ? 0 : loop_level; + + VkDescriptorSet descriptor_set_source = + is_base ? descriptor_set_source_base : descriptor_set_source_mips; + if (descriptor_set_source_current != descriptor_set_source) { + descriptor_set_source_current = descriptor_set_source; + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_COMPUTE, load_pipeline_layout_, + kLoadDescriptorSetIndexSource, 1, &descriptor_set_source, 0, nullptr); + } + + // TODO(Triang3l): guest_offset relative to the storage buffer origin. + load_constants.guest_offset = 0; + if (!is_base) { + load_constants.guest_offset += + guest_layout.mip_offsets_bytes[level] * + (texture_resolution_scale_x * texture_resolution_scale_y); + } + const texture_util::TextureGuestLayout::Level& level_guest_layout = + is_base ? guest_layout.base : guest_layout.mips[level]; + uint32_t level_guest_pitch = level_guest_layout.row_pitch_bytes; + if (texture_key.tiled) { + // Shaders expect pitch in blocks for tiled textures. 
+ level_guest_pitch /= bytes_per_block; + assert_zero(level_guest_pitch & (xenos::kTextureTileWidthHeight - 1)); + } + load_constants.guest_pitch_aligned = level_guest_pitch; + load_constants.guest_z_stride_block_rows_aligned = + level_guest_layout.z_slice_stride_block_rows; + assert_true(dimension != xenos::DataDimension::k3D || + !(load_constants.guest_z_stride_block_rows_aligned & + (xenos::kTextureTileWidthHeight - 1))); + + uint32_t level_width, level_height, level_depth; + if (level == level_packed) { + // This is the packed mip tail, containing not only the specified level, + // but also other levels at different offsets - load the entire needed + // extents. + level_width = level_guest_layout.x_extent_blocks * block_width; + level_height = level_guest_layout.y_extent_blocks * block_height; + level_depth = level_guest_layout.z_extent; + } else { + level_width = std::max(width >> level, UINT32_C(1)); + level_height = std::max(height >> level, UINT32_C(1)); + level_depth = std::max(depth >> level, UINT32_C(1)); + } + load_constants.size_blocks[0] = (level_width + (block_width - 1)) / + block_width * texture_resolution_scale_x; + load_constants.size_blocks[1] = (level_height + (block_height - 1)) / + block_height * texture_resolution_scale_y; + load_constants.size_blocks[2] = level_depth; + load_constants.height_texels = level_height; + + uint32_t group_count_x = + (load_constants.size_blocks[0] + + ((UINT32_C(1) << guest_x_blocks_per_group_log2) - 1)) >> + guest_x_blocks_per_group_log2; + uint32_t group_count_y = + (load_constants.size_blocks[1] + + ((UINT32_C(1) << kLoadGuestYBlocksPerGroupLog2) - 1)) >> + kLoadGuestYBlocksPerGroupLog2; + + // TODO(Triang3l): host_offset relative to the storage buffer origin. + const HostLayout& level_host_layout = + is_base ? 
host_layout_base : host_layout_mips[level]; + load_constants.host_offset = uint32_t(level_host_layout.offset_bytes); + load_constants.host_pitch = load_shader_info.bytes_per_host_block * + level_host_layout.x_pitch_blocks; + + uint32_t level_array_slice_stride_bytes_scaled = + level_guest_layout.array_slice_stride_bytes * + (texture_resolution_scale_x * texture_resolution_scale_y); + for (uint32_t slice = 0; slice < array_size; ++slice) { + VkDescriptorSet descriptor_set_constants; + void* constants_mapping = + command_processor_.WriteTransientUniformBufferBinding( + sizeof(load_constants), + VulkanCommandProcessor::SingleTransientDescriptorLayout :: + kUniformBufferCompute, + descriptor_set_constants); + if (!constants_mapping) { + return false; + } + std::memcpy(constants_mapping, &load_constants, sizeof(load_constants)); + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_COMPUTE, load_pipeline_layout_, + kLoadDescriptorSetIndexConstants, 1, &descriptor_set_constants, 0, + nullptr); + command_processor_.SubmitBarriers(true); + command_buffer.CmdVkDispatch(group_count_x, group_count_y, + load_constants.size_blocks[2]); + load_constants.guest_offset += level_array_slice_stride_bytes_scaled; + load_constants.host_offset += + uint32_t(level_host_layout.slice_size_bytes); + } + } + + // Submit copying from the copy buffer to the host texture. 
+ command_processor_.PushBufferMemoryBarrier( + scratch_buffer, 0, VK_WHOLE_SIZE, + scratch_buffer_acquisition.SetStageMask(VK_PIPELINE_STAGE_TRANSFER_BIT), + VK_PIPELINE_STAGE_TRANSFER_BIT, + scratch_buffer_acquisition.SetAccessMask(VK_ACCESS_TRANSFER_READ_BIT), + VK_ACCESS_TRANSFER_READ_BIT); + vulkan_texture.MarkAsUsed(); + VulkanTexture::Usage texture_old_usage = + vulkan_texture.SetUsage(VulkanTexture::Usage::kTransferDestination); + if (texture_old_usage != VulkanTexture::Usage::kTransferDestination) { + VkPipelineStageFlags texture_src_stage_mask, texture_dst_stage_mask; + VkAccessFlags texture_src_access_mask, texture_dst_access_mask; + VkImageLayout texture_old_layout, texture_new_layout; + GetTextureUsageMasks(texture_old_usage, texture_src_stage_mask, + texture_src_access_mask, texture_old_layout); + GetTextureUsageMasks(VulkanTexture::Usage::kTransferDestination, + texture_dst_stage_mask, texture_dst_access_mask, + texture_new_layout); + command_processor_.PushImageMemoryBarrier( + vulkan_texture.image(), ui::vulkan::util::InitializeSubresourceRange(), + texture_src_stage_mask, texture_dst_stage_mask, texture_src_access_mask, + texture_dst_access_mask, texture_old_layout, texture_new_layout); + } + command_processor_.SubmitBarriers(true); + VkBufferImageCopy* copy_regions = command_buffer.CmdCopyBufferToImageEmplace( + scratch_buffer, vulkan_texture.image(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, level_last - level_first + 1); + for (uint32_t level = level_first; level <= level_last; ++level) { + VkBufferImageCopy& copy_region = copy_regions[level - level_first]; + const HostLayout& level_host_layout = + level != 0 ? 
host_layout_mips[std::min(level, level_packed)] + : host_layout_base; + copy_region.bufferOffset = level_host_layout.offset_bytes; + if (level >= level_packed) { + uint32_t level_offset_blocks_x, level_offset_blocks_y, level_offset_z; + texture_util::GetPackedMipOffset(width, height, depth, guest_format, + level, level_offset_blocks_x, + level_offset_blocks_y, level_offset_z); + uint32_t level_offset_host_blocks_x = + texture_resolution_scale_x * level_offset_blocks_x; + uint32_t level_offset_host_blocks_y = + texture_resolution_scale_y * level_offset_blocks_y; + if (!host_format.block_compressed) { + level_offset_host_blocks_x *= block_width; + level_offset_host_blocks_y *= block_height; + } + copy_region.bufferOffset += + load_shader_info.bytes_per_host_block * + (level_offset_host_blocks_x + + level_host_layout.x_pitch_blocks * + (level_offset_host_blocks_y + level_host_layout.y_pitch_blocks * + VkDeviceSize(level_offset_z))); + } + copy_region.bufferRowLength = + level_host_layout.x_pitch_blocks * host_block_width; + copy_region.bufferImageHeight = + level_host_layout.y_pitch_blocks * host_block_height; + copy_region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copy_region.imageSubresource.mipLevel = level; + copy_region.imageSubresource.baseArrayLayer = 0; + copy_region.imageSubresource.layerCount = array_size; + copy_region.imageOffset.x = 0; + copy_region.imageOffset.y = 0; + copy_region.imageOffset.z = 0; + copy_region.imageExtent.width = + std::max((width * texture_resolution_scale_x) >> level, UINT32_C(1)); + copy_region.imageExtent.height = + std::max((height * texture_resolution_scale_y) >> level, UINT32_C(1)); + copy_region.imageExtent.depth = std::max(depth >> level, UINT32_C(1)); + } + return true; } @@ -886,7 +1352,7 @@ bool VulkanTextureCache::Initialize() { // Image formats. // Initialize to the best formats. 
- for (size_t i = 0; i < 64; ++i) { + for (size_t i = 0; i < xe::countof(host_formats_); ++i) { host_formats_[i] = kBestHostFormats[i]; } @@ -928,7 +1394,7 @@ bool VulkanTextureCache::Initialize() { physical_device, VK_FORMAT_G8B8G8R8_422_UNORM_KHR, &format_properties); if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != kLinearFilterFeatures) { - host_format_gbgr.format_unsigned.load_mode = LoadMode::kGBGR8ToRGB8; + host_format_gbgr.format_unsigned.load_shader = kLoadShaderIndexGBGR8ToRGB8; host_format_gbgr.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; host_format_gbgr.format_unsigned.block_compressed = false; host_format_gbgr.unsigned_signed_compatible = true; @@ -943,7 +1409,7 @@ bool VulkanTextureCache::Initialize() { physical_device, VK_FORMAT_B8G8R8G8_422_UNORM_KHR, &format_properties); if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != kLinearFilterFeatures) { - host_format_bgrg.format_unsigned.load_mode = LoadMode::kBGRG8ToRGB8; + host_format_bgrg.format_unsigned.load_shader = kLoadShaderIndexBGRG8ToRGB8; host_format_bgrg.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; host_format_bgrg.format_unsigned.block_compressed = false; host_format_bgrg.unsigned_signed_compatible = true; @@ -969,7 +1435,7 @@ bool VulkanTextureCache::Initialize() { physical_device, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, &format_properties); if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != kLinearFilterFeatures) { - host_format_dxt1.format_unsigned.load_mode = LoadMode::kDXT1ToRGBA8; + host_format_dxt1.format_unsigned.load_shader = kLoadShaderIndexDXT1ToRGBA8; host_format_dxt1.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; host_format_dxt1.format_unsigned.block_compressed = false; host_formats_[uint32_t(xenos::TextureFormat::k_DXT1_AS_16_16_16_16)] = @@ -983,7 +1449,8 @@ bool VulkanTextureCache::Initialize() { physical_device, VK_FORMAT_BC2_UNORM_BLOCK, &format_properties); if 
((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != kLinearFilterFeatures) { - host_format_dxt2_3.format_unsigned.load_mode = LoadMode::kDXT3ToRGBA8; + host_format_dxt2_3.format_unsigned.load_shader = + kLoadShaderIndexDXT3ToRGBA8; host_format_dxt2_3.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; host_format_dxt2_3.format_unsigned.block_compressed = false; host_formats_[uint32_t(xenos::TextureFormat::k_DXT2_3_AS_16_16_16_16)] = @@ -997,7 +1464,8 @@ bool VulkanTextureCache::Initialize() { physical_device, VK_FORMAT_BC3_UNORM_BLOCK, &format_properties); if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != kLinearFilterFeatures) { - host_format_dxt4_5.format_unsigned.load_mode = LoadMode::kDXT5ToRGBA8; + host_format_dxt4_5.format_unsigned.load_shader = + kLoadShaderIndexDXT5ToRGBA8; host_format_dxt4_5.format_unsigned.format = VK_FORMAT_R8G8B8A8_UNORM; host_format_dxt4_5.format_unsigned.block_compressed = false; host_formats_[uint32_t(xenos::TextureFormat::k_DXT4_5_AS_16_16_16_16)] = @@ -1011,7 +1479,7 @@ bool VulkanTextureCache::Initialize() { physical_device, VK_FORMAT_BC5_UNORM_BLOCK, &format_properties); if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != kLinearFilterFeatures) { - host_format_dxn.format_unsigned.load_mode = LoadMode::kDXNToRG8; + host_format_dxn.format_unsigned.load_shader = kLoadShaderIndexDXNToRG8; host_format_dxn.format_unsigned.format = VK_FORMAT_R8G8_UNORM; host_format_dxn.format_unsigned.block_compressed = false; } @@ -1023,7 +1491,7 @@ bool VulkanTextureCache::Initialize() { physical_device, VK_FORMAT_BC4_UNORM_BLOCK, &format_properties); if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != kLinearFilterFeatures) { - host_format_dxt5a.format_unsigned.load_mode = LoadMode::kDXT5AToR8; + host_format_dxt5a.format_unsigned.load_shader = kLoadShaderIndexDXT5AToR8; host_format_dxt5a.format_unsigned.format = VK_FORMAT_R8_UNORM; 
host_format_dxt5a.format_unsigned.block_compressed = false; } @@ -1043,13 +1511,14 @@ bool VulkanTextureCache::Initialize() { assert_true(host_format_16.format_unsigned.format == VK_FORMAT_R16_UNORM); if ((r16_unorm_properties.optimalTilingFeatures & norm16_required_features) != norm16_required_features) { - host_format_16.format_unsigned.load_mode = LoadMode::kR16UNormToFloat; + host_format_16.format_unsigned.load_shader = + kLoadShaderIndexR16UNormToFloat; host_format_16.format_unsigned.format = VK_FORMAT_R16_SFLOAT; } assert_true(host_format_16.format_signed.format == VK_FORMAT_R16_SNORM); if ((r16_snorm_properties.optimalTilingFeatures & norm16_required_features) != norm16_required_features) { - host_format_16.format_signed.load_mode = LoadMode::kR16SNormToFloat; + host_format_16.format_signed.load_shader = kLoadShaderIndexR16SNormToFloat; host_format_16.format_signed.format = VK_FORMAT_R16_SFLOAT; } host_format_16.unsigned_signed_compatible = @@ -1063,13 +1532,15 @@ bool VulkanTextureCache::Initialize() { VK_FORMAT_R16G16_UNORM); if ((r16g16_unorm_properties.optimalTilingFeatures & norm16_required_features) != norm16_required_features) { - host_format_16_16.format_unsigned.load_mode = LoadMode::kRG16UNormToFloat; + host_format_16_16.format_unsigned.load_shader = + kLoadShaderIndexRG16UNormToFloat; host_format_16_16.format_unsigned.format = VK_FORMAT_R16G16_SFLOAT; } assert_true(host_format_16_16.format_signed.format == VK_FORMAT_R16G16_SNORM); if ((r16g16_snorm_properties.optimalTilingFeatures & norm16_required_features) != norm16_required_features) { - host_format_16_16.format_signed.load_mode = LoadMode::kRG16SNormToFloat; + host_format_16_16.format_signed.load_shader = + kLoadShaderIndexRG16SNormToFloat; host_format_16_16.format_signed.format = VK_FORMAT_R16G16_SFLOAT; } host_format_16_16.unsigned_signed_compatible = @@ -1083,8 +1554,8 @@ bool VulkanTextureCache::Initialize() { VK_FORMAT_R16G16B16A16_UNORM); if 
((r16g16b16a16_unorm_properties.optimalTilingFeatures & norm16_required_features) != norm16_required_features) { - host_format_16_16_16_16.format_unsigned.load_mode = - LoadMode::kRGBA16UNormToFloat; + host_format_16_16_16_16.format_unsigned.load_shader = + kLoadShaderIndexRGBA16UNormToFloat; host_format_16_16_16_16.format_unsigned.format = VK_FORMAT_R16G16B16A16_SFLOAT; } @@ -1092,8 +1563,8 @@ bool VulkanTextureCache::Initialize() { VK_FORMAT_R16G16B16A16_SNORM); if ((r16g16b16a16_snorm_properties.optimalTilingFeatures & norm16_required_features) != norm16_required_features) { - host_format_16_16_16_16.format_signed.load_mode = - LoadMode::kRGBA16SNormToFloat; + host_format_16_16_16_16.format_signed.load_shader = + kLoadShaderIndexRGBA16SNormToFloat; host_format_16_16_16_16.format_signed.format = VK_FORMAT_R16G16B16A16_SFLOAT; } @@ -1108,27 +1579,29 @@ bool VulkanTextureCache::Initialize() { VK_FORMAT_R16G16B16A16_SFLOAT); // Normalize format information structures. - for (size_t i = 0; i < 64; ++i) { + for (size_t i = 0; i < xe::countof(host_formats_); ++i) { HostFormatPair& host_format = host_formats_[i]; - // LoadMode is left uninitialized for the tail (non-existent formats), - // kUnknown may be non-zero, and format support may be disabled by setting - // the format to VK_FORMAT_UNDEFINED. + // load_shader_index is left uninitialized for the tail (non-existent + // formats), kLoadShaderIndexUnknown may be non-zero, and format support may + // be disabled by setting the format to VK_FORMAT_UNDEFINED. 
if (host_format.format_unsigned.format == VK_FORMAT_UNDEFINED) { - host_format.format_unsigned.load_mode = LoadMode::kUnknown; + host_format.format_unsigned.load_shader = kLoadShaderIndexUnknown; } - assert_false(host_format.format_unsigned.load_mode == LoadMode::kUnknown && + assert_false(host_format.format_unsigned.load_shader == + kLoadShaderIndexUnknown && host_format.format_unsigned.format != VK_FORMAT_UNDEFINED); - if (host_format.format_unsigned.load_mode == LoadMode::kUnknown) { + if (host_format.format_unsigned.load_shader == kLoadShaderIndexUnknown) { host_format.format_unsigned.format = VK_FORMAT_UNDEFINED; // Surely known it's unsupported with these two conditions. host_format.format_unsigned.linear_filterable = false; } if (host_format.format_signed.format == VK_FORMAT_UNDEFINED) { - host_format.format_signed.load_mode = LoadMode::kUnknown; + host_format.format_signed.load_shader = kLoadShaderIndexUnknown; } - assert_false(host_format.format_signed.load_mode == LoadMode::kUnknown && + assert_false(host_format.format_signed.load_shader == + kLoadShaderIndexUnknown && host_format.format_signed.format != VK_FORMAT_UNDEFINED); - if (host_format.format_signed.load_mode == LoadMode::kUnknown) { + if (host_format.format_signed.load_shader == kLoadShaderIndexUnknown) { host_format.format_signed.format = VK_FORMAT_UNDEFINED; // Surely known it's unsupported with these two conditions. 
host_format.format_signed.linear_filterable = false; @@ -1146,7 +1619,7 @@ bool VulkanTextureCache::Initialize() { VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT) != 0; } else { host_format.format_unsigned.format = VK_FORMAT_UNDEFINED; - host_format.format_unsigned.load_mode = LoadMode::kUnknown; + host_format.format_unsigned.load_shader = kLoadShaderIndexUnknown; host_format.format_unsigned.linear_filterable = false; } } @@ -1161,7 +1634,7 @@ bool VulkanTextureCache::Initialize() { VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT) != 0; } else { host_format.format_signed.format = VK_FORMAT_UNDEFINED; - host_format.format_signed.load_mode = LoadMode::kUnknown; + host_format.format_signed.load_shader = kLoadShaderIndexUnknown; host_format.format_signed.linear_filterable = false; } } @@ -1210,14 +1683,14 @@ bool VulkanTextureCache::Initialize() { } } - // Signednesses with different load modes must have the data loaded + // Signednesses with different load shaders must have the data loaded // differently, therefore can't share the image even if the format is the // same. Also, if there's only one version, simplify the logic - there can't // be compatibility between two formats when one of them is undefined. if (host_format.format_unsigned.format != VK_FORMAT_UNDEFINED && host_format.format_signed.format != VK_FORMAT_UNDEFINED) { - if (host_format.format_unsigned.load_mode == - host_format.format_signed.load_mode) { + if (host_format.format_unsigned.load_shader == + host_format.format_signed.load_shader) { if (host_format.format_unsigned.format == host_format.format_signed.format) { // Same format after all the fallbacks - force compatibilty. @@ -1242,6 +1715,251 @@ bool VulkanTextureCache::Initialize() { } } + // Load pipeline layout. 
+ + VkDescriptorSetLayout load_descriptor_set_layouts[kLoadDescriptorSetCount] = + {}; + VkDescriptorSetLayout load_descriptor_set_layout_storage_buffer = + command_processor_.GetSingleTransientDescriptorLayout( + VulkanCommandProcessor::SingleTransientDescriptorLayout :: + kStorageBufferCompute); + assert_true(load_descriptor_set_layout_storage_buffer != VK_NULL_HANDLE); + load_descriptor_set_layouts[kLoadDescriptorSetIndexDestination] = + load_descriptor_set_layout_storage_buffer; + load_descriptor_set_layouts[kLoadDescriptorSetIndexSource] = + load_descriptor_set_layout_storage_buffer; + load_descriptor_set_layouts[kLoadDescriptorSetIndexConstants] = + command_processor_.GetSingleTransientDescriptorLayout( + VulkanCommandProcessor::SingleTransientDescriptorLayout :: + kUniformBufferCompute); + assert_true(load_descriptor_set_layouts[kLoadDescriptorSetIndexConstants] != + VK_NULL_HANDLE); + VkPipelineLayoutCreateInfo load_pipeline_layout_create_info; + load_pipeline_layout_create_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + load_pipeline_layout_create_info.pNext = nullptr; + load_pipeline_layout_create_info.flags = 0; + load_pipeline_layout_create_info.setLayoutCount = kLoadDescriptorSetCount; + load_pipeline_layout_create_info.pSetLayouts = load_descriptor_set_layouts; + load_pipeline_layout_create_info.pushConstantRangeCount = 0; + load_pipeline_layout_create_info.pPushConstantRanges = nullptr; + if (dfn.vkCreatePipelineLayout(device, &load_pipeline_layout_create_info, + nullptr, &load_pipeline_layout_)) { + XELOGE("VulkanTexture: Failed to create the texture load pipeline layout"); + return false; + } + + // Load pipelines, only the ones needed for the formats that will be used. 
+ + bool load_shaders_needed[kLoadShaderCount] = {}; + for (size_t i = 0; i < xe::countof(host_formats_); ++i) { + const HostFormatPair& host_format = host_formats_[i]; + if (host_format.format_unsigned.load_shader != kLoadShaderIndexUnknown) { + load_shaders_needed[host_format.format_unsigned.load_shader] = true; + } + if (host_format.format_signed.load_shader != kLoadShaderIndexUnknown) { + load_shaders_needed[host_format.format_signed.load_shader] = true; + } + } + + std::pair load_shader_code[kLoadShaderCount] = {}; + load_shader_code[kLoadShaderIndex8bpb] = std::make_pair( + shaders::texture_load_8bpb_cs, sizeof(shaders::texture_load_8bpb_cs)); + load_shader_code[kLoadShaderIndex16bpb] = std::make_pair( + shaders::texture_load_16bpb_cs, sizeof(shaders::texture_load_16bpb_cs)); + load_shader_code[kLoadShaderIndex32bpb] = std::make_pair( + shaders::texture_load_32bpb_cs, sizeof(shaders::texture_load_32bpb_cs)); + load_shader_code[kLoadShaderIndex64bpb] = std::make_pair( + shaders::texture_load_64bpb_cs, sizeof(shaders::texture_load_64bpb_cs)); + load_shader_code[kLoadShaderIndex128bpb] = std::make_pair( + shaders::texture_load_128bpb_cs, sizeof(shaders::texture_load_128bpb_cs)); + load_shader_code[kLoadShaderIndexR5G5B5A1ToB5G5R5A1] = + std::make_pair(shaders::texture_load_r5g5b5a1_b5g5r5a1_cs, + sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_cs)); + load_shader_code[kLoadShaderIndexR5G6B5ToB5G6R5] = + std::make_pair(shaders::texture_load_r5g6b5_b5g6r5_cs, + sizeof(shaders::texture_load_r5g6b5_b5g6r5_cs)); + load_shader_code[kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle] = + std::make_pair( + shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs, + sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_cs)); + load_shader_code[kLoadShaderIndexRGBA4ToARGB4] = + std::make_pair(shaders::texture_load_r4g4b4a4_a4r4g4b4_cs, + sizeof(shaders::texture_load_r4g4b4a4_a4r4g4b4_cs)); + load_shader_code[kLoadShaderIndexGBGR8ToRGB8] = + 
std::make_pair(shaders::texture_load_gbgr8_rgb8_cs, + sizeof(shaders::texture_load_gbgr8_rgb8_cs)); + load_shader_code[kLoadShaderIndexBGRG8ToRGB8] = + std::make_pair(shaders::texture_load_bgrg8_rgb8_cs, + sizeof(shaders::texture_load_bgrg8_rgb8_cs)); + load_shader_code[kLoadShaderIndexR10G11B11ToRGBA16] = + std::make_pair(shaders::texture_load_r10g11b11_rgba16_cs, + sizeof(shaders::texture_load_r10g11b11_rgba16_cs)); + load_shader_code[kLoadShaderIndexR10G11B11ToRGBA16SNorm] = + std::make_pair(shaders::texture_load_r10g11b11_rgba16_snorm_cs, + sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_cs)); + load_shader_code[kLoadShaderIndexR11G11B10ToRGBA16] = + std::make_pair(shaders::texture_load_r11g11b10_rgba16_cs, + sizeof(shaders::texture_load_r11g11b10_rgba16_cs)); + load_shader_code[kLoadShaderIndexR11G11B10ToRGBA16SNorm] = + std::make_pair(shaders::texture_load_r11g11b10_rgba16_snorm_cs, + sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_cs)); + load_shader_code[kLoadShaderIndexR16UNormToFloat] = + std::make_pair(shaders::texture_load_r16_unorm_float_cs, + sizeof(shaders::texture_load_r16_unorm_float_cs)); + load_shader_code[kLoadShaderIndexR16SNormToFloat] = + std::make_pair(shaders::texture_load_r16_snorm_float_cs, + sizeof(shaders::texture_load_r16_snorm_float_cs)); + load_shader_code[kLoadShaderIndexRG16UNormToFloat] = + std::make_pair(shaders::texture_load_rg16_unorm_float_cs, + sizeof(shaders::texture_load_rg16_unorm_float_cs)); + load_shader_code[kLoadShaderIndexRG16SNormToFloat] = + std::make_pair(shaders::texture_load_rg16_snorm_float_cs, + sizeof(shaders::texture_load_rg16_snorm_float_cs)); + load_shader_code[kLoadShaderIndexRGBA16UNormToFloat] = + std::make_pair(shaders::texture_load_rgba16_unorm_float_cs, + sizeof(shaders::texture_load_rgba16_unorm_float_cs)); + load_shader_code[kLoadShaderIndexRGBA16SNormToFloat] = + std::make_pair(shaders::texture_load_rgba16_snorm_float_cs, + sizeof(shaders::texture_load_rgba16_snorm_float_cs)); + 
load_shader_code[kLoadShaderIndexDXT1ToRGBA8] = + std::make_pair(shaders::texture_load_dxt1_rgba8_cs, + sizeof(shaders::texture_load_dxt1_rgba8_cs)); + load_shader_code[kLoadShaderIndexDXT3ToRGBA8] = + std::make_pair(shaders::texture_load_dxt3_rgba8_cs, + sizeof(shaders::texture_load_dxt3_rgba8_cs)); + load_shader_code[kLoadShaderIndexDXT5ToRGBA8] = + std::make_pair(shaders::texture_load_dxt5_rgba8_cs, + sizeof(shaders::texture_load_dxt5_rgba8_cs)); + load_shader_code[kLoadShaderIndexDXNToRG8] = + std::make_pair(shaders::texture_load_dxn_rg8_cs, + sizeof(shaders::texture_load_dxn_rg8_cs)); + load_shader_code[kLoadShaderIndexDXT3A] = std::make_pair( + shaders::texture_load_dxt3a_cs, sizeof(shaders::texture_load_dxt3a_cs)); + load_shader_code[kLoadShaderIndexDXT3AAs1111ToARGB4] = + std::make_pair(shaders::texture_load_dxt3aas1111_argb4_cs, + sizeof(shaders::texture_load_dxt3aas1111_argb4_cs)); + load_shader_code[kLoadShaderIndexDXT5AToR8] = + std::make_pair(shaders::texture_load_dxt5a_r8_cs, + sizeof(shaders::texture_load_dxt5a_r8_cs)); + load_shader_code[kLoadShaderIndexCTX1] = std::make_pair( + shaders::texture_load_ctx1_cs, sizeof(shaders::texture_load_ctx1_cs)); + load_shader_code[kLoadShaderIndexDepthUnorm] = + std::make_pair(shaders::texture_load_depth_unorm_cs, + sizeof(shaders::texture_load_depth_unorm_cs)); + load_shader_code[kLoadShaderIndexDepthFloat] = + std::make_pair(shaders::texture_load_depth_float_cs, + sizeof(shaders::texture_load_depth_float_cs)); + std::pair load_shader_code_scaled[kLoadShaderCount] = + {}; + if (IsDrawResolutionScaled()) { + load_shader_code_scaled[kLoadShaderIndex8bpb] = + std::make_pair(shaders::texture_load_8bpb_scaled_cs, + sizeof(shaders::texture_load_8bpb_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndex16bpb] = + std::make_pair(shaders::texture_load_16bpb_scaled_cs, + sizeof(shaders::texture_load_16bpb_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndex32bpb] = + 
std::make_pair(shaders::texture_load_32bpb_scaled_cs, + sizeof(shaders::texture_load_32bpb_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndex64bpb] = + std::make_pair(shaders::texture_load_64bpb_scaled_cs, + sizeof(shaders::texture_load_64bpb_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndex128bpb] = + std::make_pair(shaders::texture_load_128bpb_scaled_cs, + sizeof(shaders::texture_load_128bpb_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexR5G5B5A1ToB5G5R5A1] = + std::make_pair( + shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs, + sizeof(shaders::texture_load_r5g5b5a1_b5g5r5a1_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexR5G6B5ToB5G6R5] = + std::make_pair(shaders::texture_load_r5g6b5_b5g6r5_scaled_cs, + sizeof(shaders::texture_load_r5g6b5_b5g6r5_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle] = + std::make_pair( + shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs, + sizeof(shaders::texture_load_r5g5b6_b5g6r5_swizzle_rbga_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexRGBA4ToARGB4] = std::make_pair( + shaders::texture_load_r4g4b4a4_a4r4g4b4_scaled_cs, + sizeof(shaders::texture_load_r4g4b4a4_a4r4g4b4_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexR10G11B11ToRGBA16] = std::make_pair( + shaders::texture_load_r10g11b11_rgba16_scaled_cs, + sizeof(shaders::texture_load_r10g11b11_rgba16_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexR10G11B11ToRGBA16SNorm] = + std::make_pair( + shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs, + sizeof(shaders::texture_load_r10g11b11_rgba16_snorm_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexR11G11B10ToRGBA16] = std::make_pair( + shaders::texture_load_r11g11b10_rgba16_scaled_cs, + sizeof(shaders::texture_load_r11g11b10_rgba16_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexR11G11B10ToRGBA16SNorm] = + std::make_pair( + shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs, + 
sizeof(shaders::texture_load_r11g11b10_rgba16_snorm_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexR16UNormToFloat] = + std::make_pair(shaders::texture_load_r16_unorm_float_scaled_cs, + sizeof(shaders::texture_load_r16_unorm_float_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexR16SNormToFloat] = + std::make_pair(shaders::texture_load_r16_snorm_float_scaled_cs, + sizeof(shaders::texture_load_r16_snorm_float_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexRG16UNormToFloat] = std::make_pair( + shaders::texture_load_rg16_unorm_float_scaled_cs, + sizeof(shaders::texture_load_rg16_unorm_float_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexRG16SNormToFloat] = std::make_pair( + shaders::texture_load_rg16_snorm_float_scaled_cs, + sizeof(shaders::texture_load_rg16_snorm_float_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexRGBA16UNormToFloat] = + std::make_pair( + shaders::texture_load_rgba16_unorm_float_scaled_cs, + sizeof(shaders::texture_load_rgba16_unorm_float_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexRGBA16SNormToFloat] = + std::make_pair( + shaders::texture_load_rgba16_snorm_float_scaled_cs, + sizeof(shaders::texture_load_rgba16_snorm_float_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexDepthUnorm] = + std::make_pair(shaders::texture_load_depth_unorm_scaled_cs, + sizeof(shaders::texture_load_depth_unorm_scaled_cs)); + load_shader_code_scaled[kLoadShaderIndexDepthFloat] = + std::make_pair(shaders::texture_load_depth_float_scaled_cs, + sizeof(shaders::texture_load_depth_float_scaled_cs)); + } + + for (size_t i = 0; i < kLoadShaderCount; ++i) { + if (!load_shaders_needed[i]) { + continue; + } + const std::pair& current_load_shader_code = + load_shader_code[i]; + assert_not_null(current_load_shader_code.first); + load_pipelines_[i] = ui::vulkan::util::CreateComputePipeline( + provider, load_pipeline_layout_, current_load_shader_code.first, + current_load_shader_code.second); + if (load_pipelines_[i] == 
VK_NULL_HANDLE) { + XELOGE( + "VulkanTextureCache: Failed to create the texture loading pipeline " + "for shader {}", + i); + return false; + } + if (IsDrawResolutionScaled()) { + const std::pair& + current_load_shader_code_scaled = load_shader_code_scaled[i]; + if (current_load_shader_code_scaled.first) { + load_pipelines_scaled_[i] = ui::vulkan::util::CreateComputePipeline( + provider, load_pipeline_layout_, + current_load_shader_code_scaled.first, + current_load_shader_code_scaled.second); + if (load_pipelines_scaled_[i] == VK_NULL_HANDLE) { + XELOGE( + "VulkanTextureCache: Failed to create the resolution-scaled " + "texture loading pipeline for shader {}", + i); + return false; + } + } + } + } + // Null images as a replacement for unneeded bindings and for bindings for // which the real image hasn't been created. // TODO(Triang3l): Use VK_EXT_robustness2 null descriptors. diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.h b/src/xenia/gpu/vulkan/vulkan_texture_cache.h index d7499f9a1..3ea67bbe5 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.h @@ -28,6 +28,8 @@ class VulkanCommandProcessor; class VulkanTextureCache final : public TextureCache { public: + // Transient descriptor set layouts must be initialized in the command + // processor. 
static std::unique_ptr Create( const RegisterFile& register_file, VulkanSharedMemory& shared_memory, uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y, @@ -75,46 +77,15 @@ class VulkanTextureCache final : public TextureCache { void UpdateTextureBindingsImpl(uint32_t fetch_constant_mask) override; private: - enum class LoadMode { - k8bpb, - k16bpb, - k32bpb, - k64bpb, - k128bpb, - kR5G5B5A1ToB5G5R5A1, - kR5G6B5ToB5G6R5, - kR5G5B6ToB5G6R5WithRBGASwizzle, - kRGBA4ToARGB4, - kGBGR8ToRGB8, - kBGRG8ToRGB8, - kR10G11B11ToRGBA16, - kR10G11B11ToRGBA16SNorm, - kR11G11B10ToRGBA16, - kR11G11B10ToRGBA16SNorm, - kR16UNormToFloat, - kR16SNormToFloat, - kRG16UNormToFloat, - kRG16SNormToFloat, - kRGBA16UNormToFloat, - kRGBA16SNormToFloat, - kDXT1ToRGBA8, - kDXT3ToRGBA8, - kDXT5ToRGBA8, - kDXNToRG8, - kDXT3A, - kDXT3AAs1111ToARGB4, - kDXT5AToR8, - kCTX1, - kDepthUnorm, - kDepthFloat, - - kCount, - - kUnknown = kCount + enum LoadDescriptorSetIndex { + kLoadDescriptorSetIndexDestination, + kLoadDescriptorSetIndexSource, + kLoadDescriptorSetIndexConstants, + kLoadDescriptorSetCount, }; struct HostFormat { - LoadMode load_mode; + LoadShaderIndex load_shader; // Do NOT add integer formats to this - they are not filterable, can only be // read with ImageFetch, not ImageSample! If any game is seen using // num_format 1 for fixed-point formats (for floating-point, it's normally @@ -275,6 +246,10 @@ class VulkanTextureCache final : public TextureCache { static const HostFormatPair kBestHostFormats[64]; HostFormatPair host_formats_[64]; + VkPipelineLayout load_pipeline_layout_ = VK_NULL_HANDLE; + std::array load_pipelines_{}; + std::array load_pipelines_scaled_{}; + // If both images can be placed in the same allocation, it's one allocation, // otherwise it's two separate. 
std::array null_images_memory_{}; From a8cfe9bebba96cb1987ae932697fce9dbad8ea7d Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 2 Jun 2022 23:10:50 +0300 Subject: [PATCH 092/123] [Vulkan] Unsubsample odd-sized 4:2:2 textures --- src/xenia/gpu/vulkan/vulkan_texture_cache.cc | 62 ++++++++++++++++++-- src/xenia/gpu/vulkan/vulkan_texture_cache.h | 4 ++ 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index b7bd5fd9b..1806d7df3 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -401,6 +401,24 @@ const VulkanTextureCache::HostFormatPair xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA}, }; +// Vulkan requires 2x1 (4:2:2) subsampled images to have an even width. +// Always decompressing them to RGBA8, which is required to be linear-filterable +// as UNORM and SNORM. + +const VulkanTextureCache::HostFormatPair + VulkanTextureCache::kHostFormatGBGRUnaligned = { + {kLoadShaderIndexGBGR8ToRGB8, VK_FORMAT_R8G8B8A8_UNORM, false, true}, + {kLoadShaderIndexGBGR8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM, false, true}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB, + true}; + +const VulkanTextureCache::HostFormatPair + VulkanTextureCache::kHostFormatBGRGUnaligned = { + {kLoadShaderIndexBGRG8ToRGB8, VK_FORMAT_R8G8B8A8_UNORM, false, true}, + {kLoadShaderIndexBGRG8ToRGB8, VK_FORMAT_R8G8B8A8_SNORM, false, true}, + xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB, + true}; + VulkanTextureCache::~VulkanTextureCache() { const ui::vulkan::VulkanProvider& provider = command_processor_.GetVulkanProvider(); @@ -573,7 +591,7 @@ VkImageView VulkanTextureCache::GetActiveBindingOrNullImageView( bool VulkanTextureCache::IsSignedVersionSeparateForFormat( TextureKey key) const { - const HostFormatPair& host_format_pair = host_formats_[uint32_t(key.format)]; + const HostFormatPair& host_format_pair = GetHostFormatPair(key); if (host_format_pair.format_unsigned.format == 
VK_FORMAT_UNDEFINED || host_format_pair.format_signed.format == VK_FORMAT_UNDEFINED) { // Just one signedness. @@ -583,7 +601,7 @@ bool VulkanTextureCache::IsSignedVersionSeparateForFormat( } uint32_t VulkanTextureCache::GetHostFormatSwizzle(TextureKey key) const { - return host_formats_[uint32_t(key.format)].swizzle; + return GetHostFormatPair(key).swizzle; } uint32_t VulkanTextureCache::GetMaxHostTextureWidthHeight( @@ -633,7 +651,7 @@ uint32_t VulkanTextureCache::GetMaxHostTextureDepthOrArraySize( std::unique_ptr VulkanTextureCache::CreateTexture( TextureKey key) { VkFormat formats[] = {VK_FORMAT_UNDEFINED, VK_FORMAT_UNDEFINED}; - const HostFormatPair& host_format = host_formats_[uint32_t(key.format)]; + const HostFormatPair& host_format = GetHostFormatPair(key); if (host_format.format_signed.format == VK_FORMAT_UNDEFINED) { // Only the unsigned format may be available, if at all. formats[0] = host_format.format_unsigned.format; @@ -735,8 +753,7 @@ bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, TextureKey texture_key = vulkan_texture.key(); // Get the pipeline. - const HostFormatPair& host_format_pair = - host_formats_[uint32_t(texture_key.format)]; + const HostFormatPair& host_format_pair = GetHostFormatPair(texture_key); bool host_format_is_signed; if (IsSignedVersionSeparateForFormat(texture_key)) { host_format_is_signed = bool(texture_key.signed_separate); @@ -1253,7 +1270,7 @@ VkImageView VulkanTextureCache::VulkanTexture::GetView(bool is_signed, ViewKey view_key; const HostFormatPair& host_format_pair = - vulkan_texture_cache.host_formats_[uint32_t(key().format)]; + vulkan_texture_cache.GetHostFormatPair(key()); VkFormat format = (is_signed ? 
host_format_pair.format_signed : host_format_pair.format_unsigned) .format; @@ -1761,6 +1778,26 @@ bool VulkanTextureCache::Initialize() { load_shaders_needed[host_format.format_signed.load_shader] = true; } } + if (kHostFormatGBGRUnaligned.format_unsigned.load_shader != + kLoadShaderIndexUnknown) { + load_shaders_needed[kHostFormatGBGRUnaligned.format_unsigned.load_shader] = + true; + } + if (kHostFormatGBGRUnaligned.format_signed.load_shader != + kLoadShaderIndexUnknown) { + load_shaders_needed[kHostFormatGBGRUnaligned.format_signed.load_shader] = + true; + } + if (kHostFormatBGRGUnaligned.format_unsigned.load_shader != + kLoadShaderIndexUnknown) { + load_shaders_needed[kHostFormatBGRGUnaligned.format_unsigned.load_shader] = + true; + } + if (kHostFormatBGRGUnaligned.format_signed.load_shader != + kLoadShaderIndexUnknown) { + load_shaders_needed[kHostFormatBGRGUnaligned.format_signed.load_shader] = + true; + } std::pair load_shader_code[kLoadShaderCount] = {}; load_shader_code[kLoadShaderIndex8bpb] = std::make_pair( @@ -2177,6 +2214,19 @@ bool VulkanTextureCache::Initialize() { return true; } +const VulkanTextureCache::HostFormatPair& VulkanTextureCache::GetHostFormatPair( + TextureKey key) const { + if (key.format == xenos::TextureFormat::k_Cr_Y1_Cb_Y0_REP && + (key.GetWidth() & 1)) { + return kHostFormatGBGRUnaligned; + } + if (key.format == xenos::TextureFormat::k_Y1_Cr_Y0_Cb_REP && + (key.GetWidth() & 1)) { + return kHostFormatBGRGUnaligned; + } + return host_formats_[uint32_t(key.format)]; +} + void VulkanTextureCache::GetTextureUsageMasks(VulkanTexture::Usage usage, VkPipelineStageFlags& stage_mask, VkAccessFlags& access_mask, diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.h b/src/xenia/gpu/vulkan/vulkan_texture_cache.h index 3ea67bbe5..6b6c4814a 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.h @@ -236,6 +236,8 @@ class VulkanTextureCache final : public TextureCache { bool Initialize(); + 
const HostFormatPair& GetHostFormatPair(TextureKey key) const; + void GetTextureUsageMasks(VulkanTexture::Usage usage, VkPipelineStageFlags& stage_mask, VkAccessFlags& access_mask, VkImageLayout& layout); @@ -244,6 +246,8 @@ class VulkanTextureCache final : public TextureCache { VkPipelineStageFlags guest_shader_pipeline_stages_; static const HostFormatPair kBestHostFormats[64]; + static const HostFormatPair kHostFormatGBGRUnaligned; + static const HostFormatPair kHostFormatBGRGUnaligned; HostFormatPair host_formats_[64]; VkPipelineLayout load_pipeline_layout_ = VK_NULL_HANDLE; From 1a22216e44bee7dbea2134c5482f981e51879ffa Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 9 Jun 2022 21:42:16 +0300 Subject: [PATCH 093/123] [SPIR-V] Texture fetch instructions --- src/xenia/gpu/spirv_shader_translator.cc | 237 ++ src/xenia/gpu/spirv_shader_translator.h | 42 +- .../gpu/spirv_shader_translator_fetch.cc | 2045 ++++++++++++++++- .../gpu/vulkan/vulkan_command_processor.cc | 54 +- .../gpu/vulkan/vulkan_command_processor.h | 3 +- 5 files changed, 2363 insertions(+), 18 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index c05e4043c..b59b637b6 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -30,6 +30,7 @@ SpirvShaderTranslator::Features::Features(bool all) max_storage_buffer_range(all ? 
UINT32_MAX : (128 * 1024 * 1024)), clip_distance(all), cull_distance(all), + image_view_format_swizzle(all), signed_zero_inf_nan_preserve_float32(all), denorm_flush_to_zero_float32(all) {} @@ -51,6 +52,15 @@ SpirvShaderTranslator::Features::Features( } else { spirv_version = spv::Spv_1_0; } + const VkPhysicalDevicePortabilitySubsetFeaturesKHR* + device_portability_subset_features = + provider.device_portability_subset_features(); + if (device_portability_subset_features) { + image_view_format_swizzle = + bool(device_portability_subset_features->imageViewFormatSwizzle); + } else { + image_view_format_swizzle = true; + } if (spirv_version >= spv::Spv_1_4 || device_extensions.khr_shader_float_controls) { const VkPhysicalDeviceFloatControlsPropertiesKHR& @@ -198,6 +208,14 @@ void SpirvShaderTranslator::StartTranslation() { size_t offset; spv::Id type; }; + spv::Id type_uint4_array_2 = builder_->makeArrayType( + type_uint4_, builder_->makeUintConstant(2), sizeof(uint32_t) * 4); + builder_->addDecoration(type_uint4_array_2, spv::DecorationArrayStride, + sizeof(uint32_t) * 4); + spv::Id type_uint4_array_4 = builder_->makeArrayType( + type_uint4_, builder_->makeUintConstant(4), sizeof(uint32_t) * 4); + builder_->addDecoration(type_uint4_array_4, spv::DecorationArrayStride, + sizeof(uint32_t) * 4); const SystemConstant system_constants[] = { {"flags", offsetof(SystemConstants, flags), type_uint_}, {"vertex_index_endian", offsetof(SystemConstants, vertex_index_endian), @@ -206,6 +224,10 @@ void SpirvShaderTranslator::StartTranslation() { type_int_}, {"ndc_scale", offsetof(SystemConstants, ndc_scale), type_float3_}, {"ndc_offset", offsetof(SystemConstants, ndc_offset), type_float3_}, + {"texture_swizzled_signs", + offsetof(SystemConstants, texture_swizzled_signs), type_uint4_array_2}, + {"texture_swizzles", offsetof(SystemConstants, texture_swizzles), + type_uint4_array_4}, }; id_vector_temp_.clear(); id_vector_temp_.reserve(xe::countof(system_constants)); @@ -401,6 +423,15 
@@ void SpirvShaderTranslator::StartTranslation() { var_main_vfetch_address_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassFunction, type_int_, "xe_var_vfetch_address", const_int_0_); + var_main_tfetch_lod_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_float_, + "xe_var_tfetch_lod", const_float_0_); + var_main_tfetch_gradients_h_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_float3_, + "xe_var_tfetch_gradients_h", const_float3_0_); + var_main_tfetch_gradients_v_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_float3_, + "xe_var_tfetch_gradients_v", const_float3_0_); uint32_t register_array_size = register_count(); if (register_array_size) { id_vector_temp_.clear(); @@ -945,6 +976,29 @@ void SpirvShaderTranslator::ProcessJumpInstruction( builder_->createBranch(main_loop_continue_); } +spv::Id SpirvShaderTranslator::SpirvSmearScalarResultOrConstant( + spv::Id scalar, spv::Id vector_type) { + bool is_constant = builder_->isConstant(scalar); + bool is_spec_constant = builder_->isSpecConstant(scalar); + if (!is_constant && !is_spec_constant) { + return builder_->smearScalar(spv::NoPrecision, scalar, vector_type); + } + assert_true(builder_->getTypeClass(builder_->getTypeId(scalar)) == + builder_->getTypeClass(builder_->getScalarTypeId(vector_type))); + if (!builder_->isVectorType(vector_type)) { + assert_true(builder_->isScalarType(vector_type)); + return scalar; + } + int num_components = builder_->getNumTypeComponents(vector_type); + id_vector_temp_util_.clear(); + id_vector_temp_util_.reserve(size_t(num_components)); + for (int i = 0; i < num_components; ++i) { + id_vector_temp_util_.push_back(scalar); + } + return builder_->makeCompositeConstant(vector_type, id_vector_temp_util_, + is_spec_constant); +} + void SpirvShaderTranslator::EnsureBuildPointAvailable() { if (!builder_->getBuildPoint()->isTerminated()) { return; @@ -2049,5 +2103,188 @@ spv::Id 
SpirvShaderTranslator::LoadUint32FromSharedMemory( return value_phi_result; } +spv::Id SpirvShaderTranslator::PWLGammaToLinear(spv::Id gamma, + bool gamma_pre_saturated) { + spv::Id value_type = builder_->getTypeId(gamma); + assert_true(builder_->isFloatType(builder_->getScalarTypeId(value_type))); + bool is_vector = builder_->isVectorType(value_type); + assert_true(is_vector || builder_->isFloatType(value_type)); + int num_components = builder_->getNumTypeComponents(value_type); + assert_true(num_components < 4); + spv::Id bool_type = type_bool_vectors_[num_components - 1]; + + spv::Id const_vector_0 = const_float_vectors_0_[num_components - 1]; + spv::Id const_vector_1 = SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(1.0f), value_type); + + if (!gamma_pre_saturated) { + // Saturate, flushing NaN to 0. + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back(gamma); + id_vector_temp_.push_back(const_vector_0); + id_vector_temp_.push_back(const_vector_1); + gamma = builder_->createBuiltinCall(value_type, ext_inst_glsl_std_450_, + GLSLstd450NClamp, id_vector_temp_); + } + + spv::Id is_piece_at_least_3 = builder_->createBinOp( + spv::OpFOrdGreaterThanEqual, bool_type, gamma, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(192.0f / 255.0f), value_type)); + spv::Id scale_3_or_2 = builder_->createTriOp( + spv::OpSelect, value_type, is_piece_at_least_3, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(8.0f / 1024.0f), value_type), + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(4.0f / 1024.0f), value_type)); + spv::Id offset_3_or_2 = builder_->createTriOp( + spv::OpSelect, value_type, is_piece_at_least_3, + SpirvSmearScalarResultOrConstant(builder_->makeFloatConstant(-1024.0f), + value_type), + SpirvSmearScalarResultOrConstant(builder_->makeFloatConstant(-256.0f), + value_type)); + + spv::Id is_piece_at_least_1 = builder_->createBinOp( + spv::OpFOrdGreaterThanEqual, 
bool_type, gamma, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(64.0f / 255.0f), value_type)); + spv::Id scale_1_or_0 = builder_->createTriOp( + spv::OpSelect, value_type, is_piece_at_least_1, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(2.0f / 1024.0f), value_type), + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(1.0f / 1024.0f), value_type)); + spv::Id offset_1_or_0 = builder_->createTriOp( + spv::OpSelect, value_type, is_piece_at_least_1, + SpirvSmearScalarResultOrConstant(builder_->makeFloatConstant(-64.0f), + value_type), + const_vector_0); + + spv::Id is_piece_at_least_2 = builder_->createBinOp( + spv::OpFOrdGreaterThanEqual, bool_type, gamma, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(96.0f / 255.0f), value_type)); + spv::Id scale = + builder_->createTriOp(spv::OpSelect, value_type, is_piece_at_least_2, + scale_3_or_2, scale_1_or_0); + spv::Id offset = + builder_->createTriOp(spv::OpSelect, value_type, is_piece_at_least_2, + offset_3_or_2, offset_1_or_0); + + spv::Op value_times_scalar_opcode = + is_vector ? 
spv::OpVectorTimesScalar : spv::OpFMul; + // linear = gamma * (255.0f * 1024.0f) * scale + offset + spv::Id linear = + builder_->createBinOp(value_times_scalar_opcode, value_type, gamma, + builder_->makeFloatConstant(255.0f * 1024.0f)); + builder_->addDecoration(linear, spv::DecorationNoContraction); + linear = builder_->createBinOp(spv::OpFMul, value_type, linear, scale); + builder_->addDecoration(linear, spv::DecorationNoContraction); + linear = builder_->createBinOp(spv::OpFAdd, value_type, linear, offset); + builder_->addDecoration(linear, spv::DecorationNoContraction); + // linear += trunc(linear * scale) + spv::Id linear_integer_term = + builder_->createBinOp(spv::OpFMul, value_type, linear, scale); + builder_->addDecoration(linear_integer_term, spv::DecorationNoContraction); + id_vector_temp_.clear(); + id_vector_temp_.push_back(linear_integer_term); + linear_integer_term = builder_->createBuiltinCall( + value_type, ext_inst_glsl_std_450_, GLSLstd450Trunc, id_vector_temp_); + linear = builder_->createBinOp(spv::OpFAdd, value_type, linear, + linear_integer_term); + builder_->addDecoration(linear, spv::DecorationNoContraction); + // linear *= 1.0f / 1023.0f + linear = builder_->createBinOp(value_times_scalar_opcode, value_type, linear, + builder_->makeFloatConstant(1.0f / 1023.0f)); + builder_->addDecoration(linear, spv::DecorationNoContraction); + return linear; +} + +spv::Id SpirvShaderTranslator::LinearToPWLGamma(spv::Id linear, + bool linear_pre_saturated) { + spv::Id value_type = builder_->getTypeId(linear); + assert_true(builder_->isFloatType(builder_->getScalarTypeId(value_type))); + bool is_vector = builder_->isVectorType(value_type); + assert_true(is_vector || builder_->isFloatType(value_type)); + int num_components = builder_->getNumTypeComponents(value_type); + assert_true(num_components < 4); + spv::Id bool_type = type_bool_vectors_[num_components - 1]; + + spv::Id const_vector_0 = const_float_vectors_0_[num_components - 1]; + spv::Id 
const_vector_1 = SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(1.0f), value_type); + + if (!linear_pre_saturated) { + // Saturate, flushing NaN to 0. + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back(linear); + id_vector_temp_.push_back(const_vector_0); + id_vector_temp_.push_back(const_vector_1); + linear = builder_->createBuiltinCall(value_type, ext_inst_glsl_std_450_, + GLSLstd450NClamp, id_vector_temp_); + } + + spv::Id is_piece_at_least_3 = builder_->createBinOp( + spv::OpFOrdGreaterThanEqual, bool_type, linear, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(512.0f / 1023.0f), value_type)); + spv::Id scale_3_or_2 = builder_->createTriOp( + spv::OpSelect, value_type, is_piece_at_least_3, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(1023.0f / 8.0f), value_type), + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(1023.0f / 4.0f), value_type)); + spv::Id offset_3_or_2 = builder_->createTriOp( + spv::OpSelect, value_type, is_piece_at_least_3, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(128.0f / 255.0f), value_type), + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(64.0f / 255.0f), value_type)); + + spv::Id is_piece_at_least_1 = builder_->createBinOp( + spv::OpFOrdGreaterThanEqual, bool_type, linear, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(64.0f / 1023.0f), value_type)); + spv::Id scale_1_or_0 = builder_->createTriOp( + spv::OpSelect, value_type, is_piece_at_least_1, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(1023.0f / 2.0f), value_type), + SpirvSmearScalarResultOrConstant(builder_->makeFloatConstant(1023.0f), + value_type)); + spv::Id offset_1_or_0 = builder_->createTriOp( + spv::OpSelect, value_type, is_piece_at_least_1, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(32.0f / 255.0f), value_type), + const_vector_0); + + spv::Id is_piece_at_least_2 = 
builder_->createBinOp( + spv::OpFOrdGreaterThanEqual, bool_type, linear, + SpirvSmearScalarResultOrConstant( + builder_->makeFloatConstant(128.0f / 1023.0f), value_type)); + spv::Id scale = + builder_->createTriOp(spv::OpSelect, value_type, is_piece_at_least_2, + scale_3_or_2, scale_1_or_0); + spv::Id offset = + builder_->createTriOp(spv::OpSelect, value_type, is_piece_at_least_2, + offset_3_or_2, offset_1_or_0); + + // gamma = trunc(linear * scale) * (1.0f / 255.0f) + offset + spv::Id gamma = builder_->createBinOp(spv::OpFMul, value_type, linear, scale); + builder_->addDecoration(gamma, spv::DecorationNoContraction); + id_vector_temp_.clear(); + id_vector_temp_.push_back(gamma); + gamma = builder_->createBuiltinCall(value_type, ext_inst_glsl_std_450_, + GLSLstd450Trunc, id_vector_temp_); + gamma = builder_->createBinOp( + is_vector ? spv::OpVectorTimesScalar : spv::OpFMul, value_type, gamma, + builder_->makeFloatConstant(1.0f / 255.0f)); + builder_->addDecoration(gamma, spv::DecorationNoContraction); + gamma = builder_->createBinOp(spv::OpFAdd, value_type, gamma, offset); + builder_->addDecoration(gamma, spv::DecorationNoContraction); + return gamma; +} + } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index abc3225a5..3df49136f 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -84,6 +84,16 @@ class SpirvShaderTranslator : public ShaderTranslator { float ndc_offset[3]; uint32_t padding_ndc_offset; + + // Each byte contains post-swizzle TextureSign values for each of the needed + // components of each of the 32 used texture fetch constants. + uint32_t texture_swizzled_signs[8]; + + // If the imageViewFormatSwizzle portability subset is not supported, the + // component swizzle (taking both guest and host swizzles into account) to + // apply to the result directly in the shader code. 
In each uint32_t, + // swizzles for 2 texture fetch constants (in bits 0:11 and 12:23). + uint32_t texture_swizzles[16]; }; // The minimum limit for maxPerStageDescriptorStorageBuffers is 4, and for @@ -151,6 +161,7 @@ class SpirvShaderTranslator : public ShaderTranslator { uint32_t max_storage_buffer_range; bool clip_distance; bool cull_distance; + bool image_view_format_swizzle; bool signed_zero_inf_nan_preserve_float32; bool denorm_flush_to_zero_float32; }; @@ -244,7 +255,6 @@ class SpirvShaderTranslator : public ShaderTranslator { xenos::FetchOpDimension dimension; bool is_signed; - spv::Id type; spv::Id variable; }; @@ -259,6 +269,7 @@ class SpirvShaderTranslator : public ShaderTranslator { }; // Builder helpers. + spv::Id SpirvSmearScalarResultOrConstant(spv::Id scalar, spv::Id vector_type); void SpirvCreateSelectionMerge( spv::Id merge_block_id, spv::SelectionControlMask selection_control_mask = spv::SelectionControlMaskNone) { @@ -379,6 +390,10 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id LoadUint32FromSharedMemory(spv::Id address_dwords_int); + // The source may be a floating-point scalar or a vector. + spv::Id PWLGammaToLinear(spv::Id gamma, bool gamma_pre_saturated); + spv::Id LinearToPWLGamma(spv::Id linear, bool linear_pre_saturated); + size_t FindOrAddTextureBinding(uint32_t fetch_constant, xenos::FetchOpDimension dimension, bool is_signed); @@ -387,6 +402,24 @@ class SpirvShaderTranslator : public ShaderTranslator { xenos::TextureFilter min_filter, xenos::TextureFilter mip_filter, xenos::AnisoFilter aniso_filter); + // `texture_parameters` need to be set up except for `sampler`, which will be + // set internally, optionally doing linear interpolation between the an + // existing value and the new one (the result location may be the same as for + // the first lerp endpoint, but not across signedness). 
+ void SampleTexture(spv::Builder::TextureParameters& texture_parameters, + spv::ImageOperandsMask image_operands_mask, + spv::Id image_unsigned, spv::Id image_signed, + spv::Id sampler, spv::Id is_all_signed, + spv::Id is_any_signed, spv::Id& result_unsigned_out, + spv::Id& result_signed_out, + spv::Id lerp_factor = spv::NoResult, + spv::Id lerp_first_unsigned = spv::NoResult, + spv::Id lerp_first_signed = spv::NoResult); + // `texture_parameters` need to be set up except for `sampler`, which will be + // set internally. + spv::Id QueryTextureLod(spv::Builder::TextureParameters& texture_parameters, + spv::Id image_unsigned, spv::Id image_signed, + spv::Id sampler, spv::Id is_all_signed); Features features_; @@ -473,6 +506,8 @@ class SpirvShaderTranslator : public ShaderTranslator { kSystemConstantIndexVertexBaseIndex, kSystemConstantNdcScale, kSystemConstantNdcOffset, + kSystemConstantTextureSwizzledSigns, + kSystemConstantTextureSwizzles, }; spv::Id uniform_system_constants_; spv::Id uniform_float_constants_; @@ -522,6 +557,11 @@ class SpirvShaderTranslator : public ShaderTranslator { // `base + index * stride` in dwords from the last vfetch_full as it may be // needed by vfetch_mini - int. spv::Id var_main_vfetch_address_; + // float. + spv::Id var_main_tfetch_lod_; + // float3. + spv::Id var_main_tfetch_gradients_h_; + spv::Id var_main_tfetch_gradients_v_; // float4[register_count()]. spv::Id var_main_registers_; // VS only - float3 (special exports). 
diff --git a/src/xenia/gpu/spirv_shader_translator_fetch.cc b/src/xenia/gpu/spirv_shader_translator_fetch.cc index 5ec982618..98e9a5836 100644 --- a/src/xenia/gpu/spirv_shader_translator_fetch.cc +++ b/src/xenia/gpu/spirv_shader_translator_fetch.cc @@ -17,6 +17,7 @@ #include "third_party/fmt/include/fmt/format.h" #include "third_party/glslang/SPIRV/GLSL.std.450.h" +#include "xenia/base/assert.h" #include "xenia/base/math.h" namespace xe { @@ -541,20 +542,1929 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( EnsureBuildPointAvailable(); - // TODO(Triang3l): Fetch the texture. - if (instr.opcode == ucode::FetchOpcode::kTextureFetch) { - uint32_t fetch_constant_index = instr.operands[1].storage_index; + // Handle the instructions for setting the register LOD. + switch (instr.opcode) { + case ucode::FetchOpcode::kSetTextureLod: + builder_->createStore( + GetOperandComponents(LoadOperandStorage(instr.operands[0]), + instr.operands[0], 0b0001), + var_main_tfetch_lod_); + return; + case ucode::FetchOpcode::kSetTextureGradientsHorz: + builder_->createStore( + GetOperandComponents(LoadOperandStorage(instr.operands[0]), + instr.operands[0], 0b0111), + var_main_tfetch_gradients_h_); + return; + case ucode::FetchOpcode::kSetTextureGradientsVert: + builder_->createStore( + GetOperandComponents(LoadOperandStorage(instr.operands[0]), + instr.operands[0], 0b0111), + var_main_tfetch_gradients_v_); + return; + default: + break; + } + + // Handle instructions that store something. + uint32_t used_result_components = instr.result.GetUsedResultComponents(); + uint32_t used_result_nonzero_components = instr.GetNonZeroResultComponents(); + switch (instr.opcode) { + case ucode::FetchOpcode::kTextureFetch: + break; + case ucode::FetchOpcode::kGetTextureBorderColorFrac: + // TODO(Triang3l): Bind a black texture with a white border to calculate + // the border color fraction (in the X component of the result). 
+ assert_always(); + EmitTranslationError("getBCF is unimplemented", false); + used_result_nonzero_components = 0; + break; + case ucode::FetchOpcode::kGetTextureComputedLod: + break; + case ucode::FetchOpcode::kGetTextureGradients: + break; + case ucode::FetchOpcode::kGetTextureWeights: + // FIXME(Triang3l): Currently disregarding the LOD completely in + // getWeights because the needed code would be very complicated, while + // getWeights is mostly used for things like PCF of shadow maps, that + // don't have mips. The LOD would be needed for the mip lerp factor in W + // of the return value and to choose the LOD where interpolation would + // take place for XYZ. That would require either implementing the LOD + // calculation algorithm using the ALU (since the `lod` instruction is + // limited to pixel shaders and can't be used when there's control flow + // divergence, unlike explicit gradients), or sampling a texture filled + // with LOD numbers (easier and more consistent - unclamped LOD doesn't + // make sense for getWeights anyway). The same applies to offsets. + used_result_nonzero_components &= ~uint32_t(0b1000); + break; + default: + assert_unhandled_case(instr.opcode); + EmitTranslationError("Unknown texture fetch operation"); + used_result_nonzero_components = 0; + } + uint32_t used_result_component_count = xe::bit_count(used_result_components); + if (!used_result_nonzero_components) { + // Nothing to fetch, only constant 0/1 writes - simplify the rest of the + // function so it doesn't have to handle this case. + if (used_result_components) { + StoreResult(instr.result, + const_float_vectors_0_[used_result_component_count - 1]); + } + return; + } + + spv::Id result[] = {const_float_0_, const_float_0_, const_float_0_, + const_float_0_}; + + if (instr.opcode == ucode::FetchOpcode::kGetTextureGradients) { + // Doesn't need the texture, handle separately. 
+ spv::Id operand_0_storage = LoadOperandStorage(instr.operands[0]); + bool derivative_function_x_used = + (used_result_nonzero_components & 0b0011) != 0; + bool derivative_function_y_used = + (used_result_nonzero_components & 0b1100) != 0; + spv::Id derivative_function_x = spv::NoResult; + spv::Id derivative_function_y = spv::NoResult; + if (derivative_function_x_used && derivative_function_y_used) { + spv::Id derivative_function = + GetOperandComponents(operand_0_storage, instr.operands[0], 0b0011); + derivative_function_x = + builder_->createCompositeExtract(derivative_function, type_float_, 0); + derivative_function_y = + builder_->createCompositeExtract(derivative_function, type_float_, 1); + } else { + if (derivative_function_x_used) { + derivative_function_x = + GetOperandComponents(operand_0_storage, instr.operands[0], 0b0001); + } + if (derivative_function_y_used) { + derivative_function_y = + GetOperandComponents(operand_0_storage, instr.operands[0], 0b0010); + } + } + builder_->addCapability(spv::CapabilityDerivativeControl); + uint32_t derivative_components_remaining = used_result_nonzero_components; + uint32_t derivative_component_index; + while (xe::bit_scan_forward(derivative_components_remaining, + &derivative_component_index)) { + derivative_components_remaining &= + ~(UINT32_C(1) << derivative_component_index); + result[derivative_component_index] = builder_->createUnaryOp( + (derivative_component_index & 0b01) ? spv::OpDPdyCoarse + : spv::OpDPdxCoarse, + type_float_, + (derivative_component_index & 0b10) ? derivative_function_y + : derivative_function_x); + } + } else { + // kTextureFetch, kGetTextureComputedLod or kGetTextureWeights. + + // Whether to use gradients (implicit or explicit) for LOD calculation. 
bool use_computed_lod = instr.attributes.use_computed_lod && (is_pixel_shader() || instr.attributes.use_register_gradients); - FindOrAddTextureBinding(fetch_constant_index, instr.dimension, false); - FindOrAddTextureBinding(fetch_constant_index, instr.dimension, true); - FindOrAddSamplerBinding(fetch_constant_index, instr.attributes.mag_filter, - instr.attributes.min_filter, - instr.attributes.mip_filter, - use_computed_lod ? instr.attributes.aniso_filter - : xenos::AnisoFilter::kDisabled); + if (instr.opcode == ucode::FetchOpcode::kGetTextureComputedLod && + (!use_computed_lod || instr.attributes.use_register_gradients)) { + assert_always(); + EmitTranslationError( + "getCompTexLOD used with explicit LOD or gradients - contradicts " + "MSDN", + false); + StoreResult(instr.result, + const_float_vectors_0_[used_result_component_count - 1]); + return; + } + + uint32_t fetch_constant_index = instr.operands[1].storage_index; + uint32_t fetch_constant_word_0_index = 6 * fetch_constant_index; + + spv::Id sampler = spv::NoResult; + spv::Id image_2d_array_or_cube_unsigned = spv::NoResult; + spv::Id image_2d_array_or_cube_signed = spv::NoResult; + spv::Id image_3d_unsigned = spv::NoResult; + spv::Id image_3d_signed = spv::NoResult; + if (instr.opcode != ucode::FetchOpcode::kGetTextureWeights) { + bool bindings_set_up = true; + // While GL_ARB_texture_query_lod specifies the value for + // GL_NEAREST_MIPMAP_NEAREST and GL_LINEAR_MIPMAP_NEAREST minifying + // functions as rounded (unlike the `lod` instruction in Direct3D 10.1+, + // which is not defined for point sampling), the XNA assembler doesn't + // accept MipFilter overrides for getCompTexLOD - probably should be + // linear only, though not known exactly. + // + // 4D5307F2 uses vertex displacement map textures for tessellated models + // like the beehive tree with explicit LOD with point sampling (they store + // values packed in two components), however, the fetch constant has + // anisotropic filtering enabled. 
However, Direct3D 12 doesn't allow + // mixing anisotropic and point filtering. Possibly anistropic filtering + // should be disabled when explicit LOD is used - do this here. + size_t sampler_index = FindOrAddSamplerBinding( + fetch_constant_index, instr.attributes.mag_filter, + instr.attributes.min_filter, + instr.opcode == ucode::FetchOpcode::kGetTextureComputedLod + ? xenos::TextureFilter::kLinear + : instr.attributes.mip_filter, + use_computed_lod ? instr.attributes.aniso_filter + : xenos::AnisoFilter::kDisabled); + xenos::FetchOpDimension dimension_2d_array_or_cube = + instr.dimension == xenos::FetchOpDimension::k3DOrStacked + ? xenos::FetchOpDimension::k2D + : instr.dimension; + size_t image_2d_array_or_cube_unsigned_index = FindOrAddTextureBinding( + fetch_constant_index, dimension_2d_array_or_cube, false); + size_t image_2d_array_or_cube_signed_index = FindOrAddTextureBinding( + fetch_constant_index, dimension_2d_array_or_cube, true); + if (sampler_index == SIZE_MAX || + image_2d_array_or_cube_unsigned_index == SIZE_MAX || + image_2d_array_or_cube_signed_index == SIZE_MAX) { + bindings_set_up = false; + } + size_t image_3d_unsigned_index = SIZE_MAX; + size_t image_3d_signed_index = SIZE_MAX; + if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { + image_3d_unsigned_index = FindOrAddTextureBinding( + fetch_constant_index, xenos::FetchOpDimension::k3DOrStacked, false); + image_3d_signed_index = FindOrAddTextureBinding( + fetch_constant_index, xenos::FetchOpDimension::k3DOrStacked, true); + if (image_3d_unsigned_index == SIZE_MAX || + image_3d_signed_index == SIZE_MAX) { + bindings_set_up = false; + } + } + if (!bindings_set_up) { + // Too many image or sampler bindings used. 
+ StoreResult(instr.result, + const_float_vectors_0_[used_result_component_count - 1]); + return; + } + sampler = builder_->createLoad(sampler_bindings_[sampler_index].variable, + spv::NoPrecision); + const TextureBinding& image_2d_array_or_cube_unsigned_binding = + texture_bindings_[image_2d_array_or_cube_unsigned_index]; + image_2d_array_or_cube_unsigned = builder_->createLoad( + image_2d_array_or_cube_unsigned_binding.variable, spv::NoPrecision); + const TextureBinding& image_2d_array_or_cube_signed_binding = + texture_bindings_[image_2d_array_or_cube_signed_index]; + image_2d_array_or_cube_signed = builder_->createLoad( + image_2d_array_or_cube_signed_binding.variable, spv::NoPrecision); + if (image_3d_unsigned_index != SIZE_MAX) { + const TextureBinding& image_3d_unsigned_binding = + texture_bindings_[image_3d_unsigned_index]; + image_3d_unsigned = builder_->createLoad( + image_3d_unsigned_binding.variable, spv::NoPrecision); + } + if (image_3d_signed_index != SIZE_MAX) { + const TextureBinding& image_3d_signed_binding = + texture_bindings_[image_3d_signed_index]; + image_3d_signed = builder_->createLoad(image_3d_signed_binding.variable, + spv::NoPrecision); + } + } + + // Get offsets applied to the coordinates before sampling. + // FIXME(Triang3l): Offsets need to be applied at the LOD being fetched, not + // at LOD 0. However, since offsets have granularity of 0.5, not 1, on the + // Xenos, they can't be passed directly as ConstOffset to the image sample + // instruction (plus-minus 0.5 offsets are very common in games). But + // offsetting at mip levels is a rare usage case, mostly offsets are used + // for things like shadow maps and blur, where there are no mips. + float offset_values[3] = {}; + // MSDN doesn't list offsets as getCompTexLOD parameters. 
+ if (instr.opcode != ucode::FetchOpcode::kGetTextureComputedLod) { + // Add a small epsilon to the offset (1.5/4 the fixed-point texture + // coordinate ULP with 8-bit subtexel precision - shouldn't significantly + // effect the fixed-point conversion; 1/4 is also not enough with 3x + // resolution scaling very noticeably on the weapon in 4D5307E6, at least + // on the Direct3D 12 backend) to resolve ambiguity when fetching + // point-sampled textures between texels. This applies to both normalized + // (58410954 Xbox Live Arcade logo, coordinates interpolated between + // vertices with half-pixel offset) and unnormalized (4D5307E6 lighting + // G-buffer reading, ps_param_gen pixels) coordinates. On Nvidia Pascal, + // without this adjustment, blockiness is visible in both cases. Possibly + // there is a better way, however, an attempt was made to error-correct + // division by adding the difference between original and re-denormalized + // coordinates, but on Nvidia, `mul` (on Direct3D 12) and internal + // multiplication in texture sampling apparently round differently, so + // `mul` gives a value that would be floored as expected, but the + // left/upper pixel is still sampled instead. + const float kRoundingOffset = 1.5f / 1024.0f; + switch (instr.dimension) { + case xenos::FetchOpDimension::k1D: + offset_values[0] = instr.attributes.offset_x + kRoundingOffset; + if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) { + // For coordinate lerp factors. This needs to be done separately for + // point mag/min filters, but they're currently not handled here + // anyway. 
+ offset_values[0] -= 0.5f; + } + break; + case xenos::FetchOpDimension::k2D: + offset_values[0] = instr.attributes.offset_x + kRoundingOffset; + offset_values[1] = instr.attributes.offset_y + kRoundingOffset; + if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) { + offset_values[0] -= 0.5f; + offset_values[1] -= 0.5f; + } + break; + case xenos::FetchOpDimension::k3DOrStacked: + offset_values[0] = instr.attributes.offset_x + kRoundingOffset; + offset_values[1] = instr.attributes.offset_y + kRoundingOffset; + offset_values[2] = instr.attributes.offset_z + kRoundingOffset; + if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) { + offset_values[0] -= 0.5f; + offset_values[1] -= 0.5f; + offset_values[2] -= 0.5f; + } + break; + case xenos::FetchOpDimension::kCube: + // Applying the rounding epsilon to cube maps too for potential game + // passes processing cube map faces themselves. + offset_values[0] = instr.attributes.offset_x + kRoundingOffset; + offset_values[1] = instr.attributes.offset_y + kRoundingOffset; + if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) { + offset_values[0] -= 0.5f; + offset_values[1] -= 0.5f; + // The logic for ST weights is the same for all faces. + // FIXME(Triang3l): If LOD calculation is added to getWeights, face + // offset probably will need to be handled too (if the hardware + // supports it at all, though MSDN lists OffsetZ in tfetchCube). + } else { + offset_values[2] = instr.attributes.offset_z; + } + break; + } + } + uint32_t offsets_not_zero = 0b000; + for (uint32_t i = 0; i < 3; ++i) { + if (offset_values[i]) { + offsets_not_zero |= 1 << i; + } + } + + // Fetch constant word usage: + // - 2: Size (needed only once). + // - 3: Exponent adjustment (needed only once). + // - 4: Conditionally for 3D kTextureFetch: stacked texture filtering modes. + // Unconditionally LOD kTextureFetch: LOD and gradient exponent bias, + // result exponent bias. + // - 5: Dimensionality (3D or 2D stacked - needed only once). 
+ + // Load the texture size and whether it's 3D or stacked if needed. + // 1D: X - width. + // 2D, cube: X - width, Y - height (cube maps probably can be only square, + // but for simplicity). + // 3D: X - width, Y - height, Z - depth. + uint32_t size_needed_components = 0b000; + bool data_is_3d_needed = false; + if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) { + // Size needed for denormalization for coordinate lerp factor. + // FIXME(Triang3l): Currently disregarding the LOD completely in + // getWeights. However, if the LOD lerp factor and the LOD where filtering + // would happen are ever calculated, all components of the size may be + // needed for ALU LOD calculation with normalized coordinates (or, if a + // texture filled with LOD indices is used, coordinates will need to be + // normalized as normally). + if (!instr.attributes.unnormalized_coordinates) { + switch (instr.dimension) { + case xenos::FetchOpDimension::k1D: + size_needed_components |= used_result_nonzero_components & 0b0001; + break; + case xenos::FetchOpDimension::k2D: + case xenos::FetchOpDimension::kCube: + size_needed_components |= used_result_nonzero_components & 0b0011; + break; + case xenos::FetchOpDimension::k3DOrStacked: + size_needed_components |= used_result_nonzero_components & 0b0111; + break; + } + } + } else { + // Size needed for normalization (or, for stacked texture layers, + // denormalization) and for offsets. + size_needed_components |= offsets_not_zero; + switch (instr.dimension) { + case xenos::FetchOpDimension::k1D: + if (instr.attributes.unnormalized_coordinates) { + size_needed_components |= 0b0001; + } + break; + case xenos::FetchOpDimension::k2D: + if (instr.attributes.unnormalized_coordinates) { + size_needed_components |= 0b0011; + } + break; + case xenos::FetchOpDimension::k3DOrStacked: + // Stacked and 3D textures are fetched from different bindings - the + // check is always needed. 
+ data_is_3d_needed = true; + if (instr.attributes.unnormalized_coordinates) { + // Need to normalize all (if 3D). + size_needed_components |= 0b0111; + } else { + // Need to denormalize Z (if stacked). + size_needed_components |= 0b0100; + } + break; + case xenos::FetchOpDimension::kCube: + if (instr.attributes.unnormalized_coordinates) { + size_needed_components |= 0b0011; + } + // The size is not needed for face ID offset. + size_needed_components &= 0b0011; + break; + } + } + if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked && + size_needed_components) { + // Stacked and 3D textures have different size packing - need to get + // whether the texture is 3D unconditionally. + data_is_3d_needed = true; + } + spv::Id data_is_3d = spv::NoResult; + if (data_is_3d_needed) { + // Get the data dimensionality from the bits 9:10 of the fetch constant + // word 5. + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(builder_->makeIntConstant( + int((fetch_constant_word_0_index + 5) >> 2))); + id_vector_temp_.push_back(builder_->makeIntConstant( + int((fetch_constant_word_0_index + 5) & 3))); + spv::Id fetch_constant_word_5 = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_fetch_constants_, id_vector_temp_), + spv::NoPrecision); + spv::Id data_dimension = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_5, + builder_->makeUintConstant(9), builder_->makeUintConstant(2)); + data_is_3d = builder_->createBinOp( + spv::OpIEqual, type_bool_, data_dimension, + builder_->makeUintConstant( + static_cast(xenos::DataDimension::k3D))); + } + spv::Id size[3] = {}; + if (size_needed_components) { + // Get the size from the fetch constant word 2. 
+ id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(builder_->makeIntConstant( + int((fetch_constant_word_0_index + 2) >> 2))); + id_vector_temp_.push_back(builder_->makeIntConstant( + int((fetch_constant_word_0_index + 2) & 3))); + spv::Id fetch_constant_word_2 = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_fetch_constants_, id_vector_temp_), + spv::NoPrecision); + switch (instr.dimension) { + case xenos::FetchOpDimension::k1D: { + if (size_needed_components & 0b1) { + size[0] = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_2, + const_uint_0_, + builder_->makeUintConstant(xenos::kTexture1DMaxWidthLog2)); + } + assert_zero(size_needed_components & 0b110); + } break; + case xenos::FetchOpDimension::k2D: + case xenos::FetchOpDimension::kCube: { + if (size_needed_components & 0b1) { + size[0] = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_2, + const_uint_0_, + builder_->makeUintConstant( + xenos::kTexture2DCubeMaxWidthHeightLog2)); + } + if (size_needed_components & 0b10) { + spv::Id width_height_bit_count = builder_->makeUintConstant( + xenos::kTexture2DCubeMaxWidthHeightLog2); + size[1] = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_2, + width_height_bit_count, width_height_bit_count); + } + assert_zero(size_needed_components & 0b100); + } break; + case xenos::FetchOpDimension::k3DOrStacked: { + if (size_needed_components & 0b1) { + spv::Id size_3d = + builder_->createTriOp(spv::OpBitFieldUExtract, type_uint_, + fetch_constant_word_2, const_uint_0_, + builder_->makeUintConstant( + xenos::kTexture3DMaxWidthHeightLog2)); + spv::Id size_2d = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_2, + const_uint_0_, + builder_->makeUintConstant( + xenos::kTexture2DCubeMaxWidthHeightLog2)); + assert_true(data_is_3d != 
spv::NoResult); + size[0] = builder_->createTriOp(spv::OpSelect, type_uint_, + data_is_3d, size_3d, size_2d); + } + if (size_needed_components & 0b10) { + spv::Id width_height_bit_count_3d = + builder_->makeUintConstant(xenos::kTexture3DMaxWidthHeightLog2); + spv::Id size_3d = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_2, + width_height_bit_count_3d, width_height_bit_count_3d); + spv::Id width_height_bit_count_2d = builder_->makeUintConstant( + xenos::kTexture2DCubeMaxWidthHeightLog2); + spv::Id size_2d = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_2, + width_height_bit_count_2d, width_height_bit_count_2d); + assert_true(data_is_3d != spv::NoResult); + size[1] = builder_->createTriOp(spv::OpSelect, type_uint_, + data_is_3d, size_3d, size_2d); + } + if (size_needed_components & 0b100) { + spv::Id size_3d = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_2, + builder_->makeUintConstant(xenos::kTexture3DMaxWidthHeightLog2 * + 2), + builder_->makeUintConstant(xenos::kTexture3DMaxDepthLog2)); + spv::Id size_2d = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, fetch_constant_word_2, + builder_->makeUintConstant( + xenos::kTexture2DCubeMaxWidthHeightLog2 * 2), + builder_->makeUintConstant(xenos::kTexture2DMaxStackDepthLog2)); + assert_true(data_is_3d != spv::NoResult); + size[2] = builder_->createTriOp(spv::OpSelect, type_uint_, + data_is_3d, size_3d, size_2d); + } + } break; + } + { + uint32_t size_remaining_components = size_needed_components; + uint32_t size_component_index; + while (xe::bit_scan_forward(size_remaining_components, + &size_component_index)) { + size_remaining_components &= ~(UINT32_C(1) << size_component_index); + spv::Id& size_component_ref = size[size_component_index]; + // Fetch constants store size minus 1 - add 1. 
+ size_component_ref = + builder_->createBinOp(spv::OpIAdd, type_uint_, size_component_ref, + builder_->makeUintConstant(1)); + // Convert the size to float for multiplication or division. + size_component_ref = builder_->createUnaryOp( + spv::OpConvertUToF, type_float_, size_component_ref); + } + } + } + + // FIXME(Triang3l): Mip lerp factor needs to be calculated, and the + // coordinate lerp factors should be calculated at the mip level texels + // would be sampled from. That would require some way of calculating the + // LOD that would be applicable to explicit gradients and vertex shaders. + // Also, with point sampling, possibly lerp factors need to be 0. W (mip + // lerp factor) should have been masked out previously because it's not + // supported currently. + assert_false(instr.opcode == ucode::FetchOpcode::kGetTextureWeights && + (used_result_nonzero_components & 0b1000)); + + // Load the needed original values of the coordinates operand. + uint32_t coordinates_needed_components = + instr.opcode == ucode::FetchOpcode::kGetTextureWeights + ? 
used_result_nonzero_components + : ((UINT32_C(1) + << xenos::GetFetchOpDimensionComponentCount(instr.dimension)) - + 1); + assert_not_zero(coordinates_needed_components); + spv::Id coordinates_operand = + GetOperandComponents(LoadOperandStorage(instr.operands[0]), + instr.operands[0], coordinates_needed_components); + spv::Id coordinates[] = {const_float_0_, const_float_0_, const_float_0_}; + if (xe::bit_count(coordinates_needed_components) > 1) { + uint32_t coordinates_remaining_components = coordinates_needed_components; + uint32_t coordinate_component_index; + uint32_t coordinate_operand_component_index = 0; + while (xe::bit_scan_forward(coordinates_remaining_components, + &coordinate_component_index)) { + coordinates_remaining_components &= + ~(UINT32_C(1) << coordinate_component_index); + coordinates[coordinate_component_index] = + builder_->createCompositeExtract( + coordinates_operand, type_float_, + coordinate_operand_component_index++); + } + } else { + uint32_t coordinate_component_index; + xe::bit_scan_forward(coordinates_needed_components, + &coordinate_component_index); + coordinates[coordinate_component_index] = coordinates_operand; + } + + // TODO(Triang3l): Reverting the resolution scale. + + if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) { + // FIXME(Triang3l): Filtering modes should possibly be taken into account, + // but for simplicity, not doing that - from a high level point of view, + // would be useless to get weights that will always be zero. + uint32_t coordinates_remaining_components = coordinates_needed_components; + uint32_t coordinate_component_index; + while (xe::bit_scan_forward(coordinates_remaining_components, + &coordinate_component_index)) { + coordinates_remaining_components &= + ~(UINT32_C(1) << coordinate_component_index); + spv::Id result_component = coordinates[coordinate_component_index]; + // Need unnormalized coordinates. 
+ if (!instr.attributes.unnormalized_coordinates) { + spv::Id size_component = size[coordinate_component_index]; + assert_true(size_component != spv::NoResult); + result_component = builder_->createBinOp( + spv::OpFMul, type_float_, result_component, size_component); + builder_->addDecoration(result_component, + spv::DecorationNoContraction); + } + float component_offset = offset_values[coordinate_component_index]; + if (component_offset) { + result_component = builder_->createBinOp( + spv::OpFAdd, type_float_, result_component, + builder_->makeFloatConstant(component_offset)); + builder_->addDecoration(result_component, + spv::DecorationNoContraction); + } + // 0.5 has already been subtracted via offsets previously. + id_vector_temp_.clear(); + id_vector_temp_.push_back(result_component); + result_component = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450Fract, id_vector_temp_); + result[coordinate_component_index] = result_component; + } + } else { + // kTextureFetch or kGetTextureComputedLod. + + // Normalize the XY coordinates, and apply the offset. + for (uint32_t i = 0; + i <= uint32_t(instr.dimension != xenos::FetchOpDimension::k1D); + ++i) { + spv::Id& coordinate_ref = coordinates[i]; + spv::Id component_offset = + offset_values[i] ? 
builder_->makeFloatConstant(offset_values[i]) + : spv::NoResult; + spv::Id size_component = size[i]; + if (instr.attributes.unnormalized_coordinates) { + if (component_offset != spv::NoResult) { + coordinate_ref = builder_->createBinOp( + spv::OpFAdd, type_float_, coordinate_ref, component_offset); + builder_->addDecoration(coordinate_ref, + spv::DecorationNoContraction); + } + assert_true(size_component != spv::NoResult); + coordinate_ref = builder_->createBinOp( + spv::OpFDiv, type_float_, coordinate_ref, size_component); + builder_->addDecoration(coordinate_ref, spv::DecorationNoContraction); + } else { + if (component_offset != spv::NoResult) { + assert_true(size_component != spv::NoResult); + spv::Id component_offset_normalized = builder_->createBinOp( + spv::OpFDiv, type_float_, component_offset, size_component); + builder_->addDecoration(component_offset_normalized, + spv::DecorationNoContraction); + coordinate_ref = + builder_->createBinOp(spv::OpFAdd, type_float_, coordinate_ref, + component_offset_normalized); + builder_->addDecoration(coordinate_ref, + spv::DecorationNoContraction); + } + } + } + if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { + spv::Id& z_coordinate_ref = coordinates[2]; + spv::Id z_offset = offset_values[2] + ? builder_->makeFloatConstant(offset_values[2]) + : spv::NoResult; + spv::Id z_size = size[2]; + if (instr.attributes.unnormalized_coordinates) { + // Apply the offset, and normalize the Z coordinate for a 3D texture. 
+ if (z_offset != spv::NoResult) { + z_coordinate_ref = builder_->createBinOp( + spv::OpFAdd, type_float_, z_coordinate_ref, z_offset); + builder_->addDecoration(z_coordinate_ref, + spv::DecorationNoContraction); + } + spv::Block& block_dimension_head = *builder_->getBuildPoint(); + spv::Block& block_dimension_merge = builder_->makeNewBlock(); + spv::Block& block_dimension_3d = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_dimension_merge.getId(), + spv::SelectionControlDontFlattenMask); + assert_true(data_is_3d != spv::NoResult); + builder_->createConditionalBranch(data_is_3d, &block_dimension_3d, + &block_dimension_merge); + builder_->setBuildPoint(&block_dimension_3d); + assert_true(z_size != spv::NoResult); + spv::Id z_3d = builder_->createBinOp(spv::OpFDiv, type_float_, + z_coordinate_ref, z_size); + builder_->addDecoration(z_3d, spv::DecorationNoContraction); + builder_->createBranch(&block_dimension_merge); + builder_->setBuildPoint(&block_dimension_merge); + { + std::unique_ptr z_phi_op = + std::make_unique(builder_->getUniqueId(), + type_float_, spv::OpPhi); + z_phi_op->addIdOperand(z_3d); + z_phi_op->addIdOperand(block_dimension_3d.getId()); + z_phi_op->addIdOperand(z_coordinate_ref); + z_phi_op->addIdOperand(block_dimension_head.getId()); + z_coordinate_ref = z_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(z_phi_op)); + } + } else { + // Denormalize the Z coordinate for a stacked texture, and apply the + // offset. + spv::Block& block_dimension_head = *builder_->getBuildPoint(); + spv::Block& block_dimension_merge = builder_->makeNewBlock(); + spv::Block* block_dimension_3d = + z_offset != spv::NoResult ? 
&builder_->makeNewBlock() : nullptr; + spv::Block& block_dimension_stacked = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_dimension_merge.getId(), + spv::SelectionControlDontFlattenMask); + assert_true(data_is_3d != spv::NoResult); + builder_->createConditionalBranch( + data_is_3d, + block_dimension_3d ? block_dimension_3d : &block_dimension_merge, + &block_dimension_stacked); + // 3D case. + spv::Id z_3d = z_coordinate_ref; + if (block_dimension_3d) { + builder_->setBuildPoint(block_dimension_3d); + if (z_offset != spv::NoResult) { + assert_true(z_size != spv::NoResult); + spv::Id z_offset_normalized = builder_->createBinOp( + spv::OpFDiv, type_float_, z_offset, z_size); + builder_->addDecoration(z_offset_normalized, + spv::DecorationNoContraction); + z_3d = builder_->createBinOp(spv::OpFAdd, type_float_, z_3d, + z_offset_normalized); + builder_->addDecoration(z_3d, spv::DecorationNoContraction); + } + builder_->createBranch(&block_dimension_merge); + } + // Stacked case. + builder_->setBuildPoint(&block_dimension_stacked); + spv::Id z_stacked = z_coordinate_ref; + assert_true(z_size != spv::NoResult); + z_stacked = builder_->createBinOp(spv::OpFMul, type_float_, z_stacked, + z_size); + builder_->addDecoration(z_stacked, spv::DecorationNoContraction); + if (z_offset != spv::NoResult) { + z_stacked = builder_->createBinOp(spv::OpFAdd, type_float_, + z_stacked, z_offset); + builder_->addDecoration(z_stacked, spv::DecorationNoContraction); + } + builder_->createBranch(&block_dimension_merge); + // Select one of the two. + builder_->setBuildPoint(&block_dimension_merge); + { + std::unique_ptr z_phi_op = + std::make_unique(builder_->getUniqueId(), + type_float_, spv::OpPhi); + z_phi_op->addIdOperand(z_3d); + z_phi_op->addIdOperand((block_dimension_3d ? 
*block_dimension_3d + : block_dimension_head) + .getId()); + z_phi_op->addIdOperand(z_stacked); + z_phi_op->addIdOperand(block_dimension_stacked.getId()); + z_coordinate_ref = z_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(z_phi_op)); + } + } + } else if (instr.dimension == xenos::FetchOpDimension::kCube) { + // Transform the cube coordinates from 2D to 3D. + // Move SC/TC from 1...2 to -1...1. + spv::Id const_float_2 = builder_->makeFloatConstant(2.0f); + spv::Id const_float_minus_3 = builder_->makeFloatConstant(-3.0f); + for (uint32_t i = 0; i < 2; ++i) { + spv::Id& coordinate_ref = coordinates[i]; + coordinate_ref = builder_->createBinOp(spv::OpFMul, type_float_, + coordinate_ref, const_float_2); + builder_->addDecoration(coordinate_ref, spv::DecorationNoContraction); + coordinate_ref = builder_->createBinOp( + spv::OpFAdd, type_float_, coordinate_ref, const_float_minus_3); + builder_->addDecoration(coordinate_ref, spv::DecorationNoContraction); + } + // Get the face index (floored, within 0...5 - OpConvertFToU is + // undefined for out-of-range values, so clamping from both sides + // manually). 
+ spv::Id face = coordinates[2]; + if (offset_values[2]) { + face = builder_->createBinOp( + spv::OpFAdd, type_float_, face, + builder_->makeFloatConstant(offset_values[2])); + builder_->addDecoration(face, spv::DecorationNoContraction); + } + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + id_vector_temp_.push_back(const_float_0_); + id_vector_temp_.push_back(face); + face = builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450NMax, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + id_vector_temp_.push_back(builder_->makeFloatConstant(5.0f)); + id_vector_temp_.push_back(face); + face = builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450FMin, id_vector_temp_); + face = builder_->createUnaryOp(spv::OpConvertFToU, type_uint_, face); + // Split the face index into the axis and the sign. + spv::Id const_uint_1 = builder_->makeUintConstant(1); + spv::Id face_axis = builder_->createBinOp( + spv::OpShiftRightLogical, type_uint_, face, const_uint_1); + spv::Id face_is_negative = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp(spv::OpBitwiseAnd, type_uint_, face, + const_uint_1), + const_uint_0_); + spv::Id face_sign = + builder_->createTriOp(spv::OpSelect, type_float_, face_is_negative, + builder_->makeFloatConstant(-1.0f), + builder_->makeFloatConstant(1.0f)); + // Remap the axes in a way opposite to the ALU cube instruction. 
+ spv::Id sc_negated = builder_->createUnaryOp( + spv::OpFNegate, type_float_, coordinates[0]); + builder_->addDecoration(sc_negated, spv::DecorationNoContraction); + spv::Id tc_negated = builder_->createUnaryOp( + spv::OpFNegate, type_float_, coordinates[1]); + builder_->addDecoration(tc_negated, spv::DecorationNoContraction); + spv::Block& block_ma_head = *builder_->getBuildPoint(); + spv::Block& block_ma_x = builder_->makeNewBlock(); + spv::Block& block_ma_y = builder_->makeNewBlock(); + spv::Block& block_ma_z = builder_->makeNewBlock(); + spv::Block& block_ma_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_ma_merge.getId()); + { + std::unique_ptr ma_switch_op = + std::make_unique(spv::OpSwitch); + ma_switch_op->addIdOperand(face_axis); + // Make Z the default. + ma_switch_op->addIdOperand(block_ma_z.getId()); + ma_switch_op->addImmediateOperand(0); + ma_switch_op->addIdOperand(block_ma_x.getId()); + ma_switch_op->addImmediateOperand(1); + ma_switch_op->addIdOperand(block_ma_y.getId()); + builder_->getBuildPoint()->addInstruction(std::move(ma_switch_op)); + } + block_ma_x.addPredecessor(&block_ma_head); + block_ma_y.addPredecessor(&block_ma_head); + block_ma_z.addPredecessor(&block_ma_head); + // X is the major axis case. + builder_->setBuildPoint(&block_ma_x); + spv::Id ma_x_y = tc_negated; + spv::Id ma_x_z = + builder_->createTriOp(spv::OpSelect, type_float_, face_is_negative, + coordinates[0], sc_negated); + builder_->createBranch(&block_ma_merge); + // Y is the major axis case. + builder_->setBuildPoint(&block_ma_y); + spv::Id ma_y_x = coordinates[0]; + spv::Id ma_y_z = + builder_->createTriOp(spv::OpSelect, type_float_, face_is_negative, + tc_negated, coordinates[1]); + builder_->createBranch(&block_ma_merge); + // Z is the major axis case. 
+ builder_->setBuildPoint(&block_ma_z); + spv::Id ma_z_x = + builder_->createTriOp(spv::OpSelect, type_float_, face_is_negative, + sc_negated, coordinates[0]); + spv::Id ma_z_y = tc_negated; + builder_->createBranch(&block_ma_merge); + // Gather the coordinate components from the branches. + builder_->setBuildPoint(&block_ma_merge); + { + std::unique_ptr x_phi_op = + std::make_unique(builder_->getUniqueId(), + type_float_, spv::OpPhi); + x_phi_op->addIdOperand(face_sign); + x_phi_op->addIdOperand(block_ma_x.getId()); + x_phi_op->addIdOperand(ma_y_x); + x_phi_op->addIdOperand(block_ma_y.getId()); + x_phi_op->addIdOperand(ma_z_x); + x_phi_op->addIdOperand(block_ma_z.getId()); + coordinates[0] = x_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(x_phi_op)); + } + { + std::unique_ptr y_phi_op = + std::make_unique(builder_->getUniqueId(), + type_float_, spv::OpPhi); + y_phi_op->addIdOperand(ma_x_y); + y_phi_op->addIdOperand(block_ma_x.getId()); + y_phi_op->addIdOperand(face_sign); + y_phi_op->addIdOperand(block_ma_y.getId()); + y_phi_op->addIdOperand(ma_z_y); + y_phi_op->addIdOperand(block_ma_z.getId()); + coordinates[1] = y_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(y_phi_op)); + } + { + std::unique_ptr z_phi_op = + std::make_unique(builder_->getUniqueId(), + type_float_, spv::OpPhi); + z_phi_op->addIdOperand(ma_x_z); + z_phi_op->addIdOperand(block_ma_x.getId()); + z_phi_op->addIdOperand(ma_y_z); + z_phi_op->addIdOperand(block_ma_y.getId()); + z_phi_op->addIdOperand(face_sign); + z_phi_op->addIdOperand(block_ma_z.getId()); + coordinates[2] = z_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(z_phi_op)); + } + } + + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back( + builder_->makeIntConstant(kSystemConstantTextureSwizzledSigns)); + id_vector_temp_.push_back( + builder_->makeIntConstant(fetch_constant_index >> 4)); + id_vector_temp_.push_back( + 
builder_->makeIntConstant((fetch_constant_index >> 2) & 3)); + // All 32 bits containing the values for 4 fetch constants (use + // OpBitFieldUExtract to get the signednesses for the specific components + // of this texture). + spv::Id swizzled_signs_word = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision); + uint32_t swizzled_signs_word_offset = 8 * (fetch_constant_index & 3); + + spv::Builder::TextureParameters texture_parameters = {}; + + if (instr.opcode == ucode::FetchOpcode::kGetTextureComputedLod) { + // kGetTextureComputedLod. + + // Check if the signed binding is needs to be accessed rather than the + // unsigned (if all signednesses are signed). + spv::Id swizzled_signs_all_signed = builder_->createBinOp( + spv::OpIEqual, type_bool_, + builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, swizzled_signs_word, + builder_->makeUintConstant(swizzled_signs_word_offset), + builder_->makeUintConstant(8)), + builder_->makeUintConstant(uint32_t(xenos::TextureSign::kSigned) * + 0b01010101)); + + // OpImageQueryLod doesn't need the array layer component. + // So, 3 coordinate components for 3D cube, 2 in other cases (including + // 1D, which are emulated as 2D arrays). + // OpSampledImage must be in the same block as where its result is used. + if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { + // Check if the texture is 3D or stacked. 
+ spv::Block& block_dimension_head = *builder_->getBuildPoint(); + spv::Block& block_dimension_3d_start = builder_->makeNewBlock(); + spv::Block& block_dimension_stacked_start = builder_->makeNewBlock(); + spv::Block& block_dimension_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_dimension_merge.getId(), + spv::SelectionControlDontFlattenMask); + assert_true(data_is_3d != spv::NoResult); + builder_->createConditionalBranch(data_is_3d, + &block_dimension_3d_start, + &block_dimension_stacked_start); + + // 3D. + builder_->setBuildPoint(&block_dimension_3d_start); + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + for (uint32_t i = 0; i < 3; ++i) { + id_vector_temp_.push_back(coordinates[i]); + } + texture_parameters.coords = + builder_->createCompositeConstruct(type_float3_, id_vector_temp_); + spv::Id lod_3d = QueryTextureLod(texture_parameters, + image_3d_unsigned, image_3d_signed, + sampler, swizzled_signs_all_signed); + // Get the actual build point for phi. + spv::Block& block_dimension_3d_end = *builder_->getBuildPoint(); + builder_->createBranch(&block_dimension_merge); + + // 2D stacked. + builder_->setBuildPoint(&block_dimension_stacked_start); + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + for (uint32_t i = 0; i < 2; ++i) { + id_vector_temp_.push_back(coordinates[i]); + } + texture_parameters.coords = + builder_->createCompositeConstruct(type_float2_, id_vector_temp_); + spv::Id lod_stacked = QueryTextureLod( + texture_parameters, image_2d_array_or_cube_unsigned, + image_2d_array_or_cube_signed, sampler, + swizzled_signs_all_signed); + // Get the actual build point for phi. + spv::Block& block_dimension_stacked_end = *builder_->getBuildPoint(); + builder_->createBranch(&block_dimension_merge); + + // Choose between the 3D and the stacked result based on the actual + // data dimensionality. 
+ builder_->setBuildPoint(&block_dimension_merge); + { + std::unique_ptr dimension_phi_op = + std::make_unique(builder_->getUniqueId(), + type_float_, spv::OpPhi); + dimension_phi_op->addIdOperand(lod_3d); + dimension_phi_op->addIdOperand(block_dimension_3d_end.getId()); + dimension_phi_op->addIdOperand(lod_stacked); + dimension_phi_op->addIdOperand(block_dimension_stacked_end.getId()); + result[0] = dimension_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(dimension_phi_op)); + } + } else { + uint32_t lod_query_coordinate_component_count = + instr.dimension == xenos::FetchOpDimension::kCube ? 3 : 2; + id_vector_temp_.clear(); + id_vector_temp_.reserve(lod_query_coordinate_component_count); + for (uint32_t i = 0; i < lod_query_coordinate_component_count; ++i) { + id_vector_temp_.push_back(coordinates[i]); + } + texture_parameters.coords = builder_->createCompositeConstruct( + type_float_vectors_[lod_query_coordinate_component_count - 1], + id_vector_temp_); + result[0] = QueryTextureLod(texture_parameters, + image_2d_array_or_cube_unsigned, + image_2d_array_or_cube_signed, sampler, + swizzled_signs_all_signed); + } + } else { + // kTextureFetch. + assert_true(instr.opcode == ucode::FetchOpcode::kTextureFetch); + + // Extract the signedness for each component of the swizzled result, and + // get which bindings (unsigned and signed) are needed. 
+ spv::Id swizzled_signs[4] = {}; + spv::Id result_is_signed[4] = {}; + spv::Id is_all_signed = spv::NoResult; + spv::Id is_any_signed = spv::NoResult; + spv::Id const_uint_2 = builder_->makeUintConstant(2); + spv::Id const_uint_sign_signed = + builder_->makeUintConstant(uint32_t(xenos::TextureSign::kSigned)); + { + uint32_t result_remaining_components = used_result_nonzero_components; + uint32_t result_component_index; + while (xe::bit_scan_forward(result_remaining_components, + &result_component_index)) { + result_remaining_components &= + ~(UINT32_C(1) << result_component_index); + spv::Id result_component_sign = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, swizzled_signs_word, + builder_->makeUintConstant(swizzled_signs_word_offset + + 2 * result_component_index), + const_uint_2); + swizzled_signs[result_component_index] = result_component_sign; + spv::Id is_component_signed = builder_->createBinOp( + spv::OpIEqual, type_bool_, result_component_sign, + const_uint_sign_signed); + result_is_signed[result_component_index] = is_component_signed; + if (is_all_signed != spv::NoResult) { + is_all_signed = + builder_->createBinOp(spv::OpLogicalAnd, type_bool_, + is_all_signed, is_component_signed); + } else { + is_all_signed = is_component_signed; + } + if (is_any_signed != spv::NoResult) { + is_any_signed = + builder_->createBinOp(spv::OpLogicalOr, type_bool_, + is_any_signed, is_component_signed); + } else { + is_any_signed = is_component_signed; + } + } + } + + // Load the fetch constant word 4, needed unconditionally for LOD + // biasing, for result exponent biasing, and conditionally for stacked + // texture filtering. 
+ id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back(const_int_0_); + id_vector_temp_.push_back(builder_->makeIntConstant( + int((fetch_constant_word_0_index + 4) >> 2))); + id_vector_temp_.push_back(builder_->makeIntConstant( + int((fetch_constant_word_0_index + 4) & 3))); + spv::Id fetch_constant_word_4 = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_fetch_constants_, id_vector_temp_), + spv::NoPrecision); + spv::Id fetch_constant_word_4_signed = builder_->createUnaryOp( + spv::OpBitcast, type_int_, fetch_constant_word_4); + + // Accumulate the explicit LOD (or LOD bias) sources (in D3D11.3 + // specification order: specified LOD + sampler LOD bias + instruction + // LOD bias). + // Fetch constant LOD (bits 12:21 of the word 4). + spv::Id lod = builder_->createBinOp( + spv::OpFMul, type_float_, + builder_->createUnaryOp( + spv::OpConvertUToF, type_float_, + builder_->createTriOp(spv::OpBitFieldSExtract, type_int_, + fetch_constant_word_4_signed, + builder_->makeUintConstant(12), + builder_->makeUintConstant(10))), + builder_->makeFloatConstant(1.0f / 32.0f)); + builder_->addDecoration(lod, spv::DecorationNoContraction); + // Register LOD. + if (instr.attributes.use_register_lod) { + lod = builder_->createBinOp( + spv::OpFAdd, type_float_, + builder_->createLoad(var_main_tfetch_lod_, spv::NoPrecision), + lod); + builder_->addDecoration(lod, spv::DecorationNoContraction); + } + // Instruction LOD bias. + if (instr.attributes.lod_bias) { + lod = builder_->createBinOp( + spv::OpFAdd, type_float_, lod, + builder_->makeFloatConstant(instr.attributes.lod_bias)); + builder_->addDecoration(lod, spv::DecorationNoContraction); + } + + // Calculate the gradients for sampling the texture if needed. + // 2D vectors for k1D (because 1D images are emulated as 2D arrays), + // k2D. + // 3D vectors for k3DOrStacked, kCube. 
+ spv::Id gradients_h = spv::NoResult, gradients_v = spv::NoResult; + if (use_computed_lod) { + // TODO(Triang3l): Gradient exponent adjustment is currently not done + // in getCompTexLOD, so not doing it here too for now. Apply the + // gradient exponent biases from the word 4 of the fetch constant in + // the future when it's handled in getCompTexLOD somehow. + id_vector_temp_.clear(); + id_vector_temp_.push_back(lod); + spv::Id lod_gradient_scale = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450Exp2, id_vector_temp_); + switch (instr.dimension) { + case xenos::FetchOpDimension::k1D: { + spv::Id gradient_h_1d, gradient_v_1d; + if (instr.attributes.use_register_gradients) { + id_vector_temp_.clear(); + // First component. + id_vector_temp_.push_back(const_int_0_); + gradient_h_1d = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassFunction, + var_main_tfetch_gradients_h_, + id_vector_temp_), + spv::NoPrecision); + gradient_v_1d = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassFunction, + var_main_tfetch_gradients_v_, + id_vector_temp_), + spv::NoPrecision); + if (instr.attributes.unnormalized_coordinates) { + // Normalize the gradients. 
+ assert_true(size[0] != spv::NoResult); + gradient_h_1d = builder_->createBinOp( + spv::OpFDiv, type_float_, gradient_h_1d, size[0]); + builder_->addDecoration(gradient_h_1d, + spv::DecorationNoContraction); + gradient_v_1d = builder_->createBinOp( + spv::OpFDiv, type_float_, gradient_v_1d, size[0]); + builder_->addDecoration(gradient_v_1d, + spv::DecorationNoContraction); + } + } else { + builder_->addCapability(spv::CapabilityDerivativeControl); + gradient_h_1d = builder_->createUnaryOp( + spv::OpDPdxCoarse, type_float_, coordinates[0]); + gradient_v_1d = builder_->createUnaryOp( + spv::OpDPdyCoarse, type_float_, coordinates[0]); + } + gradient_h_1d = builder_->createBinOp( + spv::OpFMul, type_float_, gradient_h_1d, lod_gradient_scale); + builder_->addDecoration(gradient_h_1d, + spv::DecorationNoContraction); + gradient_v_1d = builder_->createBinOp( + spv::OpFMul, type_float_, gradient_v_1d, lod_gradient_scale); + builder_->addDecoration(gradient_v_1d, + spv::DecorationNoContraction); + // 1D textures are sampled as 2D arrays - need 2-component + // gradients. + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + id_vector_temp_.push_back(gradient_h_1d); + id_vector_temp_.push_back(const_float_0_); + gradients_h = builder_->createCompositeConstruct(type_float2_, + id_vector_temp_); + id_vector_temp_[0] = gradient_v_1d; + gradients_v = builder_->createCompositeConstruct(type_float2_, + id_vector_temp_); + } break; + case xenos::FetchOpDimension::k2D: { + if (instr.attributes.use_register_gradients) { + for (uint32_t i = 0; i < 2; ++i) { + spv::Id register_gradient_3d = + builder_->createLoad(i ? 
var_main_tfetch_gradients_h_ + : var_main_tfetch_gradients_v_, + spv::NoPrecision); + spv::Id register_gradient_x = + builder_->createCompositeExtract(register_gradient_3d, + type_float_, 0); + spv::Id register_gradient_y = + builder_->createCompositeExtract(register_gradient_3d, + type_float_, 1); + if (instr.attributes.unnormalized_coordinates) { + // Normalize the gradients. + assert_true(size[0] != spv::NoResult); + register_gradient_x = builder_->createBinOp( + spv::OpFDiv, type_float_, register_gradient_x, size[0]); + builder_->addDecoration(register_gradient_x, + spv::DecorationNoContraction); + assert_true(size[1] != spv::NoResult); + register_gradient_y = builder_->createBinOp( + spv::OpFDiv, type_float_, register_gradient_y, size[1]); + builder_->addDecoration(register_gradient_y, + spv::DecorationNoContraction); + } + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + id_vector_temp_.push_back(register_gradient_x); + id_vector_temp_.push_back(register_gradient_y); + (i ? 
gradients_v : gradients_h) = + builder_->createCompositeConstruct(type_float2_, + id_vector_temp_); + } + } else { + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + for (uint32_t i = 0; i < 2; ++i) { + id_vector_temp_.push_back(coordinates[i]); + } + spv::Id gradient_coordinate_vector = + builder_->createCompositeConstruct(type_float2_, + id_vector_temp_); + builder_->addCapability(spv::CapabilityDerivativeControl); + gradients_h = + builder_->createUnaryOp(spv::OpDPdxCoarse, type_float2_, + gradient_coordinate_vector); + gradients_v = + builder_->createUnaryOp(spv::OpDPdyCoarse, type_float2_, + gradient_coordinate_vector); + } + gradients_h = + builder_->createBinOp(spv::OpVectorTimesScalar, type_float2_, + gradients_h, lod_gradient_scale); + builder_->addDecoration(gradients_h, + spv::DecorationNoContraction); + gradients_v = + builder_->createBinOp(spv::OpVectorTimesScalar, type_float2_, + gradients_v, lod_gradient_scale); + builder_->addDecoration(gradients_v, + spv::DecorationNoContraction); + } break; + case xenos::FetchOpDimension::k3DOrStacked: { + if (instr.attributes.use_register_gradients) { + gradients_h = builder_->createLoad(var_main_tfetch_gradients_h_, + spv::NoPrecision); + gradients_v = builder_->createLoad(var_main_tfetch_gradients_v_, + spv::NoPrecision); + if (instr.attributes.unnormalized_coordinates) { + // Normalize the gradients. + for (uint32_t i = 0; i < 2; ++i) { + spv::Id& gradient_ref = i ? 
gradients_v : gradients_h; + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + for (uint32_t j = 0; j < 3; ++j) { + assert_true(size[j] != spv::NoResult); + id_vector_temp_.push_back(builder_->createBinOp( + spv::OpFDiv, type_float_, + builder_->createCompositeExtract(gradient_ref, + type_float_, j), + size[j])); + builder_->addDecoration(id_vector_temp_.back(), + spv::DecorationNoContraction); + } + gradient_ref = builder_->createCompositeConstruct( + type_float3_, id_vector_temp_); + } + } + } else { + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + for (uint32_t i = 0; i < 3; ++i) { + id_vector_temp_.push_back(coordinates[i]); + } + spv::Id gradient_coordinate_vector = + builder_->createCompositeConstruct(type_float3_, + id_vector_temp_); + builder_->addCapability(spv::CapabilityDerivativeControl); + gradients_h = + builder_->createUnaryOp(spv::OpDPdxCoarse, type_float3_, + gradient_coordinate_vector); + gradients_v = + builder_->createUnaryOp(spv::OpDPdyCoarse, type_float3_, + gradient_coordinate_vector); + } + gradients_h = + builder_->createBinOp(spv::OpVectorTimesScalar, type_float3_, + gradients_h, lod_gradient_scale); + builder_->addDecoration(gradients_h, + spv::DecorationNoContraction); + gradients_v = + builder_->createBinOp(spv::OpVectorTimesScalar, type_float3_, + gradients_v, lod_gradient_scale); + builder_->addDecoration(gradients_v, + spv::DecorationNoContraction); + } break; + case xenos::FetchOpDimension::kCube: { + if (instr.attributes.use_register_gradients) { + // Register gradients are already in the cube space for cube + // maps. + // TODO(Triang3l): Are cube map register gradients unnormalized + // if the coordinates themselves are unnormalized? 
+ gradients_h = builder_->createLoad(var_main_tfetch_gradients_h_, + spv::NoPrecision); + gradients_v = builder_->createLoad(var_main_tfetch_gradients_v_, + spv::NoPrecision); + } else { + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + for (uint32_t i = 0; i < 3; ++i) { + id_vector_temp_.push_back(coordinates[i]); + } + spv::Id gradient_coordinate_vector = + builder_->createCompositeConstruct(type_float3_, + id_vector_temp_); + builder_->addCapability(spv::CapabilityDerivativeControl); + gradients_h = + builder_->createUnaryOp(spv::OpDPdxCoarse, type_float3_, + gradient_coordinate_vector); + gradients_v = + builder_->createUnaryOp(spv::OpDPdyCoarse, type_float3_, + gradient_coordinate_vector); + } + gradients_h = + builder_->createBinOp(spv::OpVectorTimesScalar, type_float3_, + gradients_h, lod_gradient_scale); + builder_->addDecoration(gradients_h, + spv::DecorationNoContraction); + gradients_v = + builder_->createBinOp(spv::OpVectorTimesScalar, type_float3_, + gradients_v, lod_gradient_scale); + builder_->addDecoration(gradients_v, + spv::DecorationNoContraction); + } break; + } + } + + // Sample the texture. + spv::ImageOperandsMask image_operands_mask = + use_computed_lod ? spv::ImageOperandsGradMask + : spv::ImageOperandsLodMask; + spv::Id sample_result_unsigned, sample_result_signed; + if (!use_computed_lod) { + texture_parameters.lod = lod; + } + if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { + // 3D (3 coordinate components, 3 gradient components, single fetch) + // or 2D stacked (2 coordinate components + 1 array layer coordinate + // component, 2 gradient components, two fetches if the Z axis is + // linear-filtered). 
+ + spv::Block& block_dimension_head = *builder_->getBuildPoint(); + spv::Block& block_dimension_3d_start = builder_->makeNewBlock(); + spv::Block& block_dimension_stacked_start = builder_->makeNewBlock(); + spv::Block& block_dimension_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_dimension_merge.getId(), + spv::SelectionControlDontFlattenMask); + assert_true(data_is_3d != spv::NoResult); + builder_->createConditionalBranch(data_is_3d, + &block_dimension_3d_start, + &block_dimension_stacked_start); + + // 3D. + builder_->setBuildPoint(&block_dimension_3d_start); + if (use_computed_lod) { + texture_parameters.gradX = gradients_h; + texture_parameters.gradY = gradients_v; + } + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + for (uint32_t i = 0; i < 3; ++i) { + id_vector_temp_.push_back(coordinates[i]); + } + texture_parameters.coords = + builder_->createCompositeConstruct(type_float3_, id_vector_temp_); + spv::Id sample_result_unsigned_3d, sample_result_signed_3d; + SampleTexture(texture_parameters, image_operands_mask, + image_3d_unsigned, image_3d_signed, sampler, + is_all_signed, is_any_signed, sample_result_unsigned_3d, + sample_result_signed_3d); + // Get the actual build point after the SampleTexture call for phi. + spv::Block& block_dimension_3d_end = *builder_->getBuildPoint(); + builder_->createBranch(&block_dimension_merge); + + // 2D stacked. + builder_->setBuildPoint(&block_dimension_stacked_start); + if (use_computed_lod) { + // Extract 2D gradients for stacked textures which are 2D arrays. 
+ { + std::unique_ptr shuffle_op = + std::make_unique(builder_->getUniqueId(), + type_float2_, + spv::OpVectorShuffle); + shuffle_op->addIdOperand(gradients_h); + shuffle_op->addIdOperand(gradients_h); + shuffle_op->addImmediateOperand(0); + shuffle_op->addImmediateOperand(1); + texture_parameters.gradX = shuffle_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(shuffle_op)); + } + { + std::unique_ptr shuffle_op = + std::make_unique(builder_->getUniqueId(), + type_float2_, + spv::OpVectorShuffle); + shuffle_op->addIdOperand(gradients_v); + shuffle_op->addIdOperand(gradients_v); + shuffle_op->addImmediateOperand(0); + shuffle_op->addImmediateOperand(1); + texture_parameters.gradY = shuffle_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(shuffle_op)); + } + } + // Check if linear filtering is needed. + bool vol_mag_filter_is_fetch_const = + instr.attributes.vol_mag_filter == + xenos::TextureFilter::kUseFetchConst; + bool vol_min_filter_is_fetch_const = + instr.attributes.vol_min_filter == + xenos::TextureFilter::kUseFetchConst; + bool vol_mag_filter_is_linear = + instr.attributes.vol_mag_filter == xenos::TextureFilter::kLinear; + bool vol_min_filter_is_linear = + instr.attributes.vol_min_filter == xenos::TextureFilter::kLinear; + spv::Id vol_filter_is_linear = spv::NoResult; + if (use_computed_lod && + (vol_mag_filter_is_fetch_const || vol_min_filter_is_fetch_const || + vol_mag_filter_is_linear != vol_min_filter_is_linear)) { + // Check if minifying along layers (derivative > 1 along any axis). + id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + for (uint32_t i = 0; i < 2; ++i) { + id_vector_temp_.push_back(builder_->createCompositeExtract( + i ? 
gradients_v : gradients_h, type_float_, 2)); + } + spv::Id layer_max_gradient = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450NMax, id_vector_temp_); + if (!instr.attributes.unnormalized_coordinates) { + // Denormalize the gradient if provided as normalized. + assert_true(size[2] != spv::NoResult); + layer_max_gradient = builder_->createBinOp( + spv::OpFMul, type_float_, layer_max_gradient, size[2]); + builder_->addDecoration(layer_max_gradient, + spv::DecorationNoContraction); + } + // For NaN, considering that magnification is being done. + spv::Id is_minifying_z = builder_->createBinOp( + spv::OpFOrdLessThan, type_bool_, layer_max_gradient, + builder_->makeFloatConstant(1.0f)); + // Choose what filter is actually used, the minification or the + // magnification one. + spv::Id vol_mag_filter_is_linear_loaded = + vol_mag_filter_is_fetch_const + ? builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + fetch_constant_word_4, + builder_->makeUintConstant(UINT32_C(1) << 0)), + const_uint_0_) + : builder_->makeBoolConstant(vol_mag_filter_is_linear); + spv::Id vol_min_filter_is_linear_loaded = + vol_min_filter_is_fetch_const + ? builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + fetch_constant_word_4, + builder_->makeUintConstant(UINT32_C(1) << 1)), + const_uint_0_) + : builder_->makeBoolConstant(vol_min_filter_is_linear); + vol_filter_is_linear = + builder_->createTriOp(spv::OpSelect, type_bool_, is_minifying_z, + vol_min_filter_is_linear_loaded, + vol_mag_filter_is_linear_loaded); + } else { + // No gradients, or using the same filter overrides for magnifying + // and minifying. Assume always magnifying if no gradients (LOD 0, + // always <= 0). LOD is within 2D layers, not between them (unlike + // in 3D textures, which have mips with depth reduced), so it + // shouldn't have effect on filtering between layers. 
+ if (vol_mag_filter_is_fetch_const) { + vol_filter_is_linear = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, fetch_constant_word_4, + builder_->makeUintConstant(UINT32_C(1) << 0)), + const_uint_0_); + } + } + spv::Id layer_coordinate = coordinates[2]; + // Linear filtering may be needed either based on a dynamic condition + // (the filtering mode is taken from the fetch constant, or it's + // different for magnification and minification), or on a static one + // (with gradients - specified in the instruction for both + // magnification and minification as linear, without gradients - + // specified for magnification as linear). + // If the filter is linear, subtract 0.5 from the Z coordinate of the + // first layer in filtering because 0.5 is in the middle of it. + if (vol_filter_is_linear != spv::NoResult) { + spv::Id layer_coordinate_linear = builder_->createBinOp( + spv::OpFSub, type_float_, layer_coordinate, + builder_->makeFloatConstant(0.5f)); + builder_->addDecoration(layer_coordinate_linear, + spv::DecorationNoContraction); + layer_coordinate = builder_->createTriOp( + spv::OpSelect, type_float_, vol_filter_is_linear, + layer_coordinate_linear, layer_coordinate); + } else if (vol_mag_filter_is_linear) { + layer_coordinate = builder_->createBinOp( + spv::OpFSub, type_float_, layer_coordinate, + builder_->makeFloatConstant(0.5f)); + builder_->addDecoration(layer_coordinate, + spv::DecorationNoContraction); + } + // Sample the first layer, needed regardless of whether filtering is + // needed. + // Floor the array layer (Vulkan does rounding to nearest or + 0.5 and + // floor even for the layer index, but on the Xenos, addressing is + // similar to that of 3D textures). This is needed for both point and + // linear filtering (with linear, 0.5 was subtracted previously). 
+ id_vector_temp_.clear(); + id_vector_temp_.push_back(layer_coordinate); + spv::Id layer_0_coordinate = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450Floor, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back(coordinates[0]); + id_vector_temp_.push_back(coordinates[1]); + id_vector_temp_.push_back(layer_0_coordinate); + texture_parameters.coords = + builder_->createCompositeConstruct(type_float3_, id_vector_temp_); + spv::Id sample_result_unsigned_stacked, sample_result_signed_stacked; + SampleTexture(texture_parameters, image_operands_mask, + image_2d_array_or_cube_unsigned, + image_2d_array_or_cube_signed, sampler, is_all_signed, + is_any_signed, sample_result_unsigned_stacked, + sample_result_signed_stacked); + // Sample the second layer if linear filtering is potentially needed + // (conditionally or unconditionally, depending on whether the filter + // needs to be chosen at runtime), and filter. + if (vol_filter_is_linear != spv::NoResult || + vol_mag_filter_is_linear) { + spv::Block& block_z_head = *builder_->getBuildPoint(); + spv::Block& block_z_linear = (vol_filter_is_linear != spv::NoResult) + ? builder_->makeNewBlock() + : block_z_head; + spv::Block& block_z_merge = (vol_filter_is_linear != spv::NoResult) + ? 
builder_->makeNewBlock() + : block_z_head; + if (vol_filter_is_linear != spv::NoResult) { + SpirvCreateSelectionMerge(block_z_merge.getId(), + spv::SelectionControlDontFlattenMask); + builder_->createConditionalBranch( + vol_filter_is_linear, &block_z_linear, &block_z_merge); + builder_->setBuildPoint(&block_z_linear); + } + spv::Id layer_1_coordinate = builder_->createBinOp( + spv::OpFAdd, type_float_, layer_0_coordinate, + builder_->makeFloatConstant(1.0f)); + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back(coordinates[0]); + id_vector_temp_.push_back(coordinates[1]); + id_vector_temp_.push_back(layer_1_coordinate); + texture_parameters.coords = builder_->createCompositeConstruct( + type_float3_, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.push_back(layer_coordinate); + spv::Id layer_lerp_factor = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450Fract, id_vector_temp_); + spv::Id sample_result_unsigned_stacked_filtered; + spv::Id sample_result_signed_stacked_filtered; + SampleTexture( + texture_parameters, image_operands_mask, + image_2d_array_or_cube_unsigned, image_2d_array_or_cube_signed, + sampler, is_all_signed, is_any_signed, + sample_result_unsigned_stacked_filtered, + sample_result_signed_stacked_filtered, layer_lerp_factor, + sample_result_unsigned_stacked, sample_result_signed_stacked); + if (vol_filter_is_linear != spv::NoResult) { + // Get the actual build point after the SampleTexture call for + // phi. 
+ spv::Block& block_z_linear_end = *builder_->getBuildPoint(); + builder_->createBranch(&block_z_merge); + builder_->setBuildPoint(&block_z_merge); + { + std::unique_ptr filter_phi_op = + std::make_unique( + builder_->getUniqueId(), type_float4_, spv::OpPhi); + filter_phi_op->addIdOperand( + sample_result_unsigned_stacked_filtered); + filter_phi_op->addIdOperand(block_z_linear_end.getId()); + filter_phi_op->addIdOperand(sample_result_unsigned_stacked); + filter_phi_op->addIdOperand(block_z_head.getId()); + sample_result_unsigned_stacked = filter_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(filter_phi_op)); + } + { + std::unique_ptr filter_phi_op = + std::make_unique( + builder_->getUniqueId(), type_float4_, spv::OpPhi); + filter_phi_op->addIdOperand( + sample_result_signed_stacked_filtered); + filter_phi_op->addIdOperand(block_z_linear_end.getId()); + filter_phi_op->addIdOperand(sample_result_signed_stacked); + filter_phi_op->addIdOperand(block_z_head.getId()); + sample_result_signed_stacked = filter_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(filter_phi_op)); + } + } else { + sample_result_unsigned_stacked = + sample_result_unsigned_stacked_filtered; + sample_result_signed_stacked = + sample_result_signed_stacked_filtered; + } + } + // Get the actual build point for phi. + spv::Block& block_dimension_stacked_end = *builder_->getBuildPoint(); + builder_->createBranch(&block_dimension_merge); + + // Choose between the 3D and the stacked result based on the actual + // data dimensionality. 
+ builder_->setBuildPoint(&block_dimension_merge); + { + std::unique_ptr dimension_phi_op = + std::make_unique(builder_->getUniqueId(), + type_float4_, spv::OpPhi); + dimension_phi_op->addIdOperand(sample_result_unsigned_3d); + dimension_phi_op->addIdOperand(block_dimension_3d_end.getId()); + dimension_phi_op->addIdOperand(sample_result_unsigned_stacked); + dimension_phi_op->addIdOperand(block_dimension_stacked_end.getId()); + sample_result_unsigned = dimension_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(dimension_phi_op)); + } + { + std::unique_ptr dimension_phi_op = + std::make_unique(builder_->getUniqueId(), + type_float4_, spv::OpPhi); + dimension_phi_op->addIdOperand(sample_result_signed_3d); + dimension_phi_op->addIdOperand(block_dimension_3d_end.getId()); + dimension_phi_op->addIdOperand(sample_result_signed_stacked); + dimension_phi_op->addIdOperand(block_dimension_stacked_end.getId()); + sample_result_signed = dimension_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(dimension_phi_op)); + } + } else { + if (use_computed_lod) { + texture_parameters.gradX = gradients_h; + texture_parameters.gradY = gradients_v; + } + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + for (uint32_t i = 0; i < 3; ++i) { + id_vector_temp_.push_back(coordinates[i]); + } + texture_parameters.coords = + builder_->createCompositeConstruct(type_float3_, id_vector_temp_); + SampleTexture(texture_parameters, image_operands_mask, + image_2d_array_or_cube_unsigned, + image_2d_array_or_cube_signed, sampler, is_all_signed, + is_any_signed, sample_result_unsigned, + sample_result_signed); + } + + // Swizzle the result components manually if needed, to `result`. 
+ // Because the same host format component may be replicated into + // multiple guest components (such as for formats with less than 4 + // components), yet the signedness is per-guest-component, it's not + // possible to apply the signedness to host components before swizzling, + // so doing it during (for unsigned vs. signed) and after (for biased + // and gamma) swizzling. + if (!features_.image_view_format_swizzle) { + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + id_vector_temp_.push_back( + builder_->makeIntConstant(kSystemConstantTextureSwizzles)); + id_vector_temp_.push_back( + builder_->makeIntConstant(fetch_constant_index >> 3)); + id_vector_temp_.push_back( + builder_->makeIntConstant((fetch_constant_index >> 1) & 3)); + // All 32 bits containing the values (24 bits) for 2 fetch constants. + spv::Id swizzle_word = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_system_constants_, + id_vector_temp_), + spv::NoPrecision); + uint32_t swizzle_word_offset = 3 * 4 * (fetch_constant_index & 1); + spv::Id const_float_1 = builder_->makeFloatConstant(1.0f); + uint32_t result_remaining_components = used_result_nonzero_components; + uint32_t result_component_index; + while (xe::bit_scan_forward(result_remaining_components, + &result_component_index)) { + result_remaining_components &= + ~(UINT32_C(1) << result_component_index); + uint32_t swizzle_bit_0_value = + UINT32_C(1) + << (swizzle_word_offset + 3 * result_component_index); + spv::Id swizzle_bit_0 = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, swizzle_word, + builder_->makeUintConstant(swizzle_bit_0_value)), + const_uint_0_); + // Bit 2 - X/Y/Z/W or 0/1. 
+ spv::Id swizzle_bit_2 = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, swizzle_word, + builder_->makeUintConstant(swizzle_bit_0_value << 2)), + const_uint_0_); + spv::Block& block_swizzle_head = *builder_->getBuildPoint(); + spv::Block& block_swizzle_constant = builder_->makeNewBlock(); + spv::Block& block_swizzle_component = builder_->makeNewBlock(); + spv::Block& block_swizzle_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_swizzle_merge.getId(), + spv::SelectionControlDontFlattenMask); + builder_->createConditionalBranch(swizzle_bit_2, + &block_swizzle_constant, + &block_swizzle_component); + // Constant values. + builder_->setBuildPoint(&block_swizzle_constant); + // Bit 0 - 0 or 1. + spv::Id swizzle_result_constant = + builder_->createTriOp(spv::OpSelect, type_float_, swizzle_bit_0, + const_float_1, const_float_0_); + builder_->createBranch(&block_swizzle_merge); + // Fetched components. + spv::Id swizzle_result_component; + { + builder_->setBuildPoint(&block_swizzle_component); + // Select whether the result is signed or unsigned (or biased or + // gamma-corrected) based on the post-swizzle signedness. + spv::Id swizzle_sample_result = builder_->createTriOp( + spv::OpSelect, type_float4_, + builder_->smearScalar( + spv::NoPrecision, + result_is_signed[result_component_index], type_bool4_), + sample_result_signed, sample_result_unsigned); + // Bit 0 - X or Y, Z or W, 0 or 1. 
+ spv::Id swizzle_x_or_y = builder_->createTriOp( + spv::OpSelect, type_float_, swizzle_bit_0, + builder_->createCompositeExtract(swizzle_sample_result, + type_float_, 1), + builder_->createCompositeExtract(swizzle_sample_result, + type_float_, 0)); + spv::Id swizzle_z_or_w = builder_->createTriOp( + spv::OpSelect, type_float_, swizzle_bit_0, + builder_->createCompositeExtract(swizzle_sample_result, + type_float_, 3), + builder_->createCompositeExtract(swizzle_sample_result, + type_float_, 2)); + // Bit 1 - X/Y or Z/W. + spv::Id swizzle_bit_1 = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, swizzle_word, + builder_->makeUintConstant(swizzle_bit_0_value << 1)), + const_uint_0_); + swizzle_result_component = builder_->createTriOp( + spv::OpSelect, type_float_, swizzle_bit_1, swizzle_z_or_w, + swizzle_x_or_y); + builder_->createBranch(&block_swizzle_merge); + } + // Select between the constants and the fetched components. + builder_->setBuildPoint(&block_swizzle_merge); + { + std::unique_ptr swizzle_phi_op = + std::make_unique(builder_->getUniqueId(), + type_float_, spv::OpPhi); + swizzle_phi_op->addIdOperand(swizzle_result_constant); + swizzle_phi_op->addIdOperand(block_swizzle_constant.getId()); + swizzle_phi_op->addIdOperand(swizzle_result_component); + swizzle_phi_op->addIdOperand(block_swizzle_component.getId()); + result[result_component_index] = swizzle_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(swizzle_phi_op)); + } + } + } + + // Apply the signednesses to all the needed components. If swizzling is + // done in the shader rather than via the image view, unsigned or signed + // source has already been selected into `result` - only need to bias or + // to gamma-correct. 
+ spv::Id const_float_2 = builder_->makeFloatConstant(2.0f); + spv::Id const_float_minus_1 = builder_->makeFloatConstant(-1.0f); + { + uint32_t result_remaining_components = used_result_nonzero_components; + uint32_t result_component_index; + while (xe::bit_scan_forward(result_remaining_components, + &result_component_index)) { + result_remaining_components &= + ~(UINT32_C(1) << result_component_index); + spv::Id sample_result_component_unsigned = + features_.image_view_format_swizzle + ? builder_->createCompositeExtract(sample_result_unsigned, + type_float_, + result_component_index) + : result[result_component_index]; + spv::Block& block_sign_head = *builder_->getBuildPoint(); + spv::Block* block_sign_signed = features_.image_view_format_swizzle + ? &builder_->makeNewBlock() + : nullptr; + spv::Block& block_sign_unsigned_biased = builder_->makeNewBlock(); + spv::Block& block_sign_gamma_start = builder_->makeNewBlock(); + spv::Block& block_sign_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_sign_merge.getId(), + spv::SelectionControlDontFlattenMask); + { + std::unique_ptr sign_switch_op = + std::make_unique(spv::OpSwitch); + sign_switch_op->addIdOperand( + swizzled_signs[result_component_index]); + // Make unsigned (do nothing, take the unsigned component in the + // phi) the default, and also, if unsigned or signed has already + // been selected in swizzling, make signed the default to since + // it, just like unsigned, doesn't need any transformations. 
+ sign_switch_op->addIdOperand(block_sign_merge.getId()); + if (block_sign_signed) { + sign_switch_op->addImmediateOperand( + uint32_t(xenos::TextureSign::kSigned)); + sign_switch_op->addIdOperand(block_sign_signed->getId()); + } + sign_switch_op->addImmediateOperand( + uint32_t(xenos::TextureSign::kUnsignedBiased)); + sign_switch_op->addIdOperand(block_sign_unsigned_biased.getId()); + sign_switch_op->addImmediateOperand( + uint32_t(xenos::TextureSign::kGamma)); + sign_switch_op->addIdOperand(block_sign_gamma_start.getId()); + builder_->getBuildPoint()->addInstruction( + std::move(sign_switch_op)); + } + if (block_sign_signed) { + block_sign_signed->addPredecessor(&block_sign_head); + } + block_sign_unsigned_biased.addPredecessor(&block_sign_head); + block_sign_gamma_start.addPredecessor(&block_sign_head); + block_sign_merge.addPredecessor(&block_sign_head); + // Signed. + spv::Id sample_result_component_signed = + sample_result_component_unsigned; + if (block_sign_signed) { + builder_->setBuildPoint(block_sign_signed); + sample_result_component_signed = builder_->createCompositeExtract( + sample_result_signed, type_float_, result_component_index); + builder_->createBranch(&block_sign_merge); + } + // Unsigned biased. + builder_->setBuildPoint(&block_sign_unsigned_biased); + spv::Id sample_result_component_unsigned_biased = + builder_->createBinOp(spv::OpFMul, type_float_, + sample_result_component_unsigned, + const_float_2); + builder_->addDecoration(sample_result_component_unsigned_biased, + spv::DecorationNoContraction); + sample_result_component_unsigned_biased = builder_->createBinOp( + spv::OpFAdd, type_float_, + sample_result_component_unsigned_biased, const_float_minus_1); + builder_->addDecoration(sample_result_component_unsigned_biased, + spv::DecorationNoContraction); + builder_->createBranch(&block_sign_merge); + // Gamma. + builder_->setBuildPoint(&block_sign_gamma_start); + // TODO(Triang3l): Gamma resolve target as sRGB sampling. 
+ spv::Id sample_result_component_gamma = + PWLGammaToLinear(sample_result_component_unsigned, false); + // Get the current build point for the phi operation not to assume + // that it will be the same as before PWLGammaToLinear. + spv::Block& block_sign_gamma_end = *builder_->getBuildPoint(); + builder_->createBranch(&block_sign_merge); + // Merge. + builder_->setBuildPoint(&block_sign_merge); + { + std::unique_ptr sign_phi_op = + std::make_unique(builder_->getUniqueId(), + type_float_, spv::OpPhi); + if (block_sign_signed) { + sign_phi_op->addIdOperand(sample_result_component_signed); + sign_phi_op->addIdOperand(block_sign_signed->getId()); + } + sign_phi_op->addIdOperand( + sample_result_component_unsigned_biased); + sign_phi_op->addIdOperand(block_sign_unsigned_biased.getId()); + sign_phi_op->addIdOperand(sample_result_component_gamma); + sign_phi_op->addIdOperand(block_sign_gamma_end.getId()); + sign_phi_op->addIdOperand(sample_result_component_unsigned); + sign_phi_op->addIdOperand(block_sign_head.getId()); + result[result_component_index] = sign_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(sign_phi_op)); + } + } + } + + // Apply the exponent bias from the bits 13:18 of the fetch constant + // word 4. 
+ id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + id_vector_temp_.push_back(builder_->makeFloatConstant(1.0f)); + id_vector_temp_.push_back(builder_->createTriOp( + spv::OpBitFieldSExtract, type_int_, fetch_constant_word_4_signed, + builder_->makeUintConstant(13), builder_->makeUintConstant(6))); + spv::Id result_exponent_bias = + builder_->createBuiltinCall(type_float_, ext_inst_glsl_std_450_, + GLSLstd450Ldexp, id_vector_temp_); + { + uint32_t result_remaining_components = used_result_nonzero_components; + uint32_t result_component_index; + while (xe::bit_scan_forward(result_remaining_components, + &result_component_index)) { + result_remaining_components &= + ~(UINT32_C(1) << result_component_index); + spv::Id& result_component_ref = result[result_component_index]; + result_component_ref = builder_->createBinOp( + spv::OpFMul, type_float_, result_component_ref, + result_exponent_bias); + builder_->addDecoration(result_component_ref, + spv::DecorationNoContraction); + } + } + } + } } + + // Store the needed components of the result. 
+ spv::Id result_vector; + if (used_result_component_count > 1) { + id_vector_temp_.clear(); + id_vector_temp_.reserve(used_result_component_count); + uint32_t result_components_remaining = used_result_components; + uint32_t result_component_index; + while (xe::bit_scan_forward(result_components_remaining, + &result_component_index)) { + result_components_remaining &= ~(UINT32_C(1) << result_component_index); + id_vector_temp_.push_back(result[result_component_index]); + } + result_vector = builder_->createCompositeConstruct( + type_float_vectors_[used_result_component_count - 1], id_vector_temp_); + } else { + uint32_t result_component_index; + xe::bit_scan_forward(used_result_components, &result_component_index); + result_vector = result[result_component_index]; + } + StoreResult(instr.result, result_vector); } size_t SpirvShaderTranslator::FindOrAddTextureBinding( @@ -599,12 +2509,10 @@ size_t SpirvShaderTranslator::FindOrAddTextureBinding( is_array = true; dimension_name = "2d"; } - new_texture_binding.type = - builder_->makeImageType(type_float_, type_dimension, false, is_array, - false, 1, spv::ImageFormatUnknown); new_texture_binding.variable = builder_->createVariable( spv::NoPrecision, spv::StorageClassUniformConstant, - new_texture_binding.type, + builder_->makeImageType(type_float_, type_dimension, false, is_array, + false, 1, spv::ImageFormatUnknown), fmt::format("xe_texture{}_{}_{}", fetch_constant, dimension_name, is_signed ? 
's' : 'u') .c_str()); @@ -675,5 +2583,114 @@ size_t SpirvShaderTranslator::FindOrAddSamplerBinding( return new_sampler_binding_index; } +void SpirvShaderTranslator::SampleTexture( + spv::Builder::TextureParameters& texture_parameters, + spv::ImageOperandsMask image_operands_mask, spv::Id image_unsigned, + spv::Id image_signed, spv::Id sampler, spv::Id is_all_signed, + spv::Id is_any_signed, spv::Id& result_unsigned_out, + spv::Id& result_signed_out, spv::Id lerp_factor, + spv::Id lerp_first_unsigned, spv::Id lerp_first_signed) { + for (uint32_t i = 0; i < 2; ++i) { + spv::Block& block_sign_head = *builder_->getBuildPoint(); + spv::Block& block_sign = builder_->makeNewBlock(); + spv::Block& block_sign_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_sign_merge.getId(), + spv::SelectionControlDontFlattenMask); + // Unsigned (i == 0) - if there are any non-signed components. + // Signed (i == 1) - if there are any signed components. + builder_->createConditionalBranch(i ? is_any_signed : is_all_signed, + i ? &block_sign : &block_sign_merge, + i ? &block_sign_merge : &block_sign); + builder_->setBuildPoint(&block_sign); + spv::Id image = i ? image_signed : image_unsigned; + // OpSampledImage must be in the same block as where its result is used. + texture_parameters.sampler = builder_->createBinOp( + spv::OpSampledImage, + builder_->makeSampledImageType(builder_->getTypeId(image)), image, + sampler); + spv::Id result = builder_->createTextureCall( + spv::NoPrecision, type_float4_, false, false, false, false, false, + texture_parameters, image_operands_mask); + if (lerp_factor != spv::NoResult) { + spv::Id lerp_first = i ? 
lerp_first_signed : lerp_first_unsigned; + if (lerp_first != spv::NoResult) { + spv::Id lerp_difference = builder_->createBinOp( + spv::OpFSub, type_float4_, result, lerp_first); + builder_->addDecoration(lerp_difference, spv::DecorationNoContraction); + lerp_difference = + builder_->createBinOp(spv::OpVectorTimesScalar, type_float4_, + lerp_difference, lerp_factor); + builder_->addDecoration(lerp_difference, spv::DecorationNoContraction); + result = builder_->createBinOp(spv::OpFAdd, type_float4_, result, + lerp_difference); + builder_->addDecoration(result, spv::DecorationNoContraction); + } + } + builder_->createBranch(&block_sign_merge); + builder_->setBuildPoint(&block_sign_merge); + { + std::unique_ptr phi_op = + std::make_unique(builder_->getUniqueId(), + type_float4_, spv::OpPhi); + phi_op->addIdOperand(result); + phi_op->addIdOperand(block_sign.getId()); + phi_op->addIdOperand(const_float4_0_); + phi_op->addIdOperand(block_sign_head.getId()); + // This may overwrite the first lerp endpoint for the sign (such usage of + // this function is allowed). + (i ? result_signed_out : result_unsigned_out) = phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(phi_op)); + } + } +} + +spv::Id SpirvShaderTranslator::QueryTextureLod( + spv::Builder::TextureParameters& texture_parameters, spv::Id image_unsigned, + spv::Id image_signed, spv::Id sampler, spv::Id is_all_signed) { + // OpSampledImage must be in the same block as where its result is used. 
+ spv::Block& block_sign_head = *builder_->getBuildPoint(); + spv::Block& block_sign_signed = builder_->makeNewBlock(); + spv::Block& block_sign_unsigned = builder_->makeNewBlock(); + spv::Block& block_sign_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_sign_merge.getId(), + spv::SelectionControlDontFlattenMask); + builder_->createConditionalBranch(is_all_signed, &block_sign_signed, + &block_sign_unsigned); + builder_->setBuildPoint(&block_sign_signed); + texture_parameters.sampler = builder_->createBinOp( + spv::OpSampledImage, + builder_->makeSampledImageType(builder_->getTypeId(image_signed)), + image_signed, sampler); + spv::Id lod_signed = builder_->createCompositeExtract( + builder_->createTextureQueryCall(spv::OpImageQueryLod, texture_parameters, + false), + type_float_, 1); + builder_->createBranch(&block_sign_merge); + builder_->setBuildPoint(&block_sign_unsigned); + texture_parameters.sampler = builder_->createBinOp( + spv::OpSampledImage, + builder_->makeSampledImageType(builder_->getTypeId(image_unsigned)), + image_unsigned, sampler); + spv::Id lod_unsigned = builder_->createCompositeExtract( + builder_->createTextureQueryCall(spv::OpImageQueryLod, texture_parameters, + false), + type_float_, 1); + builder_->createBranch(&block_sign_merge); + builder_->setBuildPoint(&block_sign_merge); + spv::Id result; + { + std::unique_ptr sign_phi_op = + std::make_unique(builder_->getUniqueId(), type_float_, + spv::OpPhi); + sign_phi_op->addIdOperand(lod_signed); + sign_phi_op->addIdOperand(block_sign_signed.getId()); + sign_phi_op->addIdOperand(lod_unsigned); + sign_phi_op->addIdOperand(block_sign_unsigned.getId()); + result = sign_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(sign_phi_op)); + } + return result; +} + } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 3f4faef23..0fde1e1ec 100644 --- 
a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -1774,7 +1774,7 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // Update system constants before uploading them. UpdateSystemConstantValues(primitive_processing_result.host_index_endian, - viewport_info); + viewport_info, used_texture_mask); // Update uniform buffers and descriptor sets after binding the pipeline with // the new layout. @@ -2682,7 +2682,8 @@ void VulkanCommandProcessor::UpdateDynamicState( } void VulkanCommandProcessor::UpdateSystemConstantValues( - xenos::Endian index_endian, const draw_util::ViewportInfo& viewport_info) { + xenos::Endian index_endian, const draw_util::ViewportInfo& viewport_info, + uint32_t used_texture_mask) { #if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES @@ -2731,6 +2732,55 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( system_constants_.ndc_offset[i] = viewport_info.ndc_offset[i]; } + // Texture signedness / gamma. + { + uint32_t textures_remaining = used_texture_mask; + uint32_t texture_index; + while (xe::bit_scan_forward(textures_remaining, &texture_index)) { + textures_remaining &= ~(UINT32_C(1) << texture_index); + uint32_t& texture_signs_uint = + system_constants_.texture_swizzled_signs[texture_index >> 2]; + uint32_t texture_signs_shift = 8 * (texture_index & 3); + uint8_t texture_signs = + texture_cache_->GetActiveTextureSwizzledSigns(texture_index); + uint32_t texture_signs_shifted = uint32_t(texture_signs) + << texture_signs_shift; + uint32_t texture_signs_mask = (UINT32_C(1 << 8) - 1) + << texture_signs_shift; + dirty |= + (texture_signs_uint & texture_signs_mask) != texture_signs_shifted; + texture_signs_uint = + (texture_signs_uint & ~texture_signs_mask) | texture_signs_shifted; + } + } + + // Texture host swizzle in the shader. 
+ const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); + const VkPhysicalDevicePortabilitySubsetFeaturesKHR* + device_portability_subset_features = + provider.device_portability_subset_features(); + if (device_portability_subset_features && + !device_portability_subset_features->imageViewFormatSwizzle) { + uint32_t textures_remaining = used_texture_mask; + uint32_t texture_index; + while (xe::bit_scan_forward(textures_remaining, &texture_index)) { + textures_remaining &= ~(UINT32_C(1) << texture_index); + uint32_t& texture_swizzles_uint = + system_constants_.texture_swizzles[texture_index >> 1]; + uint32_t texture_swizzle_shift = 12 * (texture_index & 1); + uint32_t texture_swizzle = + texture_cache_->GetActiveTextureHostSwizzle(texture_index); + uint32_t texture_swizzle_shifted = uint32_t(texture_swizzle) + << texture_swizzle_shift; + uint32_t texture_swizzle_mask = (UINT32_C(1 << 12) - 1) + << texture_swizzle_shift; + dirty |= (texture_swizzles_uint & texture_swizzle_mask) != + texture_swizzle_shifted; + texture_swizzles_uint = (texture_swizzles_uint & ~texture_swizzle_mask) | + texture_swizzle_shifted; + } + } + if (dirty) { current_graphics_descriptor_set_values_up_to_date_ &= ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants); diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index fa2acbb45..5c8cfecab 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -432,7 +432,8 @@ class VulkanCommandProcessor : public CommandProcessor { bool primitive_polygonal, reg::RB_DEPTHCONTROL normalized_depth_control); void UpdateSystemConstantValues(xenos::Endian index_endian, - const draw_util::ViewportInfo& viewport_info); + const draw_util::ViewportInfo& viewport_info, + uint32_t used_texture_mask); bool UpdateBindings(const VulkanShader* vertex_shader, const VulkanShader* pixel_shader); // Allocates a descriptor set 
and fills the VkWriteDescriptorSet structure. From 140ed51e9a191acb3f76e089bccd8015ded625f9 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 12 Jun 2022 19:44:24 +0300 Subject: [PATCH 094/123] [GPU] Fix missing xenia-ui dependency in gpu > gpu-shader-compiler (needed for gmake2) --- src/xenia/gpu/premake5.lua | 1 + 1 file changed, 1 insertion(+) diff --git a/src/xenia/gpu/premake5.lua b/src/xenia/gpu/premake5.lua index 50a721017..971d6ef70 100644 --- a/src/xenia/gpu/premake5.lua +++ b/src/xenia/gpu/premake5.lua @@ -32,6 +32,7 @@ project("xenia-gpu-shader-compiler") "snappy", "xenia-base", "xenia-gpu", + "xenia-ui", "xenia-ui-vulkan", }) includedirs({ From ac268afbe9a37ca4ca6ad715cc48d28feb4437b3 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 12 Jun 2022 19:45:12 +0300 Subject: [PATCH 095/123] [Vulkan] Fix 1<< uint32_t constants --- src/xenia/gpu/vulkan/vulkan_command_processor.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 0fde1e1ec..009436834 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -2745,7 +2745,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( texture_cache_->GetActiveTextureSwizzledSigns(texture_index); uint32_t texture_signs_shifted = uint32_t(texture_signs) << texture_signs_shift; - uint32_t texture_signs_mask = (UINT32_C(1 << 8) - 1) + uint32_t texture_signs_mask = ((UINT32_C(1) << 8) - 1) << texture_signs_shift; dirty |= (texture_signs_uint & texture_signs_mask) != texture_signs_shifted; @@ -2772,7 +2772,7 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( texture_cache_->GetActiveTextureHostSwizzle(texture_index); uint32_t texture_swizzle_shifted = uint32_t(texture_swizzle) << texture_swizzle_shift; - uint32_t texture_swizzle_mask = (UINT32_C(1 << 12) - 1) + uint32_t texture_swizzle_mask = ((UINT32_C(1) << 12) - 1) << 
texture_swizzle_shift; dirty |= (texture_swizzles_uint & texture_swizzle_mask) != texture_swizzle_shifted; From 127bf342644559a4b15614fe281f3a3cf5823aa3 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 13 Jun 2022 13:03:02 +0300 Subject: [PATCH 096/123] [Vulkan] Trace dump tool --- src/xenia/gpu/vulkan/premake5.lua | 56 +++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/src/xenia/gpu/vulkan/premake5.lua b/src/xenia/gpu/vulkan/premake5.lua index ffc359504..e704547ee 100644 --- a/src/xenia/gpu/vulkan/premake5.lua +++ b/src/xenia/gpu/vulkan/premake5.lua @@ -22,3 +22,59 @@ project("xenia-gpu-vulkan") files({ "../shaders/bytecode/vulkan_spirv/*.h", }) + +group("src") +project("xenia-gpu-vulkan-trace-dump") + uuid("0dd0dd1c-b321-494d-ab9a-6c062f0c65cc") + kind("ConsoleApp") + language("C++") + links({ + "xenia-apu", + "xenia-apu-nop", + "xenia-base", + "xenia-core", + "xenia-cpu", + "xenia-cpu-backend-x64", + "xenia-gpu", + "xenia-gpu-vulkan", + "xenia-hid", + "xenia-hid-nop", + "xenia-kernel", + "xenia-ui", + "xenia-ui-vulkan", + "xenia-vfs", + }) + links({ + "aes_128", + "capstone", + "fmt", + "glslang-spirv", + "imgui", + "libavcodec", + "libavutil", + "mspack", + "snappy", + "xxhash", + }) + files({ + "vulkan_trace_dump_main.cc", + "../../base/console_app_main_"..platform_suffix..".cc", + }) + + filter("platforms:Linux") + links({ + "X11", + "xcb", + "X11-xcb", + }) + + filter("platforms:Windows") + -- Only create the .user file if it doesn't already exist. 
+ local user_file = project_root.."/build/xenia-gpu-vulkan-trace-dump.vcxproj.user" + if not os.isfile(user_file) then + debugdir(project_root) + debugargs({ + "2>&1", + "1>scratch/stdout-trace-dump.txt", + }) + end From c6ec6d8239f2364c5103b8acdc005050279a6a1b Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 20 Jun 2022 22:24:07 +0300 Subject: [PATCH 097/123] [Vulkan] Use UDiv/UMod by constant tile size + minor transfer cleanup Drivers compile that to a multiplication and a shift anyway. --- .../gpu/vulkan/vulkan_render_target_cache.cc | 127 ++++++------------ 1 file changed, 41 insertions(+), 86 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index d979c5748..bf2ed52b7 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -1606,7 +1606,7 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( id_vector_temp.push_back(builder.makeRuntimeArray(type_uint)); // Storage buffers have std430 packing, no padding to 4-component vectors. builder.addDecoration(id_vector_temp.back(), spv::DecorationArrayStride, - sizeof(float)); + sizeof(uint32_t)); spv::Id type_host_depth_source_buffer = builder.makeStructType(id_vector_temp, "XeTransferHostDepthBuffer"); builder.addMemberName(type_host_depth_source_buffer, 0, "host_depth"); @@ -1754,12 +1754,19 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( // Working with unsigned numbers for simplicity now, bitcasting to signed will // be done at texture fetch. - uint32_t tile_width_samples_scaled = + uint32_t tile_width_samples = xenos::kEdramTileWidthSamples * draw_resolution_scale_x(); - uint32_t tile_height_samples_scaled = + uint32_t tile_height_samples = xenos::kEdramTileHeightSamples * draw_resolution_scale_y(); - // Convert the fragment coordinates to uint2. + // Split the destination pixel index into 32bpp tile and 32bpp-tile-relative + // pixel index. 
+ // Note that division by non-power-of-two constants will include a 4-cycle + // 32*32 multiplication on AMD, even though so many bits are not needed for + // the pixel position - however, if an OpUnreachable path is inserted for the + // case when the position has upper bits set, for some reason, the code for it + // is not eliminated when compiling the shader for AMD via RenderDoc on + // Windows, as of June 2022. uint_vector_temp.clear(); uint_vector_temp.reserve(2); uint_vector_temp.push_back(0); @@ -1770,77 +1777,25 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( spv::NoPrecision, type_float2, builder.createLoad(input_fragment_coord, spv::NoPrecision), uint_vector_temp)); - - // Prove to the AMD compiler that 24*24 multiplication can be done. 16 bits - // are more than enough for coordinates even with 3x resolution scaling (and - // Direct3D 11 hardware has 16.8 fixed-point coordinates). - // TODO(Triang3l): OpUnreachable if the coordinates have upper bits set. - - // Split the destination pixel coordinate into scalars. spv::Id dest_pixel_x = builder.createCompositeExtract(dest_pixel_coord, type_uint, 0); + spv::Id const_dest_tile_width_pixels = builder.makeUintConstant( + tile_width_samples >> + (uint32_t(dest_is_64bpp) + + uint32_t(key.dest_msaa_samples >= xenos::MsaaSamples::k4X))); + spv::Id dest_tile_index_x = builder.createBinOp( + spv::OpUDiv, type_uint, dest_pixel_x, const_dest_tile_width_pixels); + spv::Id dest_tile_pixel_x = builder.createBinOp( + spv::OpUMod, type_uint, dest_pixel_x, const_dest_tile_width_pixels); spv::Id dest_pixel_y = builder.createCompositeExtract(dest_pixel_coord, type_uint, 1); - - // Split the destination pixel index into 32bpp tile and 32bpp-tile-relative - // pixel index. 
- uint32_t dest_sample_width_log2 = - uint32_t(dest_is_64bpp) + - uint32_t(key.dest_msaa_samples >= xenos::MsaaSamples::k4X); - uint32_t dest_sample_height_log2 = - uint32_t(key.dest_msaa_samples >= xenos::MsaaSamples::k2X); - uint32_t dest_tile_width_divide_scale, dest_tile_width_divide_shift; - draw_util::GetEdramTileWidthDivideScaleAndUpperShift( - draw_resolution_scale_x(), dest_tile_width_divide_scale, - dest_tile_width_divide_shift); - // Doing 16*16=32 multiplication, not 32*32=64. - // TODO(Triang3l): Abstract this away, don't do 32*32 on Direct3D 12 too. - dest_tile_width_divide_scale &= UINT16_MAX; - dest_tile_width_divide_shift += 16; - // Need the host tile size in pixels, not samples. - dest_tile_width_divide_shift -= dest_sample_width_log2; - spv::Id dest_tile_index_x = builder.createBinOp( - spv::OpShiftRightLogical, type_uint, - builder.createBinOp( - spv::OpIMul, type_uint, dest_pixel_x, - builder.makeUintConstant(dest_tile_width_divide_scale)), - builder.makeUintConstant(dest_tile_width_divide_shift)); - spv::Id dest_tile_pixel_x = builder.createBinOp( - spv::OpISub, type_uint, dest_pixel_x, - builder.createBinOp(spv::OpIMul, type_uint, dest_tile_index_x, - builder.makeUintConstant(tile_width_samples_scaled >> - dest_sample_width_log2))); - spv::Id dest_tile_index_y, dest_tile_pixel_y; - static_assert( - TextureCache::kMaxDrawResolutionScaleAlongAxis <= 3, - "VulkanRenderTargetCache EDRAM range ownership transfer shader " - "generation supports Y draw resolution scaling factors of only up to 3"); - if (draw_resolution_scale_y() == 3) { - dest_tile_index_y = builder.createBinOp( - spv::OpShiftRightLogical, type_uint, - builder.createBinOp( - spv::OpIMul, type_uint, dest_pixel_y, - builder.makeUintConstant(draw_util::kDivideScale3 & UINT16_MAX)), - builder.makeUintConstant(draw_util::kDivideUpperShift3 + 16 + 4 - - dest_sample_height_log2)); - dest_tile_pixel_y = builder.createBinOp( - spv::OpISub, type_uint, dest_pixel_y, - builder.createBinOp( - 
spv::OpIMul, type_uint, dest_tile_index_y, - builder.makeUintConstant(tile_height_samples_scaled >> - dest_sample_height_log2))); - } else { - assert_true(draw_resolution_scale_y() <= 2); - uint32_t dest_tile_height_pixels_log2 = - (draw_resolution_scale_y() == 2 ? 5 : 4) - dest_sample_height_log2; - dest_tile_index_y = builder.createBinOp( - spv::OpShiftRightLogical, type_uint, dest_pixel_y, - builder.makeUintConstant(dest_tile_height_pixels_log2)); - dest_tile_pixel_y = builder.createBinOp( - spv::OpBitwiseAnd, type_uint, dest_pixel_y, - builder.makeUintConstant((uint32_t(1) << dest_tile_height_pixels_log2) - - 1)); - } + spv::Id const_dest_tile_height_pixels = builder.makeUintConstant( + tile_height_samples >> + uint32_t(key.dest_msaa_samples >= xenos::MsaaSamples::k2X)); + spv::Id dest_tile_index_y = builder.createBinOp( + spv::OpUDiv, type_uint, dest_pixel_y, const_dest_tile_height_pixels); + spv::Id dest_tile_pixel_y = builder.createBinOp( + spv::OpUMod, type_uint, dest_pixel_y, const_dest_tile_height_pixels); assert_true(push_constants_member_address != UINT32_MAX); id_vector_temp.clear(); @@ -2269,7 +2224,7 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( // Copying between color and depth / stencil - swap 40-32bpp-sample columns // in the pixel index within the source 32bpp tile. 
uint32_t source_32bpp_tile_half_pixels = - tile_width_samples_scaled >> (1 + source_pixel_width_dwords_log2); + tile_width_samples >> (1 + source_pixel_width_dwords_log2); source_tile_pixel_x = builder.createUnaryOp( spv::OpBitcast, type_uint, builder.createBinOp( @@ -2315,7 +2270,7 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( spv::OpIAdd, type_uint, builder.createBinOp( spv::OpIMul, type_uint, - builder.makeUintConstant(tile_width_samples_scaled >> + builder.makeUintConstant(tile_width_samples >> source_pixel_width_dwords_log2), source_tile_index_x), source_tile_pixel_x)); @@ -2326,7 +2281,7 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( builder.createBinOp( spv::OpIMul, type_uint, builder.makeUintConstant( - tile_height_samples_scaled >> + tile_height_samples >> uint32_t(key.source_msaa_samples >= xenos::MsaaSamples::k2X)), source_tile_index_y), source_tile_pixel_y)); @@ -2688,8 +2643,8 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( switch (source_depth_format) { case xenos::DepthRenderTargetFormat::kD24S8: { // Round to the nearest even integer. This seems to be the - // correct, adding +0.5 and rounding towards zero results in red - // instead of black in the 4D5307E6 clear shader. + // correct conversion, adding +0.5 and rounding towards zero results + // in red instead of black in the 4D5307E6 clear shader. id_vector_temp.clear(); id_vector_temp.push_back(builder.createBinOp( spv::OpFMul, type_float, source_depth_float[i], @@ -3003,9 +2958,9 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( } else { switch (source_depth_format) { case xenos::DepthRenderTargetFormat::kD24S8: { - // Round to the nearest even integer. This seems to be the correct, - // adding +0.5 and rounding towards zero results in red instead of - // black in the 4D5307E6 clear shader. + // Round to the nearest even integer. 
This seems to be the correct + // conversion, adding +0.5 and rounding towards zero results in red + // instead of black in the 4D5307E6 clear shader. id_vector_temp.clear(); id_vector_temp.push_back(builder.createBinOp( spv::OpFMul, type_float, source_depth_float[0], @@ -3384,7 +3339,7 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( spv::OpIAdd, type_uint, builder.createBinOp(spv::OpIMul, type_uint, builder.makeUintConstant( - tile_width_samples_scaled >> + tile_width_samples >> uint32_t(key.source_msaa_samples >= xenos::MsaaSamples::k4X)), host_depth_source_tile_index_x), @@ -3395,7 +3350,7 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( spv::OpIAdd, type_uint, builder.createBinOp(spv::OpIMul, type_uint, builder.makeUintConstant( - tile_height_samples_scaled >> + tile_height_samples >> uint32_t(key.source_msaa_samples >= xenos::MsaaSamples::k2X)), host_depth_source_tile_index_y), @@ -3469,14 +3424,14 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( spv::OpIAdd, type_uint, builder.createBinOp( spv::OpIMul, type_uint, - builder.makeUintConstant(tile_width_samples_scaled * - tile_height_samples_scaled), + builder.makeUintConstant(tile_width_samples * + tile_height_samples), dest_tile_index), builder.createBinOp( spv::OpIAdd, type_uint, builder.createBinOp( spv::OpIMul, type_uint, - builder.makeUintConstant(tile_width_samples_scaled), + builder.makeUintConstant(tile_width_samples), dest_tile_sample_y), dest_tile_sample_x)); id_vector_temp.clear(); @@ -3505,8 +3460,8 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( switch (dest_depth_format) { case xenos::DepthRenderTargetFormat::kD24S8: { // Round to the nearest even integer. This seems to be the - // correct, adding +0.5 and rounding towards zero results in red - // instead of black in the 4D5307E6 clear shader. + // correct conversion, adding +0.5 and rounding towards zero + // results in red instead of black in the 4D5307E6 clear shader. 
id_vector_temp.clear(); id_vector_temp.push_back(builder.createBinOp( spv::OpFMul, type_float, host_depth32, From 0dc480721fcb8cfd8e47a00520b3a8c92f357971 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 20 Jun 2022 22:29:07 +0300 Subject: [PATCH 098/123] [Vulkan] Render target resolving --- .../gpu/vulkan/vulkan_command_processor.cc | 12 +- .../gpu/vulkan/vulkan_render_target_cache.cc | 1091 ++++++++++++++++- .../gpu/vulkan/vulkan_render_target_cache.h | 189 ++- 3 files changed, 1287 insertions(+), 5 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 009436834..dacff7b63 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -289,10 +289,11 @@ bool VulkanCommandProcessor::SetupContext() { return false; } + // Requires the transient descriptor set layouts. // TODO(Triang3l): Get the actual draw resolution scale when the texture cache // supports resolution scaling. render_target_cache_ = std::make_unique( - *register_file_, *memory_, &trace_writer_, 1, 1, *this); + *register_file_, *memory_, trace_writer_, 1, 1, *this); if (!render_target_cache_->Initialize()) { XELOGE("Failed to initialize the render target cache"); return false; @@ -1884,6 +1885,14 @@ bool VulkanCommandProcessor::IssueCopy() { return false; } + uint32_t written_address, written_length; + if (!render_target_cache_->Resolve(*memory_, *shared_memory_, *texture_cache_, + written_address, written_length)) { + return false; + } + + // TODO(Triang3l): CPU readback. + return true; } @@ -1893,6 +1902,7 @@ void VulkanCommandProcessor::InitializeTrace() { if (!BeginSubmission(true)) { return; } + // TODO(Triang3l): Write the EDRAM. 
bool shared_memory_submitted = shared_memory_->InitializeTraceSubmitDownloads(); if (!shared_memory_submitted) { diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index bf2ed52b7..4d021ca7a 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -43,8 +43,63 @@ namespace shaders { #include "xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_2xmsaa_cs.h" #include "xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_4xmsaa_cs.h" #include "xenia/gpu/shaders/bytecode/vulkan_spirv/passthrough_position_xy_vs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_32bpp_1x2xmsaa_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_32bpp_1x2xmsaa_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_32bpp_4xmsaa_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_32bpp_4xmsaa_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_64bpp_1x2xmsaa_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_64bpp_1x2xmsaa_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_64bpp_4xmsaa_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_64bpp_4xmsaa_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_128bpp_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_128bpp_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_16bpp_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_16bpp_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_32bpp_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_32bpp_scaled_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_64bpp_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_64bpp_scaled_cs.h" 
+#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_8bpp_cs.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_full_8bpp_scaled_cs.h" } // namespace shaders +const VulkanRenderTargetCache::ResolveCopyShaderCode + VulkanRenderTargetCache::kResolveCopyShaders[size_t( + draw_util::ResolveCopyShaderIndex::kCount)] = { + {shaders::resolve_fast_32bpp_1x2xmsaa_cs, + sizeof(shaders::resolve_fast_32bpp_1x2xmsaa_cs), + shaders::resolve_fast_32bpp_1x2xmsaa_scaled_cs, + sizeof(shaders::resolve_fast_32bpp_1x2xmsaa_scaled_cs)}, + {shaders::resolve_fast_32bpp_4xmsaa_cs, + sizeof(shaders::resolve_fast_32bpp_4xmsaa_cs), + shaders::resolve_fast_32bpp_4xmsaa_scaled_cs, + sizeof(shaders::resolve_fast_32bpp_4xmsaa_scaled_cs)}, + {shaders::resolve_fast_64bpp_1x2xmsaa_cs, + sizeof(shaders::resolve_fast_64bpp_1x2xmsaa_cs), + shaders::resolve_fast_64bpp_1x2xmsaa_scaled_cs, + sizeof(shaders::resolve_fast_64bpp_1x2xmsaa_scaled_cs)}, + {shaders::resolve_fast_64bpp_4xmsaa_cs, + sizeof(shaders::resolve_fast_64bpp_4xmsaa_cs), + shaders::resolve_fast_64bpp_4xmsaa_scaled_cs, + sizeof(shaders::resolve_fast_64bpp_4xmsaa_scaled_cs)}, + {shaders::resolve_full_8bpp_cs, sizeof(shaders::resolve_full_8bpp_cs), + shaders::resolve_full_8bpp_scaled_cs, + sizeof(shaders::resolve_full_8bpp_scaled_cs)}, + {shaders::resolve_full_16bpp_cs, sizeof(shaders::resolve_full_16bpp_cs), + shaders::resolve_full_16bpp_scaled_cs, + sizeof(shaders::resolve_full_16bpp_scaled_cs)}, + {shaders::resolve_full_32bpp_cs, sizeof(shaders::resolve_full_32bpp_cs), + shaders::resolve_full_32bpp_scaled_cs, + sizeof(shaders::resolve_full_32bpp_scaled_cs)}, + {shaders::resolve_full_64bpp_cs, sizeof(shaders::resolve_full_64bpp_cs), + shaders::resolve_full_64bpp_scaled_cs, + sizeof(shaders::resolve_full_64bpp_scaled_cs)}, + {shaders::resolve_full_128bpp_cs, + sizeof(shaders::resolve_full_128bpp_cs), + shaders::resolve_full_128bpp_scaled_cs, + sizeof(shaders::resolve_full_128bpp_scaled_cs)}, +}; + const 
VulkanRenderTargetCache::TransferPipelineLayoutInfo VulkanRenderTargetCache::kTransferPipelineLayoutInfos[size_t( TransferPipelineLayoutIndex::kCount)] = { @@ -116,11 +171,12 @@ const VulkanRenderTargetCache::TransferModeInfo VulkanRenderTargetCache::VulkanRenderTargetCache( const RegisterFile& register_file, const Memory& memory, - TraceWriter* trace_writer, uint32_t draw_resolution_scale_x, + TraceWriter& trace_writer, uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y, VulkanCommandProcessor& command_processor) - : RenderTargetCache(register_file, memory, trace_writer, + : RenderTargetCache(register_file, memory, &trace_writer, draw_resolution_scale_x, draw_resolution_scale_y), - command_processor_(command_processor) {} + command_processor_(command_processor), + trace_writer_(trace_writer) {} VulkanRenderTargetCache::~VulkanRenderTargetCache() { Shutdown(true); } @@ -285,6 +341,79 @@ bool VulkanRenderTargetCache::Initialize() { dfn.vkUpdateDescriptorSets(device, 1, &edram_storage_buffer_descriptor_write, 0, nullptr); + bool draw_resolution_scaled = IsDrawResolutionScaled(); + + // Resolve copy pipeline layout. + VkDescriptorSetLayout + resolve_copy_descriptor_set_layouts[kResolveCopyDescriptorSetCount] = {}; + resolve_copy_descriptor_set_layouts[kResolveCopyDescriptorSetEdram] = + descriptor_set_layout_storage_buffer_; + resolve_copy_descriptor_set_layouts[kResolveCopyDescriptorSetDest] = + command_processor_.GetSingleTransientDescriptorLayout( + VulkanCommandProcessor::SingleTransientDescriptorLayout :: + kStorageBufferCompute); + VkPushConstantRange resolve_copy_push_constant_range; + resolve_copy_push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + resolve_copy_push_constant_range.offset = 0; + // Potentially binding all of the shared memory at 1x resolution, but only + // portions with scaled resolution. + resolve_copy_push_constant_range.size = + draw_resolution_scaled + ? 
sizeof(draw_util::ResolveCopyShaderConstants::DestRelative) + : sizeof(draw_util::ResolveCopyShaderConstants); + VkPipelineLayoutCreateInfo resolve_copy_pipeline_layout_create_info; + resolve_copy_pipeline_layout_create_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + resolve_copy_pipeline_layout_create_info.pNext = nullptr; + resolve_copy_pipeline_layout_create_info.flags = 0; + resolve_copy_pipeline_layout_create_info.setLayoutCount = + kResolveCopyDescriptorSetCount; + resolve_copy_pipeline_layout_create_info.pSetLayouts = + resolve_copy_descriptor_set_layouts; + resolve_copy_pipeline_layout_create_info.pushConstantRangeCount = 1; + resolve_copy_pipeline_layout_create_info.pPushConstantRanges = + &resolve_copy_push_constant_range; + if (dfn.vkCreatePipelineLayout( + device, &resolve_copy_pipeline_layout_create_info, nullptr, + &resolve_copy_pipeline_layout_) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the resolve copy pipeline " + "layout"); + Shutdown(); + return false; + } + + // Resolve copy pipelines. + for (size_t i = 0; i < size_t(draw_util::ResolveCopyShaderIndex::kCount); + ++i) { + const draw_util::ResolveCopyShaderInfo& resolve_copy_shader_info = + draw_util::resolve_copy_shader_info[i]; + const ResolveCopyShaderCode& resolve_copy_shader_code = + kResolveCopyShaders[i]; + // Somewhat verification whether resolve_copy_shaders_ is up to date. + assert_true(resolve_copy_shader_code.unscaled && + resolve_copy_shader_code.unscaled_size_bytes && + resolve_copy_shader_code.scaled && + resolve_copy_shader_code.scaled_size_bytes); + VkPipeline resolve_copy_pipeline = ui::vulkan::util::CreateComputePipeline( + provider, resolve_copy_pipeline_layout_, + draw_resolution_scaled ? resolve_copy_shader_code.scaled + : resolve_copy_shader_code.unscaled, + draw_resolution_scaled ? 
resolve_copy_shader_code.scaled_size_bytes + : resolve_copy_shader_code.unscaled_size_bytes); + if (resolve_copy_pipeline == VK_NULL_HANDLE) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the resolve copy " + "pipeline {}", + resolve_copy_shader_info.debug_name); + Shutdown(); + return false; + } + provider.SetDeviceObjectName(VK_OBJECT_TYPE_PIPELINE, resolve_copy_pipeline, + resolve_copy_shader_info.debug_name); + resolve_copy_pipelines_[i] = resolve_copy_pipeline; + } + // TODO(Triang3l): All paths (FSI). // TODO(Triang3l): Handle sampledImageIntegerSampleCounts 4 not supported in @@ -461,6 +590,52 @@ bool VulkanRenderTargetCache::Initialize() { } } + // Dump pipeline layouts. + VkDescriptorSetLayout + dump_pipeline_layout_descriptor_set_layouts[kDumpDescriptorSetCount]; + dump_pipeline_layout_descriptor_set_layouts[kDumpDescriptorSetEdram] = + descriptor_set_layout_storage_buffer_; + dump_pipeline_layout_descriptor_set_layouts[kDumpDescriptorSetSource] = + descriptor_set_layout_sampled_image_; + VkPushConstantRange dump_pipeline_layout_push_constant_range; + dump_pipeline_layout_push_constant_range.stageFlags = + VK_SHADER_STAGE_COMPUTE_BIT; + dump_pipeline_layout_push_constant_range.offset = 0; + dump_pipeline_layout_push_constant_range.size = + sizeof(uint32_t) * kDumpPushConstantCount; + VkPipelineLayoutCreateInfo dump_pipeline_layout_create_info; + dump_pipeline_layout_create_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + dump_pipeline_layout_create_info.pNext = nullptr; + dump_pipeline_layout_create_info.flags = 0; + dump_pipeline_layout_create_info.setLayoutCount = + uint32_t(xe::countof(dump_pipeline_layout_descriptor_set_layouts)); + dump_pipeline_layout_create_info.pSetLayouts = + dump_pipeline_layout_descriptor_set_layouts; + dump_pipeline_layout_create_info.pushConstantRangeCount = 1; + dump_pipeline_layout_create_info.pPushConstantRanges = + &dump_pipeline_layout_push_constant_range; + if (dfn.vkCreatePipelineLayout(device, 
&dump_pipeline_layout_create_info, + nullptr, + &dump_pipeline_layout_color_) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the color render target " + "dumping pipeline layout"); + Shutdown(); + return false; + } + dump_pipeline_layout_descriptor_set_layouts[kDumpDescriptorSetSource] = + descriptor_set_layout_sampled_image_x2_; + if (dfn.vkCreatePipelineLayout(device, &dump_pipeline_layout_create_info, + nullptr, + &dump_pipeline_layout_depth_) != VK_SUCCESS) { + XELOGE( + "VulkanRenderTargetCache: Failed to create the depth render target " + "dumping pipeline layout"); + Shutdown(); + return false; + } + InitializeCommon(); return true; } @@ -471,6 +646,17 @@ void VulkanRenderTargetCache::Shutdown(bool from_destructor) { const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + for (const auto& dump_pipeline_pair : dump_pipelines_) { + // May be null to prevent recreation attempts. + if (dump_pipeline_pair.second != VK_NULL_HANDLE) { + dfn.vkDestroyPipeline(device, dump_pipeline_pair.second, nullptr); + } + } + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device, + dump_pipeline_layout_depth_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device, + dump_pipeline_layout_color_); + for (const auto& transfer_pipeline_array_pair : transfer_pipelines_) { for (VkPipeline transfer_pipeline : transfer_pipeline_array_pair.second) { // May be null to prevent recreation attempts. 
@@ -516,6 +702,13 @@ void VulkanRenderTargetCache::Shutdown(bool from_destructor) { } render_passes_.clear(); + for (VkPipeline& resolve_copy_pipeline : resolve_copy_pipelines_) { + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device, + resolve_copy_pipeline); + } + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device, + resolve_copy_pipeline_layout_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device, edram_storage_buffer_descriptor_pool_); ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, @@ -578,6 +771,194 @@ void VulkanRenderTargetCache::EndSubmission() { } } +bool VulkanRenderTargetCache::Resolve(const Memory& memory, + VulkanSharedMemory& shared_memory, + VulkanTextureCache& texture_cache, + uint32_t& written_address_out, + uint32_t& written_length_out) { + written_address_out = 0; + written_length_out = 0; + + bool draw_resolution_scaled = IsDrawResolutionScaled(); + + draw_util::ResolveInfo resolve_info; + // TODO(Triang3l): Truncation of fixed16 (but not fixed16 as float16) range to + // -1 to 1. + if (!draw_util::GetResolveInfo( + register_file(), memory, trace_writer_, draw_resolution_scale_x(), + draw_resolution_scale_y(), false, false, resolve_info)) { + return false; + } + + // Nothing to copy/clear. + if (!resolve_info.coordinate_info.width_div_8 || + !resolve_info.coordinate_info.height_div_8) { + return true; + } + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + DeferredCommandBuffer& command_buffer = + command_processor_.deferred_command_buffer(); + + // Copying. + bool copied = false; + if (resolve_info.copy_dest_extent_length) { + if (GetPath() == Path::kHostRenderTargets) { + // Dump the current contents of the render targets owning the affected + // range to edram_buffer_. 
+ // TODO(Triang3l): Direct host render target -> shared memory resolve + // shaders for non-converting cases. + uint32_t dump_base; + uint32_t dump_row_length_used; + uint32_t dump_rows; + uint32_t dump_pitch; + resolve_info.GetCopyEdramTileSpan(dump_base, dump_row_length_used, + dump_rows, dump_pitch); + DumpRenderTargets(dump_base, dump_row_length_used, dump_rows, dump_pitch); + } + + draw_util::ResolveCopyShaderConstants copy_shader_constants; + uint32_t copy_group_count_x, copy_group_count_y; + draw_util::ResolveCopyShaderIndex copy_shader = resolve_info.GetCopyShader( + draw_resolution_scale_x(), draw_resolution_scale_y(), + copy_shader_constants, copy_group_count_x, copy_group_count_y); + assert_true(copy_group_count_x && copy_group_count_y); + if (copy_shader != draw_util::ResolveCopyShaderIndex::kUnknown) { + const draw_util::ResolveCopyShaderInfo& copy_shader_info = + draw_util::resolve_copy_shader_info[size_t(copy_shader)]; + + // Make sure there is memory to write to. + bool copy_dest_committed; + // TODO(Triang3l): Resolution-scaled buffer committing. + copy_dest_committed = + shared_memory.RequestRange(resolve_info.copy_dest_extent_start, + resolve_info.copy_dest_extent_length); + if (!copy_dest_committed) { + XELOGE( + "VulkanRenderTargetCache: Failed to obtain the resolve destination " + "memory region"); + } else { + // TODO(Triang3l): Switching between descriptors if exceeding + // maxStorageBufferRange. + // TODO(Triang3l): Use a single 512 MB shared memory binding if + // possible. + VkDescriptorSet descriptor_set_dest = + command_processor_.AllocateSingleTransientDescriptor( + VulkanCommandProcessor::SingleTransientDescriptorLayout :: + kStorageBufferCompute); + if (descriptor_set_dest != VK_NULL_HANDLE) { + // Write the destination descriptor. + // TODO(Triang3l): Scaled resolve buffer binding. 
+ VkDescriptorBufferInfo write_descriptor_set_dest_buffer_info; + write_descriptor_set_dest_buffer_info.buffer = shared_memory.buffer(); + write_descriptor_set_dest_buffer_info.offset = + resolve_info.copy_dest_base; + write_descriptor_set_dest_buffer_info.range = + resolve_info.copy_dest_extent_start - + resolve_info.copy_dest_base + + resolve_info.copy_dest_extent_length; + VkWriteDescriptorSet write_descriptor_set_dest; + write_descriptor_set_dest.sType = + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_descriptor_set_dest.pNext = nullptr; + write_descriptor_set_dest.dstSet = descriptor_set_dest; + write_descriptor_set_dest.dstBinding = 0; + write_descriptor_set_dest.dstArrayElement = 0; + write_descriptor_set_dest.descriptorCount = 1; + write_descriptor_set_dest.descriptorType = + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + write_descriptor_set_dest.pImageInfo = nullptr; + write_descriptor_set_dest.pBufferInfo = + &write_descriptor_set_dest_buffer_info; + write_descriptor_set_dest.pTexelBufferView = nullptr; + dfn.vkUpdateDescriptorSets(device, 1, &write_descriptor_set_dest, 0, + nullptr); + + // Submit the resolve. + // TODO(Triang3l): Transition the scaled resolve buffer. 
+ shared_memory.Use(VulkanSharedMemory::Usage::kComputeWrite, + std::pair( + resolve_info.copy_dest_extent_start, + resolve_info.copy_dest_extent_length)); + UseEdramBuffer(EdramBufferUsage::kComputeRead); + command_processor_.BindExternalComputePipeline( + resolve_copy_pipelines_[size_t(copy_shader)]); + VkDescriptorSet descriptor_sets[kResolveCopyDescriptorSetCount] = {}; + descriptor_sets[kResolveCopyDescriptorSetEdram] = + edram_storage_buffer_descriptor_set_; + descriptor_sets[kResolveCopyDescriptorSetDest] = descriptor_set_dest; + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_COMPUTE, resolve_copy_pipeline_layout_, 0, + uint32_t(xe::countof(descriptor_sets)), descriptor_sets, 0, + nullptr); + if (draw_resolution_scaled) { + command_buffer.CmdVkPushConstants( + resolve_copy_pipeline_layout_, VK_SHADER_STAGE_COMPUTE_BIT, 0, + sizeof(copy_shader_constants.dest_relative), + ©_shader_constants.dest_relative); + } else { + // TODO(Triang3l): Proper dest_base in case of one 512 MB shared + // memory binding, or multiple shared memory bindings in case of + // splitting due to maxStorageBufferRange overflow. + copy_shader_constants.dest_base -= + uint32_t(write_descriptor_set_dest_buffer_info.offset); + command_buffer.CmdVkPushConstants( + resolve_copy_pipeline_layout_, VK_SHADER_STAGE_COMPUTE_BIT, 0, + sizeof(copy_shader_constants), ©_shader_constants); + } + command_processor_.SubmitBarriers(true); + command_buffer.CmdVkDispatch(copy_group_count_x, copy_group_count_y, + 1); + + // Invalidate textures and mark the range as scaled if needed. + texture_cache.MarkRangeAsResolved( + resolve_info.copy_dest_extent_start, + resolve_info.copy_dest_extent_length); + written_address_out = resolve_info.copy_dest_extent_start; + written_length_out = resolve_info.copy_dest_extent_length; + copied = true; + } + } + } + } else { + copied = true; + } + + // Clearing. 
+ bool cleared = false; + bool clear_depth = resolve_info.IsClearingDepth(); + bool clear_color = resolve_info.IsClearingColor(); + if (clear_depth || clear_color) { + // TODO(Triang3l): Fragment shader interlock path EDRAM buffer clearing. + if (GetPath() == Path::kHostRenderTargets) { + Transfer::Rectangle clear_rectangle; + RenderTarget* clear_render_targets[2]; + // If PrepareHostRenderTargetsResolveClear returns false, may be just an + // empty region (success) or an error - don't care. + if (PrepareHostRenderTargetsResolveClear( + resolve_info, clear_rectangle, clear_render_targets[0], + clear_transfers_[0], clear_render_targets[1], + clear_transfers_[1])) { + uint64_t clear_values[2]; + clear_values[0] = resolve_info.rb_depth_clear; + clear_values[1] = resolve_info.rb_color_clear | + (uint64_t(resolve_info.rb_color_clear_lo) << 32); + PerformTransfersAndResolveClears(2, clear_render_targets, + clear_transfers_, clear_values, + &clear_rectangle); + } + cleared = true; + } + } else { + cleared = true; + } + + return copied && cleared; +} + bool VulkanRenderTargetCache::Update( bool is_rasterization_done, reg::RB_DEPTHCONTROL normalized_depth_control, uint32_t normalized_color_mask, const Shader& vertex_shader) { @@ -4839,6 +5220,710 @@ void VulkanRenderTargetCache::PerformTransfersAndResolveClears( } } +VkPipeline VulkanRenderTargetCache::GetDumpPipeline(DumpPipelineKey key) { + auto pipeline_it = dump_pipelines_.find(key); + if (pipeline_it != dump_pipelines_.end()) { + return pipeline_it->second; + } + + std::vector id_vector_temp; + + spv::Builder builder(spv::Spv_1_0, + (SpirvShaderTranslator::kSpirvMagicToolId << 16) | 1, + nullptr); + spv::Id ext_inst_glsl_std_450 = builder.import("GLSL.std.450"); + builder.addCapability(spv::CapabilityShader); + builder.setMemoryModel(spv::AddressingModelLogical, spv::MemoryModelGLSL450); + builder.setSource(spv::SourceLanguageUnknown, 0); + + spv::Id type_void = builder.makeVoidType(); + spv::Id type_int = 
builder.makeIntType(32); + spv::Id type_int2 = builder.makeVectorType(type_int, 2); + spv::Id type_uint = builder.makeUintType(32); + spv::Id type_uint2 = builder.makeVectorType(type_uint, 2); + spv::Id type_uint3 = builder.makeVectorType(type_uint, 3); + spv::Id type_float = builder.makeFloatType(32); + + // Bindings. + // EDRAM buffer. + bool format_is_64bpp = !key.is_depth && xenos::IsColorRenderTargetFormat64bpp( + key.GetColorFormat()); + id_vector_temp.clear(); + id_vector_temp.push_back( + builder.makeRuntimeArray(format_is_64bpp ? type_uint2 : type_uint)); + // Storage buffers have std430 packing, no padding to 4-component vectors. + builder.addDecoration(id_vector_temp.back(), spv::DecorationArrayStride, + sizeof(uint32_t) << uint32_t(format_is_64bpp)); + spv::Id type_edram = builder.makeStructType(id_vector_temp, "XeEdram"); + builder.addMemberName(type_edram, 0, "edram"); + builder.addMemberDecoration(type_edram, 0, spv::DecorationNonReadable); + builder.addMemberDecoration(type_edram, 0, spv::DecorationOffset, 0); + // Block since SPIR-V 1.3, but since SPIR-V 1.0 is generated, it's + // BufferBlock. + builder.addDecoration(type_edram, spv::DecorationBufferBlock); + // StorageBuffer since SPIR-V 1.3, but since SPIR-V 1.0 is generated, it's + // Uniform. + spv::Id edram_buffer = builder.createVariable( + spv::NoPrecision, spv::StorageClassUniform, type_edram, "xe_edram"); + builder.addDecoration(edram_buffer, spv::DecorationDescriptorSet, + kDumpDescriptorSetEdram); + builder.addDecoration(edram_buffer, spv::DecorationBinding, 0); + // Color or depth source. + bool source_is_multisampled = key.msaa_samples != xenos::MsaaSamples::k1X; + bool source_is_uint; + if (key.is_depth) { + source_is_uint = false; + } else { + GetColorOwnershipTransferVulkanFormat(key.GetColorFormat(), + &source_is_uint); + } + spv::Id source_component_type = source_is_uint ? 
type_uint : type_float; + spv::Id source_texture = builder.createVariable( + spv::NoPrecision, spv::StorageClassUniformConstant, + builder.makeImageType(source_component_type, spv::Dim2D, false, false, + source_is_multisampled, 1, spv::ImageFormatUnknown), + "xe_edram_dump_source"); + builder.addDecoration(source_texture, spv::DecorationDescriptorSet, + kDumpDescriptorSetSource); + builder.addDecoration(source_texture, spv::DecorationBinding, 0); + // Stencil source. + spv::Id source_stencil_texture = spv::NoResult; + if (key.is_depth) { + source_stencil_texture = builder.createVariable( + spv::NoPrecision, spv::StorageClassUniformConstant, + builder.makeImageType(type_uint, spv::Dim2D, false, false, + source_is_multisampled, 1, + spv::ImageFormatUnknown), + "xe_edram_dump_stencil"); + builder.addDecoration(source_stencil_texture, spv::DecorationDescriptorSet, + kDumpDescriptorSetSource); + builder.addDecoration(source_stencil_texture, spv::DecorationBinding, 1); + } + // Push constants. 
+ id_vector_temp.clear(); + id_vector_temp.reserve(kDumpPushConstantCount); + for (uint32_t i = 0; i < kDumpPushConstantCount; ++i) { + id_vector_temp.push_back(type_uint); + } + spv::Id type_push_constants = + builder.makeStructType(id_vector_temp, "XeEdramDumpPushConstants"); + builder.addMemberName(type_push_constants, kDumpPushConstantPitches, + "pitches"); + builder.addMemberDecoration(type_push_constants, kDumpPushConstantPitches, + spv::DecorationOffset, + int(sizeof(uint32_t) * kDumpPushConstantPitches)); + builder.addMemberName(type_push_constants, kDumpPushConstantOffsets, + "offsets"); + builder.addMemberDecoration(type_push_constants, kDumpPushConstantOffsets, + spv::DecorationOffset, + int(sizeof(uint32_t) * kDumpPushConstantOffsets)); + builder.addDecoration(type_push_constants, spv::DecorationBlock); + spv::Id push_constants = builder.createVariable( + spv::NoPrecision, spv::StorageClassPushConstant, type_push_constants, + "xe_edram_dump_push_constants"); + + // gl_GlobalInvocationID input. + spv::Id input_global_invocation_id = + builder.createVariable(spv::NoPrecision, spv::StorageClassInput, + type_uint3, "gl_GlobalInvocationID"); + builder.addDecoration(input_global_invocation_id, spv::DecorationBuiltIn, + spv::BuiltInGlobalInvocationId); + + // Begin the main function. + std::vector main_param_types; + std::vector> main_precisions; + spv::Block* main_entry; + spv::Function* main_function = + builder.makeFunctionEntry(spv::NoPrecision, type_void, "main", + main_param_types, main_precisions, &main_entry); + + // For now, as the exact addressing in 64bpp render targets relatively to + // 32bpp is unknown, treating 64bpp tiles as storing 40x16 samples rather than + // 80x16 for simplicity of addressing into the texture. + + // Split the destination sample index into the 32bpp tile and the + // 32bpp-tile-relative sample index. 
+ // Note that division by non-power-of-two constants will include a 4-cycle + // 32*32 multiplication on AMD, even though so many bits are not needed for + // the sample position - however, if an OpUnreachable path is inserted for the + // case when the position has upper bits set, for some reason, the code for it + // is not eliminated when compiling the shader for AMD via RenderDoc on + // Windows, as of June 2022. + spv::Id global_invocation_id = + builder.createLoad(input_global_invocation_id, spv::NoPrecision); + spv::Id rectangle_sample_x = + builder.createCompositeExtract(global_invocation_id, type_uint, 0); + uint32_t tile_width = + (xenos::kEdramTileWidthSamples >> uint32_t(format_is_64bpp)) * + draw_resolution_scale_x(); + spv::Id const_tile_width = builder.makeUintConstant(tile_width); + spv::Id rectangle_tile_index_x = builder.createBinOp( + spv::OpUDiv, type_uint, rectangle_sample_x, const_tile_width); + spv::Id tile_sample_x = builder.createBinOp( + spv::OpUMod, type_uint, rectangle_sample_x, const_tile_width); + spv::Id rectangle_sample_y = + builder.createCompositeExtract(global_invocation_id, type_uint, 1); + uint32_t tile_height = + xenos::kEdramTileHeightSamples * draw_resolution_scale_y(); + spv::Id const_tile_height = builder.makeUintConstant(tile_height); + spv::Id rectangle_tile_index_y = builder.createBinOp( + spv::OpUDiv, type_uint, rectangle_sample_y, const_tile_height); + spv::Id tile_sample_y = builder.createBinOp( + spv::OpUMod, type_uint, rectangle_sample_y, const_tile_height); + + // Get the tile index in the EDRAM relative to the dump rectangle base tile. 
+ id_vector_temp.clear(); + id_vector_temp.push_back(builder.makeIntConstant(kDumpPushConstantPitches)); + spv::Id pitches_constant = builder.createLoad( + builder.createAccessChain(spv::StorageClassPushConstant, push_constants, + id_vector_temp), + spv::NoPrecision); + spv::Id const_uint_0 = builder.makeUintConstant(0); + spv::Id const_edram_pitch_tiles_bits = + builder.makeUintConstant(xenos::kEdramPitchTilesBits); + spv::Id rectangle_tile_index = builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp( + spv::OpIMul, type_uint, + builder.createTriOp(spv::OpBitFieldUExtract, type_uint, + pitches_constant, const_uint_0, + const_edram_pitch_tiles_bits), + rectangle_tile_index_y), + rectangle_tile_index_x); + // Add the base tile in the dispatch to the dispatch-local tile index. + id_vector_temp.clear(); + id_vector_temp.push_back(builder.makeIntConstant(kDumpPushConstantOffsets)); + spv::Id offsets_constant = builder.createLoad( + builder.createAccessChain(spv::StorageClassPushConstant, push_constants, + id_vector_temp), + spv::NoPrecision); + spv::Id const_edram_base_tiles_bits = + builder.makeUintConstant(xenos::kEdramBaseTilesBits); + spv::Id edram_tile_index = builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createTriOp(spv::OpBitFieldUExtract, type_uint, offsets_constant, + const_uint_0, const_edram_base_tiles_bits), + rectangle_tile_index); + + // Combine the tile sample index and the tile index into the EDRAM sample + // index. + spv::Id edram_sample_address = builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp(spv::OpIMul, type_uint, + builder.makeUintConstant(tile_width * tile_height), + edram_tile_index), + builder.createBinOp(spv::OpIAdd, type_uint, + builder.createBinOp(spv::OpIMul, type_uint, + const_tile_width, tile_sample_y), + tile_sample_x)); + if (key.is_depth) { + // Swap 40-sample columns in the depth buffer in the destination address to + // get the final address of the sample in the EDRAM. 
+ uint32_t tile_width_half = tile_width >> 1; + edram_sample_address = builder.createUnaryOp( + spv::OpBitcast, type_uint, + builder.createBinOp( + spv::OpIAdd, type_int, + builder.createUnaryOp(spv::OpBitcast, type_int, + edram_sample_address), + builder.createTriOp( + spv::OpSelect, type_int, + builder.createBinOp(spv::OpULessThan, builder.makeBoolType(), + tile_sample_x, + builder.makeUintConstant(tile_width_half)), + builder.makeIntConstant(int32_t(tile_width_half)), + builder.makeIntConstant(-int32_t(tile_width_half))))); + } + + // Get the linear tile index within the source texture. + spv::Id source_tile_index = builder.createBinOp( + spv::OpISub, type_uint, edram_tile_index, + builder.createTriOp(spv::OpBitFieldUExtract, type_uint, offsets_constant, + const_edram_base_tiles_bits, + const_edram_base_tiles_bits)); + // Split the linear tile index in the source texture into X and Y in tiles. + spv::Id source_pitch_tiles = builder.createTriOp( + spv::OpBitFieldUExtract, type_uint, pitches_constant, + const_edram_pitch_tiles_bits, const_edram_pitch_tiles_bits); + spv::Id source_tile_index_y = builder.createBinOp( + spv::OpUDiv, type_uint, source_tile_index, source_pitch_tiles); + spv::Id source_tile_index_x = builder.createBinOp( + spv::OpUMod, type_uint, source_tile_index, source_pitch_tiles); + // Combine the source tile offset and the sample index within the tile. + spv::Id source_sample_x = builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp(spv::OpIMul, type_uint, const_tile_width, + source_tile_index_x), + tile_sample_x); + spv::Id source_sample_y = builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp(spv::OpIMul, type_uint, const_tile_height, + source_tile_index_y), + tile_sample_y); + // Get the source pixel coordinate and the sample index within the pixel. 
+ spv::Id source_pixel_x = source_sample_x, source_pixel_y = source_sample_y; + spv::Id source_sample_id = spv::NoResult; + if (source_is_multisampled) { + spv::Id const_uint_1 = builder.makeUintConstant(1); + source_pixel_y = builder.createBinOp(spv::OpShiftRightLogical, type_uint, + source_sample_y, const_uint_1); + if (key.msaa_samples >= xenos::MsaaSamples::k4X) { + source_pixel_x = builder.createBinOp(spv::OpShiftRightLogical, type_uint, + source_sample_x, const_uint_1); + // 4x MSAA source texture sample index - bit 0 for horizontal, bit 1 for + // vertical. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(builder.createBinOp( + spv::OpBitwiseAnd, type_uint, source_sample_x, const_uint_1)); + id_vector_temp.push_back(source_sample_y); + id_vector_temp.push_back(const_uint_1); + id_vector_temp.push_back(const_uint_1); + source_sample_id = + builder.createOp(spv::OpBitFieldInsert, type_uint, id_vector_temp); + } else { + // 2x MSAA source texture sample index - convert from the guest to + // the Vulkan standard sample locations. + source_sample_id = builder.createTriOp( + spv::OpSelect, type_uint, + builder.createBinOp( + spv::OpINotEqual, builder.makeBoolType(), + builder.createBinOp(spv::OpBitwiseAnd, type_uint, source_sample_y, + const_uint_1), + const_uint_0), + builder.makeUintConstant(draw_util::GetD3D10SampleIndexForGuest2xMSAA( + 1, msaa_2x_attachments_supported_)), + builder.makeUintConstant(draw_util::GetD3D10SampleIndexForGuest2xMSAA( + 0, msaa_2x_attachments_supported_))); + } + } + + // Load the source, and pack the value into one or two 32-bit integers. 
+ spv::Id packed[2] = {}; + spv::Builder::TextureParameters source_texture_parameters = {}; + source_texture_parameters.sampler = + builder.createLoad(source_texture, spv::NoPrecision); + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back( + builder.createUnaryOp(spv::OpBitcast, type_int, source_pixel_x)); + id_vector_temp.push_back( + builder.createUnaryOp(spv::OpBitcast, type_int, source_pixel_y)); + source_texture_parameters.coords = + builder.createCompositeConstruct(type_int2, id_vector_temp); + if (source_is_multisampled) { + source_texture_parameters.sample = + builder.createUnaryOp(spv::OpBitcast, type_int, source_sample_id); + } else { + source_texture_parameters.lod = builder.makeIntConstant(0); + } + spv::Id source_vec4 = builder.createTextureCall( + spv::NoPrecision, builder.makeVectorType(source_component_type, 4), false, + true, false, false, false, source_texture_parameters, + spv::ImageOperandsMaskNone); + if (key.is_depth) { + source_texture_parameters.sampler = + builder.createLoad(source_stencil_texture, spv::NoPrecision); + spv::Id source_stencil = builder.createCompositeExtract( + builder.createTextureCall( + spv::NoPrecision, builder.makeVectorType(type_uint, 4), false, true, + false, false, false, source_texture_parameters, + spv::ImageOperandsMaskNone), + type_uint, 0); + spv::Id source_depth32 = + builder.createCompositeExtract(source_vec4, type_float, 0); + switch (key.GetDepthFormat()) { + case xenos::DepthRenderTargetFormat::kD24S8: { + // Round to the nearest even integer. This seems to be the correct + // conversion, adding +0.5 and rounding towards zero results in red + // instead of black in the 4D5307E6 clear shader. 
+ id_vector_temp.clear(); + id_vector_temp.push_back( + builder.createBinOp(spv::OpFMul, type_float, source_depth32, + builder.makeFloatConstant(float(0xFFFFFF)))); + packed[0] = builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBuiltinCall(type_float, ext_inst_glsl_std_450, + GLSLstd450RoundEven, id_vector_temp)); + } break; + case xenos::DepthRenderTargetFormat::kD24FS8: { + packed[0] = SpirvShaderTranslator::PreClampedDepthTo20e4( + builder, source_depth32, true, ext_inst_glsl_std_450); + } break; + } + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(source_stencil); + id_vector_temp.push_back(packed[0]); + id_vector_temp.push_back(builder.makeUintConstant(8)); + id_vector_temp.push_back(builder.makeUintConstant(24)); + packed[0] = + builder.createOp(spv::OpBitFieldInsert, type_uint, id_vector_temp); + } else { + switch (key.GetColorFormat()) { + case xenos::ColorRenderTargetFormat::k_8_8_8_8: + case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: { + spv::Id unorm_round_offset = builder.makeFloatConstant(0.5f); + spv::Id unorm_scale = builder.makeFloatConstant(255.0f); + packed[0] = builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp( + spv::OpFMul, type_float, + builder.createCompositeExtract(source_vec4, type_float, 0), + unorm_scale), + unorm_round_offset)); + spv::Id component_width = builder.makeUintConstant(8); + for (uint32_t i = 1; i < 4; ++i) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed[0]); + id_vector_temp.push_back(builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, + builder.createCompositeExtract( + source_vec4, type_float, i), + unorm_scale), + unorm_round_offset))); + id_vector_temp.push_back(builder.makeUintConstant(8 * i)); + id_vector_temp.push_back(component_width); + 
packed[0] = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } break; + case xenos::ColorRenderTargetFormat::k_2_10_10_10: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: { + spv::Id unorm_round_offset = builder.makeFloatConstant(0.5f); + spv::Id unorm_scale_rgb = builder.makeFloatConstant(1023.0f); + packed[0] = builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp( + spv::OpFMul, type_float, + builder.createCompositeExtract(source_vec4, type_float, 0), + unorm_scale_rgb), + unorm_round_offset)); + spv::Id width_rgb = builder.makeUintConstant(10); + spv::Id unorm_scale_a = builder.makeFloatConstant(3.0f); + spv::Id width_a = builder.makeUintConstant(2); + for (uint32_t i = 1; i < 4; ++i) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed[0]); + id_vector_temp.push_back(builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, + builder.createCompositeExtract( + source_vec4, type_float, i), + i == 3 ? unorm_scale_a : unorm_scale_rgb), + unorm_round_offset))); + id_vector_temp.push_back(builder.makeUintConstant(10 * i)); + id_vector_temp.push_back(i == 3 ? width_a : width_rgb); + packed[0] = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } break; + case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16: { + // Float16 has a wider range for both color and alpha, also NaNs - clamp + // and convert. 
+ packed[0] = SpirvShaderTranslator::UnclampedFloat32To7e3( + builder, builder.createCompositeExtract(source_vec4, type_float, 0), + ext_inst_glsl_std_450); + spv::Id width_rgb = builder.makeUintConstant(10); + for (uint32_t i = 1; i < 3; ++i) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed[0]); + id_vector_temp.push_back(SpirvShaderTranslator::UnclampedFloat32To7e3( + builder, + builder.createCompositeExtract(source_vec4, type_float, i), + ext_inst_glsl_std_450)); + id_vector_temp.push_back(builder.makeUintConstant(10 * i)); + id_vector_temp.push_back(width_rgb); + packed[0] = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + // Saturate and convert the alpha. + id_vector_temp.clear(); + id_vector_temp.reserve(3); + id_vector_temp.push_back( + builder.createCompositeExtract(source_vec4, type_float, 3)); + id_vector_temp.push_back(builder.makeFloatConstant(0.0f)); + id_vector_temp.push_back(builder.makeFloatConstant(1.0f)); + spv::Id alpha_saturated = + builder.createBuiltinCall(type_float, ext_inst_glsl_std_450, + GLSLstd450NClamp, id_vector_temp); + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(packed[0]); + id_vector_temp.push_back(builder.createUnaryOp( + spv::OpConvertFToU, type_uint, + builder.createBinOp( + spv::OpFAdd, type_float, + builder.createBinOp(spv::OpFMul, type_float, alpha_saturated, + builder.makeFloatConstant(3.0f)), + builder.makeFloatConstant(0.5f)))); + id_vector_temp.push_back(builder.makeUintConstant(30)); + id_vector_temp.push_back(builder.makeUintConstant(2)); + packed[0] = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } break; + case xenos::ColorRenderTargetFormat::k_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: + case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: { + // All 64bpp formats, and all 16 bits per component formats, are + // 
represented as integers in ownership transfer for safe handling of + // NaN encodings and -32768 / -32767. + // TODO(Triang3l): Handle the case when that's not true (no multisampled + // sampled images, no 16-bit UNORM, no cross-packing 32bpp aliasing on a + // portability subset device or a 64bpp format where that wouldn't help + // anyway). + spv::Id component_offset_width = builder.makeUintConstant(16); + for (uint32_t i = 0; i <= uint32_t(format_is_64bpp); ++i) { + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back( + builder.createCompositeExtract(source_vec4, type_uint, 2 * i)); + id_vector_temp.push_back(builder.createCompositeExtract( + source_vec4, type_uint, 2 * i + 1)); + id_vector_temp.push_back(component_offset_width); + id_vector_temp.push_back(component_offset_width); + packed[i] = builder.createOp(spv::OpBitFieldInsert, type_uint, + id_vector_temp); + } + } break; + // Float32 is transferred as uint32 to preserve NaN encodings. However, + // multisampled sampled image support is optional in Vulkan. + case xenos::ColorRenderTargetFormat::k_32_FLOAT: + case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: { + for (uint32_t i = 0; i <= uint32_t(format_is_64bpp); ++i) { + spv::Id& packed_ref = packed[i]; + packed_ref = builder.createCompositeExtract(source_vec4, + source_component_type, i); + if (!source_is_uint) { + packed_ref = + builder.createUnaryOp(spv::OpBitcast, type_uint, packed_ref); + } + } + } break; + } + } + + // Write the packed value to the EDRAM buffer. + spv::Id store_value = packed[0]; + if (format_is_64bpp) { + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(packed[0]); + id_vector_temp.push_back(packed[1]); + store_value = builder.createCompositeConstruct(type_uint2, id_vector_temp); + } + id_vector_temp.clear(); + id_vector_temp.reserve(2); + // The only SSBO structure member. 
+ id_vector_temp.push_back(builder.makeIntConstant(0)); + id_vector_temp.push_back( + builder.createUnaryOp(spv::OpBitcast, type_int, edram_sample_address)); + // StorageBuffer since SPIR-V 1.3, but since SPIR-V 1.0 is generated, it's + // Uniform. + builder.createStore(store_value, + builder.createAccessChain(spv::StorageClassUniform, + edram_buffer, id_vector_temp)); + + // End the main function and make it the entry point. + builder.leaveFunction(); + builder.addExecutionMode(main_function, spv::ExecutionModeLocalSize, + kDumpSamplesPerGroupX, kDumpSamplesPerGroupY, 1); + spv::Instruction* entry_point = builder.addEntryPoint( + spv::ExecutionModelGLCompute, main_function, "main"); + // Bindings only need to be added to the entry point's interface starting with + // SPIR-V 1.4 - emitting 1.0 here, so only inputs / outputs. + entry_point->addIdOperand(input_global_invocation_id); + + // Serialize the shader code. + std::vector shader_code; + builder.dump(shader_code); + + // Create the pipeline, and store the handle even if creation fails not to try + // to create it again later. + VkPipeline pipeline = ui::vulkan::util::CreateComputePipeline( + command_processor_.GetVulkanProvider(), + key.is_depth ? dump_pipeline_layout_depth_ : dump_pipeline_layout_color_, + reinterpret_cast(shader_code.data()), + sizeof(uint32_t) * shader_code.size()); + if (pipeline == VK_NULL_HANDLE) { + XELOGE( + "VulkanRenderTargetCache: Failed to create a render target dumping " + "pipeline for {}-sample render targets with format {}", + UINT32_C(1) << uint32_t(key.msaa_samples), + key.is_depth + ? 
xenos::GetDepthRenderTargetFormatName(key.GetDepthFormat()) + : xenos::GetColorRenderTargetFormatName(key.GetColorFormat())); + } + dump_pipelines_.emplace(key, pipeline); + return pipeline; +} + +void VulkanRenderTargetCache::DumpRenderTargets(uint32_t dump_base, + uint32_t dump_row_length_used, + uint32_t dump_rows, + uint32_t dump_pitch) { + assert_true(GetPath() == Path::kHostRenderTargets); + + GetResolveCopyRectanglesToDump(dump_base, dump_row_length_used, dump_rows, + dump_pitch, dump_rectangles_); + if (dump_rectangles_.empty()) { + return; + } + + // Clear previously set temporary indices. + for (const ResolveCopyDumpRectangle& rectangle : dump_rectangles_) { + static_cast(rectangle.render_target) + ->SetTemporarySortIndex(UINT32_MAX); + } + // Gather all needed barriers and info needed to sort the invocations. + UseEdramBuffer(EdramBufferUsage::kComputeWrite); + dump_invocations_.clear(); + dump_invocations_.reserve(dump_rectangles_.size()); + constexpr VkPipelineStageFlags kRenderTargetDstStageMask = + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + constexpr VkAccessFlags kRenderTargetDstAccessMask = + VK_ACCESS_SHADER_READ_BIT; + constexpr VkImageLayout kRenderTargetNewLayout = + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + uint32_t rt_sort_index = 0; + for (const ResolveCopyDumpRectangle& rectangle : dump_rectangles_) { + auto& vulkan_rt = + *static_cast(rectangle.render_target); + RenderTargetKey rt_key = vulkan_rt.key(); + command_processor_.PushImageMemoryBarrier( + vulkan_rt.image(), + ui::vulkan::util::InitializeSubresourceRange( + rt_key.is_depth + ? 
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) + : VK_IMAGE_ASPECT_COLOR_BIT), + vulkan_rt.current_stage_mask(), VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + vulkan_rt.current_access_mask(), VK_ACCESS_SHADER_READ_BIT, + vulkan_rt.current_layout(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + vulkan_rt.SetUsage(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + if (vulkan_rt.temporary_sort_index() == UINT32_MAX) { + vulkan_rt.SetTemporarySortIndex(rt_sort_index++); + } + DumpPipelineKey pipeline_key; + pipeline_key.msaa_samples = rt_key.msaa_samples; + pipeline_key.resource_format = rt_key.resource_format; + pipeline_key.is_depth = rt_key.is_depth; + dump_invocations_.emplace_back(rectangle, pipeline_key); + } + + // Sort the invocations to reduce context and binding switches. + std::sort(dump_invocations_.begin(), dump_invocations_.end()); + + // Dump the render targets. + DeferredCommandBuffer& command_buffer = + command_processor_.deferred_command_buffer(); + bool edram_buffer_bound = false; + VkDescriptorSet last_source_descriptor_set = VK_NULL_HANDLE; + DumpPitches last_pitches; + DumpOffsets last_offsets; + bool pitches_bound = false, offsets_bound = false; + for (const DumpInvocation& invocation : dump_invocations_) { + const ResolveCopyDumpRectangle& rectangle = invocation.rectangle; + auto& vulkan_rt = + *static_cast(rectangle.render_target); + RenderTargetKey rt_key = vulkan_rt.key(); + DumpPipelineKey pipeline_key = invocation.pipeline_key; + VkPipeline pipeline = GetDumpPipeline(pipeline_key); + if (!pipeline) { + continue; + } + command_processor_.BindExternalComputePipeline(pipeline); + + VkPipelineLayout pipeline_layout = rt_key.is_depth + ? dump_pipeline_layout_depth_ + : dump_pipeline_layout_color_; + + // Only need to bind the EDRAM buffer once (relying on pipeline layout + // compatibility). 
+ if (!edram_buffer_bound) { + edram_buffer_bound = true; + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, + kDumpDescriptorSetEdram, 1, &edram_storage_buffer_descriptor_set_, 0, + nullptr); + } + + VkDescriptorSet source_descriptor_set = + vulkan_rt.GetDescriptorSetTransferSource(); + if (last_source_descriptor_set != source_descriptor_set) { + last_source_descriptor_set = source_descriptor_set; + command_buffer.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, + kDumpDescriptorSetSource, 1, &source_descriptor_set, 0, nullptr); + } + + DumpPitches pitches; + pitches.dest_pitch = dump_pitch; + pitches.source_pitch = rt_key.GetPitchTiles(); + if (last_pitches != pitches) { + last_pitches = pitches; + pitches_bound = false; + } + if (!pitches_bound) { + pitches_bound = true; + command_buffer.CmdVkPushConstants( + pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, + sizeof(uint32_t) * kDumpPushConstantPitches, sizeof(last_pitches), + &last_pitches); + } + + DumpOffsets offsets; + offsets.source_base_tiles = rt_key.base_tiles; + ResolveCopyDumpRectangle::Dispatch + dispatches[ResolveCopyDumpRectangle::kMaxDispatches]; + uint32_t dispatch_count = + rectangle.GetDispatches(dump_pitch, dump_row_length_used, dispatches); + for (uint32_t i = 0; i < dispatch_count; ++i) { + const ResolveCopyDumpRectangle::Dispatch& dispatch = dispatches[i]; + offsets.dispatch_first_tile = dump_base + dispatch.offset; + if (last_offsets != offsets) { + last_offsets = offsets; + offsets_bound = false; + } + if (!offsets_bound) { + offsets_bound = true; + command_buffer.CmdVkPushConstants( + pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, + sizeof(uint32_t) * kDumpPushConstantOffsets, sizeof(last_offsets), + &last_offsets); + } + command_processor_.SubmitBarriers(true); + command_buffer.CmdVkDispatch( + (draw_resolution_scale_x() * + (xenos::kEdramTileWidthSamples >> uint32_t(rt_key.Is64bpp())) * + dispatch.width_tiles + + 
(kDumpSamplesPerGroupX - 1)) / + kDumpSamplesPerGroupX, + (draw_resolution_scale_y() * xenos::kEdramTileHeightSamples * + dispatch.height_tiles + + (kDumpSamplesPerGroupY - 1)) / + kDumpSamplesPerGroupY, + 1); + } + MarkEdramBufferModified(); + } +} + } // namespace vulkan } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h index acd8f500d..10b2c1aed 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h @@ -20,6 +20,8 @@ #include "xenia/base/hash.h" #include "xenia/base/xxhash.h" #include "xenia/gpu/render_target_cache.h" +#include "xenia/gpu/vulkan/vulkan_shared_memory.h" +#include "xenia/gpu/vulkan/vulkan_texture_cache.h" #include "xenia/gpu/xenos.h" #include "xenia/ui/vulkan/single_layout_descriptor_set_pool.h" #include "xenia/ui/vulkan/vulkan_provider.h" @@ -86,12 +88,14 @@ class VulkanRenderTargetCache final : public RenderTargetCache { }; VulkanRenderTargetCache(const RegisterFile& register_file, - const Memory& memory, TraceWriter* trace_writer, + const Memory& memory, TraceWriter& trace_writer, uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y, VulkanCommandProcessor& command_processor); ~VulkanRenderTargetCache(); + // Transient descriptor set layouts must be initialized in the command + // processor. bool Initialize(); void Shutdown(bool from_destructor = false); void ClearCache() override; @@ -102,6 +106,13 @@ class VulkanRenderTargetCache final : public RenderTargetCache { // TODO(Triang3l): Fragment shader interlock. Path GetPath() const override { return Path::kHostRenderTargets; } + // Performs the resolve to a shared memory area according to the current + // register values, and also clears the render targets if needed. Must be in a + // frame for calling. 
+ bool Resolve(const Memory& memory, VulkanSharedMemory& shared_memory, + VulkanTextureCache& texture_cache, uint32_t& written_address_out, + uint32_t& written_length_out); + bool Update(bool is_rasterization_done, reg::RB_DEPTHCONTROL normalized_depth_control, uint32_t normalized_color_mask, @@ -182,6 +193,7 @@ class VulkanRenderTargetCache final : public RenderTargetCache { // Trace playback. kTransferWrite, }; + enum class EdramBufferModificationStatus { // The values are ordered by how strong the barrier conditions are. // No uncommitted shader writes. @@ -192,6 +204,23 @@ class VulkanRenderTargetCache final : public RenderTargetCache { // Need to commit before any next fragment shader interlock usage. kViaUnordered, }; + + enum ResolveCopyDescriptorSet : uint32_t { + // Never changes. + kResolveCopyDescriptorSetEdram, + // Shared memory or a region in it. + kResolveCopyDescriptorSetDest, + + kResolveCopyDescriptorSetCount, + }; + + struct ResolveCopyShaderCode { + const uint32_t* unscaled; + size_t unscaled_size_bytes; + const uint32_t* scaled; + size_t scaled_size_bytes; + }; + static void GetEdramBufferUsageMasks(EdramBufferUsage usage, VkPipelineStageFlags& stage_mask_out, VkAccessFlags& access_mask_out); @@ -204,6 +233,7 @@ class VulkanRenderTargetCache final : public RenderTargetCache { EdramBufferModificationStatus::kViaFragmentShaderInterlock); VulkanCommandProcessor& command_processor_; + TraceWriter& trace_writer_; // Accessible in fragment and compute shaders. 
VkDescriptorSetLayout descriptor_set_layout_storage_buffer_ = VK_NULL_HANDLE; @@ -224,6 +254,12 @@ class VulkanRenderTargetCache final : public RenderTargetCache { VkDescriptorPool edram_storage_buffer_descriptor_pool_ = VK_NULL_HANDLE; VkDescriptorSet edram_storage_buffer_descriptor_set_; + VkPipelineLayout resolve_copy_pipeline_layout_ = VK_NULL_HANDLE; + static const ResolveCopyShaderCode + kResolveCopyShaders[size_t(draw_util::ResolveCopyShaderIndex::kCount)]; + std::array + resolve_copy_pipelines_{}; + // RenderPassKey::key -> VkRenderPass. // VK_NULL_HANDLE if failed to create. std::unordered_map render_passes_; @@ -627,6 +663,136 @@ class VulkanRenderTargetCache final : public RenderTargetCache { } }; + union DumpPipelineKey { + uint32_t key; + struct { + xenos::MsaaSamples msaa_samples : 2; + uint32_t resource_format : 4; + // Last bit because this affects the pipeline - after sorting, only change + // it at most once. Depth buffers have an additional stencil SRV. + uint32_t is_depth : 1; + }; + + DumpPipelineKey() : key(0) { static_assert_size(*this, sizeof(key)); } + + struct Hasher { + size_t operator()(const DumpPipelineKey& key) const { + return std::hash{}(key.key); + } + }; + bool operator==(const DumpPipelineKey& other_key) const { + return key == other_key.key; + } + bool operator!=(const DumpPipelineKey& other_key) const { + return !(*this == other_key); + } + bool operator<(const DumpPipelineKey& other_key) const { + return key < other_key.key; + } + + xenos::ColorRenderTargetFormat GetColorFormat() const { + assert_false(is_depth); + return xenos::ColorRenderTargetFormat(resource_format); + } + xenos::DepthRenderTargetFormat GetDepthFormat() const { + assert_true(is_depth); + return xenos::DepthRenderTargetFormat(resource_format); + } + }; + + // There's no strict dependency on the group size in dumping, for simplicity + // calculations especially with resolution scaling, dividing manually (as the + // group size is not unlimited). 
The only restriction is that an integer + // multiple of it must be 80x16 samples (and no larger than that) for 32bpp, + // or 40x16 samples for 64bpp (because only a half of the pair of tiles may + // need to be dumped). Using 8x16 since that's 128 - the minimum required + // group size on Vulkan, and the maximum number of lanes in a subgroup on + // Vulkan. + static constexpr uint32_t kDumpSamplesPerGroupX = 8; + static constexpr uint32_t kDumpSamplesPerGroupY = 16; + + union DumpPitches { + uint32_t pitches; + struct { + // Both in tiles. + uint32_t dest_pitch : xenos::kEdramPitchTilesBits; + uint32_t source_pitch : xenos::kEdramPitchTilesBits; + }; + DumpPitches() : pitches(0) { static_assert_size(*this, sizeof(pitches)); } + bool operator==(const DumpPitches& other_pitches) const { + return pitches == other_pitches.pitches; + } + bool operator!=(const DumpPitches& other_pitches) const { + return !(*this == other_pitches); + } + }; + + union DumpOffsets { + uint32_t offsets; + struct { + uint32_t dispatch_first_tile : xenos::kEdramBaseTilesBits; + uint32_t source_base_tiles : xenos::kEdramBaseTilesBits; + }; + DumpOffsets() : offsets(0) { static_assert_size(*this, sizeof(offsets)); } + bool operator==(const DumpOffsets& other_offsets) const { + return offsets == other_offsets.offsets; + } + bool operator!=(const DumpOffsets& other_offsets) const { + return !(*this == other_offsets); + } + }; + + enum DumpDescriptorSet : uint32_t { + // Never changes. Same in both color and depth pipeline layouts, keep the + // first for pipeline layout compatibility, to only have to set it once. + kDumpDescriptorSetEdram, + // One resolve may need multiple sources. Different descriptor set layouts + // for color and depth. + kDumpDescriptorSetSource, + + kDumpDescriptorSetCount, + }; + + enum DumpPushConstant : uint32_t { + // May be different for different sources. + kDumpPushConstantPitches, + // May be changed multiple times for the same source. 
+ kDumpPushConstantOffsets, + + kDumpPushConstantCount, + }; + + struct DumpInvocation { + ResolveCopyDumpRectangle rectangle; + DumpPipelineKey pipeline_key; + DumpInvocation(const ResolveCopyDumpRectangle& rectangle, + const DumpPipelineKey& pipeline_key) + : rectangle(rectangle), pipeline_key(pipeline_key) {} + bool operator<(const DumpInvocation& other_invocation) { + // Sort by the pipeline key primarily to reduce pipeline state (context) + // switches. + if (pipeline_key != other_invocation.pipeline_key) { + return pipeline_key < other_invocation.pipeline_key; + } + assert_not_null(rectangle.render_target); + uint32_t render_target_index = + static_cast(rectangle.render_target) + ->temporary_sort_index(); + const ResolveCopyDumpRectangle& other_rectangle = + other_invocation.rectangle; + uint32_t other_render_target_index = + static_cast(other_rectangle.render_target) + ->temporary_sort_index(); + if (render_target_index != other_render_target_index) { + return render_target_index < other_render_target_index; + } + if (rectangle.row_first != other_rectangle.row_first) { + return rectangle.row_first < other_rectangle.row_first; + } + return rectangle.row_first_start < other_rectangle.row_first_start; + } + }; + // Returns the framebuffer object, or VK_NULL_HANDLE if failed to create. const Framebuffer* GetFramebuffer( RenderPassKey render_pass_key, uint32_t pitch_tiles_at_32bpp, @@ -649,6 +815,13 @@ class VulkanRenderTargetCache final : public RenderTargetCache { const uint64_t* render_target_resolve_clear_values = nullptr, const Transfer::Rectangle* resolve_clear_rectangle = nullptr); + VkPipeline GetDumpPipeline(DumpPipelineKey key); + + // Writes contents of host render targets within rectangles from + // ResolveInfo::GetCopyEdramTileSpan to edram_buffer_. 
+ void DumpRenderTargets(uint32_t dump_base, uint32_t dump_row_length_used, + uint32_t dump_rows, uint32_t dump_pitch); + bool gamma_render_target_as_srgb_ = false; bool msaa_2x_attachments_supported_ = false; @@ -688,8 +861,22 @@ class VulkanRenderTargetCache final : public RenderTargetCache { TransferPipelineKey::Hasher> transfer_pipelines_; + VkPipelineLayout dump_pipeline_layout_color_ = VK_NULL_HANDLE; + VkPipelineLayout dump_pipeline_layout_depth_ = VK_NULL_HANDLE; + // Compute pipelines for copying host render target contents to the EDRAM + // buffer. VK_NULL_HANDLE if failed to create. + std::unordered_map + dump_pipelines_; + + // Temporary storage for Resolve. + std::vector clear_transfers_[2]; + // Temporary storage for PerformTransfersAndResolveClears. std::vector current_transfer_invocations_; + + // Temporary storage for DumpRenderTargets. + std::vector dump_rectangles_; + std::vector dump_invocations_; }; } // namespace vulkan From 4514050f5549638bc377edf86f4b919bf91778f4 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Wed, 22 Jun 2022 13:25:06 +0300 Subject: [PATCH 099/123] [Vulkan] Truncate depth to float24 in EDRAM range ownership transfers and resolves by default Doesn't ruin the "greater or equal" depth test in subsequent rendering passes if precision is lost, unlike rounding to the nearest --- src/xenia/gpu/spirv_shader_translator.h | 5 ++-- src/xenia/gpu/spirv_shader_translator_rb.cc | 30 ++++++++++--------- .../gpu/vulkan/vulkan_render_target_cache.cc | 14 ++++++--- .../gpu/vulkan/vulkan_render_target_cache.h | 4 +++ 4 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 3df49136f..075279848 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -208,10 +208,11 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id ext_inst_glsl_std_450); // Converts the depth value externally clamped to the 
representable [0, 2) // range to 20e4 floating point, with zeros in bits 24:31, rounding to the - // nearest even. If remap_from_0_to_0_5 is true, it's assumed that 0...1 is - // pre-remapped to 0...0.5 in the input. + // nearest even or towards zero. If remap_from_0_to_0_5 is true, it's assumed + // that 0...1 is pre-remapped to 0...0.5 in the input. static spv::Id PreClampedDepthTo20e4(spv::Builder& builder, spv::Id f32_scalar, + bool round_to_nearest_even, bool remap_from_0_to_0_5, spv::Id ext_inst_glsl_std_450); // Converts the 20e4 number in bits [f24_shift, f24_shift + 10) to a 32-bit diff --git a/src/xenia/gpu/spirv_shader_translator_rb.cc b/src/xenia/gpu/spirv_shader_translator_rb.cc index 4cb260bdd..8282016b5 100644 --- a/src/xenia/gpu/spirv_shader_translator_rb.cc +++ b/src/xenia/gpu/spirv_shader_translator_rb.cc @@ -230,8 +230,8 @@ spv::Id SpirvShaderTranslator::Float7e3To32(spv::Builder& builder, } spv::Id SpirvShaderTranslator::PreClampedDepthTo20e4( - spv::Builder& builder, spv::Id f32_scalar, bool remap_from_0_to_0_5, - spv::Id ext_inst_glsl_std_450) { + spv::Builder& builder, spv::Id f32_scalar, bool round_to_nearest_even, + bool remap_from_0_to_0_5, spv::Id ext_inst_glsl_std_450) { // CFloat24 from d3dref9.dll + // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp // Assuming the value is already clamped to [0, 2) (in all places, the depth @@ -305,18 +305,20 @@ spv::Id SpirvShaderTranslator::PreClampedDepthTo20e4( builder.makeUintConstant(0x38800000 - (remap_bias << 23))), denormal_biased_f32, normal_biased_f32); - // Build the 20e4 number rounding to the nearest even. 
- // ((biased_f32 + 3 + ((biased_f32 >> 3) & 1)) >> 3) & 0xFFFFFF - return builder.createTriOp( - spv::OpBitFieldUExtract, type_uint, - builder.createBinOp( - spv::OpIAdd, type_uint, - builder.createBinOp(spv::OpIAdd, type_uint, biased_f32, - builder.makeUintConstant(3)), - builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32, - builder.makeUintConstant(3), - builder.makeUintConstant(1))), - builder.makeUintConstant(3), builder.makeUintConstant(24)); + // Build the 20e4 number rounding to the nearest even or towards zero. + if (round_to_nearest_even) { + // biased_f32 += 3 + ((biased_f32 >> 3) & 1) + biased_f32 = builder.createBinOp( + spv::OpIAdd, type_uint, + builder.createBinOp(spv::OpIAdd, type_uint, biased_f32, + builder.makeUintConstant(3)), + builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32, + builder.makeUintConstant(3), + builder.makeUintConstant(1))); + } + return builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32, + builder.makeUintConstant(3), + builder.makeUintConstant(24)); } spv::Id SpirvShaderTranslator::Depth20e4To32(spv::Builder& builder, diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index 4d021ca7a..4d8545fd0 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -416,6 +416,8 @@ bool VulkanRenderTargetCache::Initialize() { // TODO(Triang3l): All paths (FSI). + depth_float24_round_ = cvars::depth_float24_round; + // TODO(Triang3l): Handle sampledImageIntegerSampleCounts 4 not supported in // transfers. 
if (cvars::native_2x_msaa) { @@ -3037,7 +3039,8 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( } break; case xenos::DepthRenderTargetFormat::kD24FS8: { depth24 = SpirvShaderTranslator::PreClampedDepthTo20e4( - builder, source_depth_float[i], true, ext_inst_glsl_std_450); + builder, source_depth_float[i], depth_float24_round(), true, + ext_inst_glsl_std_450); } break; } // Merge depth and stencil. @@ -3353,7 +3356,8 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( } break; case xenos::DepthRenderTargetFormat::kD24FS8: { packed = SpirvShaderTranslator::PreClampedDepthTo20e4( - builder, source_depth_float[0], true, ext_inst_glsl_std_450); + builder, source_depth_float[0], depth_float24_round(), true, + ext_inst_glsl_std_450); } break; } if (mode.output == TransferOutput::kDepth) { @@ -3855,7 +3859,8 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader( } break; case xenos::DepthRenderTargetFormat::kD24FS8: { host_depth24 = SpirvShaderTranslator::PreClampedDepthTo20e4( - builder, host_depth32, true, ext_inst_glsl_std_450); + builder, host_depth32, depth_float24_round(), true, + ext_inst_glsl_std_450); } break; } assert_true(host_depth24 != spv::NoResult); @@ -5548,7 +5553,8 @@ VkPipeline VulkanRenderTargetCache::GetDumpPipeline(DumpPipelineKey key) { } break; case xenos::DepthRenderTargetFormat::kD24FS8: { packed[0] = SpirvShaderTranslator::PreClampedDepthTo20e4( - builder, source_depth32, true, ext_inst_glsl_std_450); + builder, source_depth32, depth_float24_round(), true, + ext_inst_glsl_std_450); } break; } id_vector_temp.clear(); diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h index 10b2c1aed..2857fde1f 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h @@ -128,6 +128,8 @@ class VulkanRenderTargetCache final : public RenderTargetCache { return last_update_framebuffer_; } + bool 
depth_float24_round() const { return depth_float24_round_; } + bool msaa_2x_attachments_supported() const { return msaa_2x_attachments_supported_; } @@ -824,6 +826,8 @@ class VulkanRenderTargetCache final : public RenderTargetCache { bool gamma_render_target_as_srgb_ = false; + bool depth_float24_round_ = false; + bool msaa_2x_attachments_supported_ = false; bool msaa_2x_no_attachments_supported_ = false; From 4b4205ba0062c72168527f63bbc85736457d9a47 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 25 Jun 2022 14:33:43 +0300 Subject: [PATCH 100/123] [Vulkan] Frontbuffer presentation --- .../bytecode/vulkan_spirv/fullscreen_tc_vs.h | 101 -- .../gpu/shaders/bytecode/vulkan_spirv/uv_ps.h | 58 -- src/xenia/gpu/shaders/fullscreen_tc.vs.glsl | 10 - src/xenia/gpu/shaders/uv.ps.glsl | 10 - .../gpu/vulkan/vulkan_command_processor.cc | 961 ++++++++++++++---- .../gpu/vulkan/vulkan_command_processor.h | 73 +- src/xenia/gpu/vulkan/vulkan_texture_cache.cc | 98 +- src/xenia/gpu/vulkan/vulkan_texture_cache.h | 11 +- src/xenia/ui/vulkan/functions/device_1_0.inc | 2 + 9 files changed, 917 insertions(+), 407 deletions(-) delete mode 100644 src/xenia/gpu/shaders/bytecode/vulkan_spirv/fullscreen_tc_vs.h delete mode 100644 src/xenia/gpu/shaders/bytecode/vulkan_spirv/uv_ps.h delete mode 100644 src/xenia/gpu/shaders/fullscreen_tc.vs.glsl delete mode 100644 src/xenia/gpu/shaders/uv.ps.glsl diff --git a/src/xenia/gpu/shaders/bytecode/vulkan_spirv/fullscreen_tc_vs.h b/src/xenia/gpu/shaders/bytecode/vulkan_spirv/fullscreen_tc_vs.h deleted file mode 100644 index 0639f924a..000000000 --- a/src/xenia/gpu/shaders/bytecode/vulkan_spirv/fullscreen_tc_vs.h +++ /dev/null @@ -1,101 +0,0 @@ -// Generated with `xb buildshaders`. 
-#if 0 -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 10 -; Bound: 23240 -; Schema: 0 - OpCapability Shader - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Vertex %5663 "main" %3877 %gl_VertexIndex %4930 - OpDecorate %3877 Location 0 - OpDecorate %gl_VertexIndex BuiltIn VertexIndex - OpMemberDecorate %_struct_1032 0 BuiltIn Position - OpMemberDecorate %_struct_1032 1 BuiltIn PointSize - OpDecorate %_struct_1032 Block - %void = OpTypeVoid - %1282 = OpTypeFunction %void - %float = OpTypeFloat 32 - %v2float = OpTypeVector %float 2 -%_ptr_Output_v2float = OpTypePointer Output %v2float - %3877 = OpVariable %_ptr_Output_v2float Output - %int = OpTypeInt 32 1 -%_ptr_Input_int = OpTypePointer Input %int -%gl_VertexIndex = OpVariable %_ptr_Input_int Input - %uint = OpTypeInt 32 0 - %uint_1 = OpConstant %uint 1 - %v2uint = OpTypeVector %uint 2 - %uint_2 = OpConstant %uint 2 - %v4float = OpTypeVector %float 4 -%_struct_1032 = OpTypeStruct %v4float %float -%_ptr_Output__struct_1032 = OpTypePointer Output %_struct_1032 - %4930 = OpVariable %_ptr_Output__struct_1032 Output - %int_0 = OpConstant %int 0 - %float_2 = OpConstant %float 2 - %float_1 = OpConstant %float 1 - %float_0 = OpConstant %float 0 -%_ptr_Output_v4float = OpTypePointer Output %v4float - %1849 = OpConstantComposite %v2uint %uint_2 %uint_2 - %768 = OpConstantComposite %v2float %float_1 %float_1 - %5663 = OpFunction %void None %1282 - %6733 = OpLabel - %12420 = OpLoad %int %gl_VertexIndex - %12986 = OpBitcast %uint %12420 - %21962 = OpShiftLeftLogical %int %12420 %uint_1 - %19941 = OpBitcast %uint %21962 - %15527 = OpCompositeConstruct %v2uint %12986 %19941 - %7198 = OpBitwiseAnd %v2uint %15527 %1849 - %12989 = OpConvertUToF %v2float %7198 - OpStore %3877 %12989 - %23239 = OpLoad %v2float %3877 - %20253 = OpVectorTimesScalar %v2float %23239 %float_2 - %23195 = OpFSub %v2float %20253 %768 - %7674 = OpCompositeExtract %float %23195 0 - %15569 = 
OpCompositeExtract %float %23195 1 - %18260 = OpCompositeConstruct %v4float %7674 %15569 %float_0 %float_1 - %12055 = OpAccessChain %_ptr_Output_v4float %4930 %int_0 - OpStore %12055 %18260 - OpReturn - OpFunctionEnd -#endif - -const uint32_t fullscreen_tc_vs[] = { - 0x07230203, 0x00010000, 0x0008000A, 0x00005AC8, 0x00000000, 0x00020011, - 0x00000001, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E, - 0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x0008000F, 0x00000000, - 0x0000161F, 0x6E69616D, 0x00000000, 0x00000F25, 0x00001029, 0x00001342, - 0x00040047, 0x00000F25, 0x0000001E, 0x00000000, 0x00040047, 0x00001029, - 0x0000000B, 0x0000002A, 0x00050048, 0x00000408, 0x00000000, 0x0000000B, - 0x00000000, 0x00050048, 0x00000408, 0x00000001, 0x0000000B, 0x00000001, - 0x00030047, 0x00000408, 0x00000002, 0x00020013, 0x00000008, 0x00030021, - 0x00000502, 0x00000008, 0x00030016, 0x0000000D, 0x00000020, 0x00040017, - 0x00000013, 0x0000000D, 0x00000002, 0x00040020, 0x00000290, 0x00000003, - 0x00000013, 0x0004003B, 0x00000290, 0x00000F25, 0x00000003, 0x00040015, - 0x0000000C, 0x00000020, 0x00000001, 0x00040020, 0x00000289, 0x00000001, - 0x0000000C, 0x0004003B, 0x00000289, 0x00001029, 0x00000001, 0x00040015, - 0x0000000B, 0x00000020, 0x00000000, 0x0004002B, 0x0000000B, 0x00000A0D, - 0x00000001, 0x00040017, 0x00000011, 0x0000000B, 0x00000002, 0x0004002B, - 0x0000000B, 0x00000A10, 0x00000002, 0x00040017, 0x0000001D, 0x0000000D, - 0x00000004, 0x0004001E, 0x00000408, 0x0000001D, 0x0000000D, 0x00040020, - 0x00000685, 0x00000003, 0x00000408, 0x0004003B, 0x00000685, 0x00001342, - 0x00000003, 0x0004002B, 0x0000000C, 0x00000A0B, 0x00000000, 0x0004002B, - 0x0000000D, 0x00000018, 0x40000000, 0x0004002B, 0x0000000D, 0x0000008A, - 0x3F800000, 0x0004002B, 0x0000000D, 0x00000A0C, 0x00000000, 0x00040020, - 0x0000029A, 0x00000003, 0x0000001D, 0x0005002C, 0x00000011, 0x00000739, - 0x00000A10, 0x00000A10, 0x0005002C, 0x00000013, 0x00000300, 0x0000008A, - 0x0000008A, 0x00050036, 
0x00000008, 0x0000161F, 0x00000000, 0x00000502, - 0x000200F8, 0x00001A4D, 0x0004003D, 0x0000000C, 0x00003084, 0x00001029, - 0x0004007C, 0x0000000B, 0x000032BA, 0x00003084, 0x000500C4, 0x0000000C, - 0x000055CA, 0x00003084, 0x00000A0D, 0x0004007C, 0x0000000B, 0x00004DE5, - 0x000055CA, 0x00050050, 0x00000011, 0x00003CA7, 0x000032BA, 0x00004DE5, - 0x000500C7, 0x00000011, 0x00001C1E, 0x00003CA7, 0x00000739, 0x00040070, - 0x00000013, 0x000032BD, 0x00001C1E, 0x0003003E, 0x00000F25, 0x000032BD, - 0x0004003D, 0x00000013, 0x00005AC7, 0x00000F25, 0x0005008E, 0x00000013, - 0x00004F1D, 0x00005AC7, 0x00000018, 0x00050083, 0x00000013, 0x00005A9B, - 0x00004F1D, 0x00000300, 0x00050051, 0x0000000D, 0x00001DFA, 0x00005A9B, - 0x00000000, 0x00050051, 0x0000000D, 0x00003CD1, 0x00005A9B, 0x00000001, - 0x00070050, 0x0000001D, 0x00004754, 0x00001DFA, 0x00003CD1, 0x00000A0C, - 0x0000008A, 0x00050041, 0x0000029A, 0x00002F17, 0x00001342, 0x00000A0B, - 0x0003003E, 0x00002F17, 0x00004754, 0x000100FD, 0x00010038, -}; diff --git a/src/xenia/gpu/shaders/bytecode/vulkan_spirv/uv_ps.h b/src/xenia/gpu/shaders/bytecode/vulkan_spirv/uv_ps.h deleted file mode 100644 index 01bf8e075..000000000 --- a/src/xenia/gpu/shaders/bytecode/vulkan_spirv/uv_ps.h +++ /dev/null @@ -1,58 +0,0 @@ -// Generated with `xb buildshaders`. 
-#if 0 -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 10 -; Bound: 24988 -; Schema: 0 - OpCapability Shader - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Fragment %5663 "main" %5120 %3877 - OpExecutionMode %5663 OriginUpperLeft - OpDecorate %5120 RelaxedPrecision - OpDecorate %5120 Location 0 - OpDecorate %3877 Location 0 - %void = OpTypeVoid - %1282 = OpTypeFunction %void - %float = OpTypeFloat 32 - %v4float = OpTypeVector %float 4 -%_ptr_Output_v4float = OpTypePointer Output %v4float - %5120 = OpVariable %_ptr_Output_v4float Output - %v2float = OpTypeVector %float 2 -%_ptr_Input_v2float = OpTypePointer Input %v2float - %3877 = OpVariable %_ptr_Input_v2float Input - %float_0 = OpConstant %float 0 - %float_1 = OpConstant %float 1 - %5663 = OpFunction %void None %1282 - %24987 = OpLabel - %17674 = OpLoad %v2float %3877 - %21995 = OpCompositeExtract %float %17674 0 - %23327 = OpCompositeExtract %float %17674 1 - %22408 = OpCompositeConstruct %v4float %21995 %23327 %float_0 %float_1 - OpStore %5120 %22408 - OpReturn - OpFunctionEnd -#endif - -const uint32_t uv_ps[] = { - 0x07230203, 0x00010000, 0x0008000A, 0x0000619C, 0x00000000, 0x00020011, - 0x00000001, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E, - 0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x0007000F, 0x00000004, - 0x0000161F, 0x6E69616D, 0x00000000, 0x00001400, 0x00000F25, 0x00030010, - 0x0000161F, 0x00000007, 0x00030047, 0x00001400, 0x00000000, 0x00040047, - 0x00001400, 0x0000001E, 0x00000000, 0x00040047, 0x00000F25, 0x0000001E, - 0x00000000, 0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008, - 0x00030016, 0x0000000D, 0x00000020, 0x00040017, 0x0000001D, 0x0000000D, - 0x00000004, 0x00040020, 0x0000029A, 0x00000003, 0x0000001D, 0x0004003B, - 0x0000029A, 0x00001400, 0x00000003, 0x00040017, 0x00000013, 0x0000000D, - 0x00000002, 0x00040020, 0x00000290, 0x00000001, 0x00000013, 0x0004003B, - 0x00000290, 0x00000F25, 
0x00000001, 0x0004002B, 0x0000000D, 0x00000A0C, - 0x00000000, 0x0004002B, 0x0000000D, 0x0000008A, 0x3F800000, 0x00050036, - 0x00000008, 0x0000161F, 0x00000000, 0x00000502, 0x000200F8, 0x0000619B, - 0x0004003D, 0x00000013, 0x0000450A, 0x00000F25, 0x00050051, 0x0000000D, - 0x000055EB, 0x0000450A, 0x00000000, 0x00050051, 0x0000000D, 0x00005B1F, - 0x0000450A, 0x00000001, 0x00070050, 0x0000001D, 0x00005788, 0x000055EB, - 0x00005B1F, 0x00000A0C, 0x0000008A, 0x0003003E, 0x00001400, 0x00005788, - 0x000100FD, 0x00010038, -}; diff --git a/src/xenia/gpu/shaders/fullscreen_tc.vs.glsl b/src/xenia/gpu/shaders/fullscreen_tc.vs.glsl deleted file mode 100644 index 340091ac3..000000000 --- a/src/xenia/gpu/shaders/fullscreen_tc.vs.glsl +++ /dev/null @@ -1,10 +0,0 @@ -#version 310 es - -// A triangle covering the whole viewport. - -layout(location = 0) out vec2 xe_var_texcoord; - -void main() { - xe_var_texcoord = vec2(uvec2(gl_VertexIndex, gl_VertexIndex << 1u) & 2u); - gl_Position = vec4(xe_var_texcoord * 2.0 - 1.0, 0.0, 1.0); -} diff --git a/src/xenia/gpu/shaders/uv.ps.glsl b/src/xenia/gpu/shaders/uv.ps.glsl deleted file mode 100644 index 03eaef5ce..000000000 --- a/src/xenia/gpu/shaders/uv.ps.glsl +++ /dev/null @@ -1,10 +0,0 @@ -#version 310 es -precision highp float; - -layout(location = 0) in vec2 xe_var_texcoord; - -layout(location = 0) out lowp vec4 xe_frag_color; - -void main() { - xe_frag_color = vec4(xe_var_texcoord, 0.0, 1.0); -} diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index a6c4b9f41..8b45446db 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -10,6 +10,7 @@ #include "xenia/gpu/vulkan/vulkan_command_processor.h" #include +#include #include #include #include @@ -17,6 +18,7 @@ #include #include "xenia/base/assert.h" +#include "xenia/base/byte_order.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" #include 
"xenia/base/profiling.h" @@ -40,8 +42,12 @@ namespace vulkan { // Generated with `xb buildshaders`. namespace shaders { -#include "xenia/gpu/shaders/bytecode/vulkan_spirv/fullscreen_tc_vs.h" -#include "xenia/gpu/shaders/bytecode/vulkan_spirv/uv_ps.h" +// TODO(Triang3l): Remove the texture coordinates. +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/apply_gamma_pwl_fxaa_luma_ps.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/apply_gamma_pwl_ps.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/apply_gamma_table_fxaa_luma_ps.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/apply_gamma_table_ps.h" +#include "xenia/gpu/shaders/bytecode/vulkan_spirv/fullscreen_cw_vs.h" } // namespace shaders // No specific reason for 32768 descriptors, just the "too much" amount from @@ -386,209 +392,578 @@ bool VulkanCommandProcessor::SetupContext() { // Swap objects. - // Swap render pass. Doesn't make assumptions about outer usage (explicit - // barriers must be used instead) for simplicity of use in different scenarios - // with different pipelines. 
- VkAttachmentDescription swap_render_pass_attachment; - swap_render_pass_attachment.flags = 0; - swap_render_pass_attachment.format = - ui::vulkan::VulkanPresenter::kGuestOutputFormat; - swap_render_pass_attachment.samples = VK_SAMPLE_COUNT_1_BIT; - swap_render_pass_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - swap_render_pass_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - swap_render_pass_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - swap_render_pass_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - swap_render_pass_attachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - swap_render_pass_attachment.finalLayout = - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - VkAttachmentReference swap_render_pass_color_attachment; - swap_render_pass_color_attachment.attachment = 0; - swap_render_pass_color_attachment.layout = - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - VkSubpassDescription swap_render_pass_subpass = {}; - swap_render_pass_subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - swap_render_pass_subpass.colorAttachmentCount = 1; - swap_render_pass_subpass.pColorAttachments = - &swap_render_pass_color_attachment; - VkSubpassDependency swap_render_pass_dependencies[2]; - for (uint32_t i = 0; i < 2; ++i) { - VkSubpassDependency& swap_render_pass_dependency = - swap_render_pass_dependencies[i]; - swap_render_pass_dependency.srcSubpass = i ? 0 : VK_SUBPASS_EXTERNAL; - swap_render_pass_dependency.dstSubpass = i ? 
VK_SUBPASS_EXTERNAL : 0; - swap_render_pass_dependency.srcStageMask = - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - swap_render_pass_dependency.dstStageMask = - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - swap_render_pass_dependency.srcAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - swap_render_pass_dependency.dstAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - swap_render_pass_dependency.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; + // Gamma ramp, either device-local and host-visible at once, or separate + // device-local texel buffer and host-visible upload buffer. + gamma_ramp_256_entry_table_current_frame_ = UINT32_MAX; + gamma_ramp_pwl_current_frame_ = UINT32_MAX; + // Try to create a device-local host-visible buffer first, to skip copying. + constexpr uint32_t kGammaRampSize256EntryTable = sizeof(uint32_t) * 256; + constexpr uint32_t kGammaRampSizePWL = sizeof(uint16_t) * 2 * 3 * 128; + constexpr uint32_t kGammaRampSize = + kGammaRampSize256EntryTable + kGammaRampSizePWL; + VkBufferCreateInfo gamma_ramp_host_visible_buffer_create_info; + gamma_ramp_host_visible_buffer_create_info.sType = + VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + gamma_ramp_host_visible_buffer_create_info.pNext = nullptr; + gamma_ramp_host_visible_buffer_create_info.flags = 0; + gamma_ramp_host_visible_buffer_create_info.size = + kGammaRampSize * kMaxFramesInFlight; + gamma_ramp_host_visible_buffer_create_info.usage = + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; + gamma_ramp_host_visible_buffer_create_info.sharingMode = + VK_SHARING_MODE_EXCLUSIVE; + gamma_ramp_host_visible_buffer_create_info.queueFamilyIndexCount = 0; + gamma_ramp_host_visible_buffer_create_info.pQueueFamilyIndices = nullptr; + if (dfn.vkCreateBuffer(device, &gamma_ramp_host_visible_buffer_create_info, + nullptr, &gamma_ramp_buffer_) == VK_SUCCESS) { + bool use_gamma_ramp_host_visible_buffer = false; + VkMemoryRequirements gamma_ramp_host_visible_buffer_memory_requirements; + 
dfn.vkGetBufferMemoryRequirements( + device, gamma_ramp_buffer_, + &gamma_ramp_host_visible_buffer_memory_requirements); + uint32_t gamma_ramp_host_visible_buffer_memory_types = + gamma_ramp_host_visible_buffer_memory_requirements.memoryTypeBits & + (provider.memory_types_device_local() & + provider.memory_types_host_visible()); + VkMemoryAllocateInfo gamma_ramp_host_visible_buffer_memory_allocate_info; + // Prefer a host-uncached (because it's write-only) memory type, but try a + // host-cached host-visible device-local one as well. + if (xe::bit_scan_forward( + gamma_ramp_host_visible_buffer_memory_types & + ~provider.memory_types_host_cached(), + &(gamma_ramp_host_visible_buffer_memory_allocate_info + .memoryTypeIndex)) || + xe::bit_scan_forward( + gamma_ramp_host_visible_buffer_memory_types, + &(gamma_ramp_host_visible_buffer_memory_allocate_info + .memoryTypeIndex))) { + VkMemoryAllocateInfo* + gamma_ramp_host_visible_buffer_memory_allocate_info_last = + &gamma_ramp_host_visible_buffer_memory_allocate_info; + gamma_ramp_host_visible_buffer_memory_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + gamma_ramp_host_visible_buffer_memory_allocate_info.pNext = nullptr; + gamma_ramp_host_visible_buffer_memory_allocate_info.allocationSize = + gamma_ramp_host_visible_buffer_memory_requirements.size; + VkMemoryDedicatedAllocateInfoKHR + gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info; + if (provider.device_extensions().khr_dedicated_allocation) { + gamma_ramp_host_visible_buffer_memory_allocate_info_last->pNext = + &gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info; + gamma_ramp_host_visible_buffer_memory_allocate_info_last = + reinterpret_cast( + &gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info); + gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; + gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info.pNext = + nullptr; + 
gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info.image = + VK_NULL_HANDLE; + gamma_ramp_host_visible_buffer_memory_dedicated_allocate_info.buffer = + gamma_ramp_buffer_; + } + if (dfn.vkAllocateMemory( + device, &gamma_ramp_host_visible_buffer_memory_allocate_info, + nullptr, &gamma_ramp_buffer_memory_) == VK_SUCCESS) { + if (dfn.vkBindBufferMemory(device, gamma_ramp_buffer_, + gamma_ramp_buffer_memory_, + 0) == VK_SUCCESS) { + if (dfn.vkMapMemory(device, gamma_ramp_buffer_memory_, 0, + VK_WHOLE_SIZE, 0, + &gamma_ramp_upload_mapping_) == VK_SUCCESS) { + use_gamma_ramp_host_visible_buffer = true; + gamma_ramp_upload_memory_size_ = + gamma_ramp_host_visible_buffer_memory_allocate_info + .allocationSize; + gamma_ramp_upload_memory_type_ = + gamma_ramp_host_visible_buffer_memory_allocate_info + .memoryTypeIndex; + } + } + if (!use_gamma_ramp_host_visible_buffer) { + dfn.vkFreeMemory(device, gamma_ramp_buffer_memory_, nullptr); + gamma_ramp_buffer_memory_ = VK_NULL_HANDLE; + } + } + } + if (!use_gamma_ramp_host_visible_buffer) { + dfn.vkDestroyBuffer(device, gamma_ramp_buffer_, nullptr); + gamma_ramp_buffer_ = VK_NULL_HANDLE; + } } - VkRenderPassCreateInfo swap_render_pass_create_info; - swap_render_pass_create_info.sType = - VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - swap_render_pass_create_info.pNext = nullptr; - swap_render_pass_create_info.flags = 0; - swap_render_pass_create_info.attachmentCount = 1; - swap_render_pass_create_info.pAttachments = &swap_render_pass_attachment; - swap_render_pass_create_info.subpassCount = 1; - swap_render_pass_create_info.pSubpasses = &swap_render_pass_subpass; - swap_render_pass_create_info.dependencyCount = - uint32_t(xe::countof(swap_render_pass_dependencies)); - swap_render_pass_create_info.pDependencies = swap_render_pass_dependencies; - if (dfn.vkCreateRenderPass(device, &swap_render_pass_create_info, nullptr, - &swap_render_pass_) != VK_SUCCESS) { - XELOGE("Failed to create the Vulkan render pass for 
presentation"); - return false; + if (gamma_ramp_buffer_ == VK_NULL_HANDLE) { + // Create separate buffers for the shader and uploading. + if (!ui::vulkan::util::CreateDedicatedAllocationBuffer( + provider, kGammaRampSize, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, + ui::vulkan::util::MemoryPurpose::kDeviceLocal, gamma_ramp_buffer_, + gamma_ramp_buffer_memory_)) { + XELOGE("Failed to create the gamma ramp buffer"); + return false; + } + if (!ui::vulkan::util::CreateDedicatedAllocationBuffer( + provider, kGammaRampSize * kMaxFramesInFlight, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + ui::vulkan::util::MemoryPurpose::kUpload, gamma_ramp_upload_buffer_, + gamma_ramp_upload_buffer_memory_, &gamma_ramp_upload_memory_type_, + &gamma_ramp_upload_memory_size_)) { + XELOGE("Failed to create the gamma ramp upload buffer"); + return false; + } + if (dfn.vkMapMemory(device, gamma_ramp_upload_buffer_memory_, 0, + VK_WHOLE_SIZE, 0, + &gamma_ramp_upload_mapping_) != VK_SUCCESS) { + XELOGE("Failed to map the gamma ramp upload buffer"); + return false; + } } - // Swap pipeline layout. - // TODO(Triang3l): Source binding, push constants, FXAA pipeline layout. - VkPipelineLayoutCreateInfo swap_pipeline_layout_create_info; - swap_pipeline_layout_create_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - swap_pipeline_layout_create_info.pNext = nullptr; - swap_pipeline_layout_create_info.flags = 0; - swap_pipeline_layout_create_info.setLayoutCount = 0; - swap_pipeline_layout_create_info.pSetLayouts = nullptr; - swap_pipeline_layout_create_info.pushConstantRangeCount = 0; - swap_pipeline_layout_create_info.pPushConstantRanges = nullptr; - if (dfn.vkCreatePipelineLayout(device, &swap_pipeline_layout_create_info, - nullptr, - &swap_pipeline_layout_) != VK_SUCCESS) { - XELOGE("Failed to create the Vulkan pipeline layout for presentation"); - return false; + // Gamma ramp buffer views. 
+ uint32_t gamma_ramp_frame_count = + gamma_ramp_upload_buffer_ == VK_NULL_HANDLE ? kMaxFramesInFlight : 1; + VkBufferViewCreateInfo gamma_ramp_buffer_view_create_info; + gamma_ramp_buffer_view_create_info.sType = + VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; + gamma_ramp_buffer_view_create_info.pNext = nullptr; + gamma_ramp_buffer_view_create_info.flags = 0; + gamma_ramp_buffer_view_create_info.buffer = gamma_ramp_buffer_; + // 256-entry table. + gamma_ramp_buffer_view_create_info.format = + VK_FORMAT_A2B10G10R10_UNORM_PACK32; + gamma_ramp_buffer_view_create_info.range = kGammaRampSize256EntryTable; + for (uint32_t i = 0; i < gamma_ramp_frame_count; ++i) { + gamma_ramp_buffer_view_create_info.offset = kGammaRampSize * i; + if (dfn.vkCreateBufferView(device, &gamma_ramp_buffer_view_create_info, + nullptr, &gamma_ramp_buffer_views_[i * 2]) != + VK_SUCCESS) { + XELOGE("Failed to create a 256-entry table gamma ramp buffer view"); + return false; + } + } + // Piecewise linear. + gamma_ramp_buffer_view_create_info.format = VK_FORMAT_R16G16_UINT; + gamma_ramp_buffer_view_create_info.range = kGammaRampSizePWL; + for (uint32_t i = 0; i < gamma_ramp_frame_count; ++i) { + gamma_ramp_buffer_view_create_info.offset = + kGammaRampSize * i + kGammaRampSize256EntryTable; + if (dfn.vkCreateBufferView(device, &gamma_ramp_buffer_view_create_info, + nullptr, &gamma_ramp_buffer_views_[i * 2 + 1]) != + VK_SUCCESS) { + XELOGE("Failed to create a PWL gamma ramp buffer view"); + return false; + } } - // Swap pipeline. 
- - VkPipelineShaderStageCreateInfo swap_pipeline_stages[2]; - swap_pipeline_stages[0].sType = - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - swap_pipeline_stages[0].pNext = nullptr; - swap_pipeline_stages[0].flags = 0; - swap_pipeline_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; - swap_pipeline_stages[0].module = ui::vulkan::util::CreateShaderModule( - provider, shaders::fullscreen_tc_vs, sizeof(shaders::fullscreen_tc_vs)); - if (swap_pipeline_stages[0].module == VK_NULL_HANDLE) { - XELOGE("Failed to create the Vulkan vertex shader module for presentation"); - return false; - } - swap_pipeline_stages[0].pName = "main"; - swap_pipeline_stages[0].pSpecializationInfo = nullptr; - swap_pipeline_stages[1].sType = - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - swap_pipeline_stages[1].pNext = nullptr; - swap_pipeline_stages[1].flags = 0; - swap_pipeline_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; - swap_pipeline_stages[1].module = ui::vulkan::util::CreateShaderModule( - provider, shaders::uv_ps, sizeof(shaders::uv_ps)); - if (swap_pipeline_stages[1].module == VK_NULL_HANDLE) { + // Swap descriptor set layouts. 
+ VkDescriptorSetLayoutBinding swap_descriptor_set_layout_binding; + swap_descriptor_set_layout_binding.binding = 0; + swap_descriptor_set_layout_binding.descriptorCount = 1; + swap_descriptor_set_layout_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + swap_descriptor_set_layout_binding.pImmutableSamplers = nullptr; + VkDescriptorSetLayoutCreateInfo swap_descriptor_set_layout_create_info; + swap_descriptor_set_layout_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + swap_descriptor_set_layout_create_info.pNext = nullptr; + swap_descriptor_set_layout_create_info.flags = 0; + swap_descriptor_set_layout_create_info.bindingCount = 1; + swap_descriptor_set_layout_create_info.pBindings = + &swap_descriptor_set_layout_binding; + swap_descriptor_set_layout_binding.descriptorType = + VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + if (dfn.vkCreateDescriptorSetLayout( + device, &swap_descriptor_set_layout_create_info, nullptr, + &swap_descriptor_set_layout_sampled_image_) != VK_SUCCESS) { XELOGE( - "Failed to create the Vulkan fragment shader module for presentation"); - dfn.vkDestroyShaderModule(device, swap_pipeline_stages[0].module, nullptr); + "Failed to create the presentation sampled image descriptor set " + "layout"); + return false; + } + swap_descriptor_set_layout_binding.descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + if (dfn.vkCreateDescriptorSetLayout( + device, &swap_descriptor_set_layout_create_info, nullptr, + &swap_descriptor_set_layout_uniform_texel_buffer_) != VK_SUCCESS) { + XELOGE( + "Failed to create the presentation uniform texel buffer descriptor set " + "layout"); return false; } - swap_pipeline_stages[1].pName = "main"; - swap_pipeline_stages[1].pSpecializationInfo = nullptr; - VkPipelineVertexInputStateCreateInfo swap_pipeline_vertex_input_state = {}; - swap_pipeline_vertex_input_state.sType = + // Swap descriptor pool. 
+ std::array swap_descriptor_pool_sizes; + VkDescriptorPoolCreateInfo swap_descriptor_pool_create_info; + swap_descriptor_pool_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + swap_descriptor_pool_create_info.pNext = nullptr; + swap_descriptor_pool_create_info.flags = 0; + swap_descriptor_pool_create_info.maxSets = 0; + swap_descriptor_pool_create_info.poolSizeCount = 0; + swap_descriptor_pool_create_info.pPoolSizes = + swap_descriptor_pool_sizes.data(); + // TODO(Triang3l): FXAA combined image and sampler sources. + { + VkDescriptorPoolSize& swap_descriptor_pool_size_sampled_image = + swap_descriptor_pool_sizes[swap_descriptor_pool_create_info + .poolSizeCount++]; + swap_descriptor_pool_size_sampled_image.type = + VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + // Source images. + swap_descriptor_pool_size_sampled_image.descriptorCount = + kMaxFramesInFlight; + swap_descriptor_pool_create_info.maxSets += kMaxFramesInFlight; + } + // 256-entry table and PWL gamma ramps. If the gamma ramp buffer is + // host-visible, for multiple frames. + uint32_t gamma_ramp_buffer_view_count = 2 * gamma_ramp_frame_count; + { + VkDescriptorPoolSize& swap_descriptor_pool_size_uniform_texel_buffer = + swap_descriptor_pool_sizes[swap_descriptor_pool_create_info + .poolSizeCount++]; + swap_descriptor_pool_size_uniform_texel_buffer.type = + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + swap_descriptor_pool_size_uniform_texel_buffer.descriptorCount = + gamma_ramp_buffer_view_count; + swap_descriptor_pool_create_info.maxSets += gamma_ramp_buffer_view_count; + } + if (dfn.vkCreateDescriptorPool(device, &swap_descriptor_pool_create_info, + nullptr, + &swap_descriptor_pool_) != VK_SUCCESS) { + XELOGE("Failed to create the presentation descriptor pool"); + return false; + } + + // Swap descriptor set allocation. 
+ VkDescriptorSetAllocateInfo swap_descriptor_set_allocate_info; + swap_descriptor_set_allocate_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + swap_descriptor_set_allocate_info.pNext = nullptr; + swap_descriptor_set_allocate_info.descriptorPool = swap_descriptor_pool_; + swap_descriptor_set_allocate_info.descriptorSetCount = 1; + swap_descriptor_set_allocate_info.pSetLayouts = + &swap_descriptor_set_layout_uniform_texel_buffer_; + for (uint32_t i = 0; i < gamma_ramp_buffer_view_count; ++i) { + if (dfn.vkAllocateDescriptorSets(device, &swap_descriptor_set_allocate_info, + &swap_descriptors_gamma_ramp_[i]) != + VK_SUCCESS) { + XELOGE("Failed to allocate the gamma ramp descriptor sets"); + return false; + } + } + swap_descriptor_set_allocate_info.pSetLayouts = + &swap_descriptor_set_layout_sampled_image_; + for (uint32_t i = 0; i < kMaxFramesInFlight; ++i) { + if (dfn.vkAllocateDescriptorSets(device, &swap_descriptor_set_allocate_info, + &swap_descriptors_source_[i]) != + VK_SUCCESS) { + XELOGE( + "Failed to allocate the presentation source image descriptor sets"); + return false; + } + } + + // Gamma ramp descriptor sets. 
+ VkWriteDescriptorSet gamma_ramp_write_descriptor_set; + gamma_ramp_write_descriptor_set.sType = + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + gamma_ramp_write_descriptor_set.pNext = nullptr; + gamma_ramp_write_descriptor_set.dstBinding = 0; + gamma_ramp_write_descriptor_set.dstArrayElement = 0; + gamma_ramp_write_descriptor_set.descriptorCount = 1; + gamma_ramp_write_descriptor_set.descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + gamma_ramp_write_descriptor_set.pImageInfo = nullptr; + gamma_ramp_write_descriptor_set.pBufferInfo = nullptr; + for (uint32_t i = 0; i < gamma_ramp_buffer_view_count; ++i) { + gamma_ramp_write_descriptor_set.dstSet = swap_descriptors_gamma_ramp_[i]; + gamma_ramp_write_descriptor_set.pTexelBufferView = + &gamma_ramp_buffer_views_[i]; + dfn.vkUpdateDescriptorSets(device, 1, &gamma_ramp_write_descriptor_set, 0, + nullptr); + } + + // Gamma ramp application pipeline layout. + std::array + swap_apply_gamma_descriptor_set_layouts{}; + swap_apply_gamma_descriptor_set_layouts[kSwapApplyGammaDescriptorSetRamp] = + swap_descriptor_set_layout_uniform_texel_buffer_; + swap_apply_gamma_descriptor_set_layouts[kSwapApplyGammaDescriptorSetSource] = + swap_descriptor_set_layout_sampled_image_; + VkPipelineLayoutCreateInfo swap_apply_gamma_pipeline_layout_create_info; + swap_apply_gamma_pipeline_layout_create_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + swap_apply_gamma_pipeline_layout_create_info.pNext = nullptr; + swap_apply_gamma_pipeline_layout_create_info.flags = 0; + swap_apply_gamma_pipeline_layout_create_info.setLayoutCount = + uint32_t(swap_apply_gamma_descriptor_set_layouts.size()); + swap_apply_gamma_pipeline_layout_create_info.pSetLayouts = + swap_apply_gamma_descriptor_set_layouts.data(); + swap_apply_gamma_pipeline_layout_create_info.pushConstantRangeCount = 0; + swap_apply_gamma_pipeline_layout_create_info.pPushConstantRanges = nullptr; + if (dfn.vkCreatePipelineLayout( + device, 
&swap_apply_gamma_pipeline_layout_create_info, nullptr, + &swap_apply_gamma_pipeline_layout_) != VK_SUCCESS) { + XELOGE("Failed to create the gamma ramp application pipeline layout"); + return false; + } + + // Gamma application render pass. Doesn't make assumptions about outer usage + // (explicit barriers must be used instead) for simplicity of use in different + // scenarios with different pipelines. + VkAttachmentDescription swap_apply_gamma_render_pass_attachment; + swap_apply_gamma_render_pass_attachment.flags = 0; + swap_apply_gamma_render_pass_attachment.format = + ui::vulkan::VulkanPresenter::kGuestOutputFormat; + swap_apply_gamma_render_pass_attachment.samples = VK_SAMPLE_COUNT_1_BIT; + swap_apply_gamma_render_pass_attachment.loadOp = + VK_ATTACHMENT_LOAD_OP_DONT_CARE; + swap_apply_gamma_render_pass_attachment.storeOp = + VK_ATTACHMENT_STORE_OP_STORE; + swap_apply_gamma_render_pass_attachment.stencilLoadOp = + VK_ATTACHMENT_LOAD_OP_DONT_CARE; + swap_apply_gamma_render_pass_attachment.stencilStoreOp = + VK_ATTACHMENT_STORE_OP_DONT_CARE; + swap_apply_gamma_render_pass_attachment.initialLayout = + VK_IMAGE_LAYOUT_UNDEFINED; + swap_apply_gamma_render_pass_attachment.finalLayout = + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + VkAttachmentReference swap_apply_gamma_render_pass_color_attachment; + swap_apply_gamma_render_pass_color_attachment.attachment = 0; + swap_apply_gamma_render_pass_color_attachment.layout = + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + VkSubpassDescription swap_apply_gamma_render_pass_subpass = {}; + swap_apply_gamma_render_pass_subpass.pipelineBindPoint = + VK_PIPELINE_BIND_POINT_GRAPHICS; + swap_apply_gamma_render_pass_subpass.colorAttachmentCount = 1; + swap_apply_gamma_render_pass_subpass.pColorAttachments = + &swap_apply_gamma_render_pass_color_attachment; + VkSubpassDependency swap_apply_gamma_render_pass_dependencies[2]; + for (uint32_t i = 0; i < 2; ++i) { + VkSubpassDependency& swap_apply_gamma_render_pass_dependency = + 
swap_apply_gamma_render_pass_dependencies[i]; + swap_apply_gamma_render_pass_dependency.srcSubpass = + i ? 0 : VK_SUBPASS_EXTERNAL; + swap_apply_gamma_render_pass_dependency.dstSubpass = + i ? VK_SUBPASS_EXTERNAL : 0; + swap_apply_gamma_render_pass_dependency.srcStageMask = + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + swap_apply_gamma_render_pass_dependency.dstStageMask = + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + swap_apply_gamma_render_pass_dependency.srcAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + swap_apply_gamma_render_pass_dependency.dstAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + swap_apply_gamma_render_pass_dependency.dependencyFlags = + VK_DEPENDENCY_BY_REGION_BIT; + } + VkRenderPassCreateInfo swap_apply_gamma_render_pass_create_info; + swap_apply_gamma_render_pass_create_info.sType = + VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + swap_apply_gamma_render_pass_create_info.pNext = nullptr; + swap_apply_gamma_render_pass_create_info.flags = 0; + swap_apply_gamma_render_pass_create_info.attachmentCount = 1; + swap_apply_gamma_render_pass_create_info.pAttachments = + &swap_apply_gamma_render_pass_attachment; + swap_apply_gamma_render_pass_create_info.subpassCount = 1; + swap_apply_gamma_render_pass_create_info.pSubpasses = + &swap_apply_gamma_render_pass_subpass; + swap_apply_gamma_render_pass_create_info.dependencyCount = + uint32_t(xe::countof(swap_apply_gamma_render_pass_dependencies)); + swap_apply_gamma_render_pass_create_info.pDependencies = + swap_apply_gamma_render_pass_dependencies; + if (dfn.vkCreateRenderPass(device, &swap_apply_gamma_render_pass_create_info, + nullptr, + &swap_apply_gamma_render_pass_) != VK_SUCCESS) { + XELOGE("Failed to create the gamma ramp application render pass"); + return false; + } + + // Gamma ramp application pipeline. + // Using a graphics pipeline, not a compute one, because storage image support + // is optional for VK_FORMAT_A2B10G10R10_UNORM_PACK32. 
+ + enum SwapApplyGammaPixelShader { + kSwapApplyGammaPixelShader256EntryTable, + kSwapApplyGammaPixelShaderPWL, + + kSwapApplyGammaPixelShaderCount, + }; + std::array + swap_apply_gamma_pixel_shaders{}; + bool swap_apply_gamma_pixel_shaders_created = + (swap_apply_gamma_pixel_shaders[kSwapApplyGammaPixelShader256EntryTable] = + ui::vulkan::util::CreateShaderModule( + provider, shaders::apply_gamma_table_ps, + sizeof(shaders::apply_gamma_table_ps))) != VK_NULL_HANDLE && + (swap_apply_gamma_pixel_shaders[kSwapApplyGammaPixelShaderPWL] = + ui::vulkan::util::CreateShaderModule( + provider, shaders::apply_gamma_pwl_ps, + sizeof(shaders::apply_gamma_pwl_ps))) != VK_NULL_HANDLE; + if (!swap_apply_gamma_pixel_shaders_created) { + XELOGE("Failed to create the gamma ramp application pixel shader modules"); + for (VkShaderModule swap_apply_gamma_pixel_shader : + swap_apply_gamma_pixel_shaders) { + if (swap_apply_gamma_pixel_shader != VK_NULL_HANDLE) { + dfn.vkDestroyShaderModule(device, swap_apply_gamma_pixel_shader, + nullptr); + } + } + return false; + } + + VkPipelineShaderStageCreateInfo swap_apply_gamma_pipeline_stages[2]; + swap_apply_gamma_pipeline_stages[0].sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + swap_apply_gamma_pipeline_stages[0].pNext = nullptr; + swap_apply_gamma_pipeline_stages[0].flags = 0; + swap_apply_gamma_pipeline_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + swap_apply_gamma_pipeline_stages[0].module = + ui::vulkan::util::CreateShaderModule(provider, shaders::fullscreen_cw_vs, + sizeof(shaders::fullscreen_cw_vs)); + if (swap_apply_gamma_pipeline_stages[0].module == VK_NULL_HANDLE) { + XELOGE("Failed to create the gamma ramp application vertex shader module"); + for (VkShaderModule swap_apply_gamma_pixel_shader : + swap_apply_gamma_pixel_shaders) { + assert_true(swap_apply_gamma_pixel_shader != VK_NULL_HANDLE); + dfn.vkDestroyShaderModule(device, swap_apply_gamma_pixel_shader, nullptr); + } + } + 
swap_apply_gamma_pipeline_stages[0].pName = "main"; + swap_apply_gamma_pipeline_stages[0].pSpecializationInfo = nullptr; + swap_apply_gamma_pipeline_stages[1].sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + swap_apply_gamma_pipeline_stages[1].pNext = nullptr; + swap_apply_gamma_pipeline_stages[1].flags = 0; + swap_apply_gamma_pipeline_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + // The fragment shader module will be specified later. + swap_apply_gamma_pipeline_stages[1].pName = "main"; + swap_apply_gamma_pipeline_stages[1].pSpecializationInfo = nullptr; + + VkPipelineVertexInputStateCreateInfo + swap_apply_gamma_pipeline_vertex_input_state = {}; + swap_apply_gamma_pipeline_vertex_input_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - VkPipelineInputAssemblyStateCreateInfo swap_pipeline_input_assembly_state; - swap_pipeline_input_assembly_state.sType = + VkPipelineInputAssemblyStateCreateInfo + swap_apply_gamma_pipeline_input_assembly_state; + swap_apply_gamma_pipeline_input_assembly_state.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - swap_pipeline_input_assembly_state.pNext = nullptr; - swap_pipeline_input_assembly_state.flags = 0; - swap_pipeline_input_assembly_state.topology = + swap_apply_gamma_pipeline_input_assembly_state.pNext = nullptr; + swap_apply_gamma_pipeline_input_assembly_state.flags = 0; + swap_apply_gamma_pipeline_input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; - swap_pipeline_input_assembly_state.primitiveRestartEnable = VK_FALSE; + swap_apply_gamma_pipeline_input_assembly_state.primitiveRestartEnable = + VK_FALSE; - VkPipelineViewportStateCreateInfo swap_pipeline_viewport_state; - swap_pipeline_viewport_state.sType = + VkPipelineViewportStateCreateInfo swap_apply_gamma_pipeline_viewport_state; + swap_apply_gamma_pipeline_viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - swap_pipeline_viewport_state.pNext = nullptr; - 
swap_pipeline_viewport_state.flags = 0; - swap_pipeline_viewport_state.viewportCount = 1; - swap_pipeline_viewport_state.pViewports = nullptr; - swap_pipeline_viewport_state.scissorCount = 1; - swap_pipeline_viewport_state.pScissors = nullptr; + swap_apply_gamma_pipeline_viewport_state.pNext = nullptr; + swap_apply_gamma_pipeline_viewport_state.flags = 0; + swap_apply_gamma_pipeline_viewport_state.viewportCount = 1; + swap_apply_gamma_pipeline_viewport_state.pViewports = nullptr; + swap_apply_gamma_pipeline_viewport_state.scissorCount = 1; + swap_apply_gamma_pipeline_viewport_state.pScissors = nullptr; - VkPipelineRasterizationStateCreateInfo swap_pipeline_rasterization_state = {}; - swap_pipeline_rasterization_state.sType = + VkPipelineRasterizationStateCreateInfo + swap_apply_gamma_pipeline_rasterization_state = {}; + swap_apply_gamma_pipeline_rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - swap_pipeline_rasterization_state.polygonMode = VK_POLYGON_MODE_FILL; - swap_pipeline_rasterization_state.cullMode = VK_CULL_MODE_NONE; - swap_pipeline_rasterization_state.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; - swap_pipeline_rasterization_state.lineWidth = 1.0f; + swap_apply_gamma_pipeline_rasterization_state.polygonMode = + VK_POLYGON_MODE_FILL; + swap_apply_gamma_pipeline_rasterization_state.cullMode = VK_CULL_MODE_NONE; + swap_apply_gamma_pipeline_rasterization_state.frontFace = + VK_FRONT_FACE_CLOCKWISE; + swap_apply_gamma_pipeline_rasterization_state.lineWidth = 1.0f; - VkPipelineMultisampleStateCreateInfo swap_pipeline_multisample_state = {}; - swap_pipeline_multisample_state.sType = + VkPipelineMultisampleStateCreateInfo + swap_apply_gamma_pipeline_multisample_state = {}; + swap_apply_gamma_pipeline_multisample_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - swap_pipeline_multisample_state.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + 
swap_apply_gamma_pipeline_multisample_state.rasterizationSamples = + VK_SAMPLE_COUNT_1_BIT; VkPipelineColorBlendAttachmentState - swap_pipeline_color_blend_attachment_state = {}; - swap_pipeline_color_blend_attachment_state.colorWriteMask = + swap_apply_gamma_pipeline_color_blend_attachment_state = {}; + swap_apply_gamma_pipeline_color_blend_attachment_state.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; - VkPipelineColorBlendStateCreateInfo swap_pipeline_color_blend_state = {}; - swap_pipeline_color_blend_state.sType = + VkPipelineColorBlendStateCreateInfo + swap_apply_gamma_pipeline_color_blend_state = {}; + swap_apply_gamma_pipeline_color_blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - swap_pipeline_color_blend_state.attachmentCount = 1; - swap_pipeline_color_blend_state.pAttachments = - &swap_pipeline_color_blend_attachment_state; + swap_apply_gamma_pipeline_color_blend_state.attachmentCount = 1; + swap_apply_gamma_pipeline_color_blend_state.pAttachments = + &swap_apply_gamma_pipeline_color_blend_attachment_state; - static const VkDynamicState kSwapPipelineDynamicStates[] = { + static const VkDynamicState kSwapApplyGammaPipelineDynamicStates[] = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, }; - VkPipelineDynamicStateCreateInfo swap_pipeline_dynamic_state; - swap_pipeline_dynamic_state.sType = + VkPipelineDynamicStateCreateInfo swap_apply_gamma_pipeline_dynamic_state; + swap_apply_gamma_pipeline_dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; - swap_pipeline_dynamic_state.pNext = nullptr; - swap_pipeline_dynamic_state.flags = 0; - swap_pipeline_dynamic_state.dynamicStateCount = - uint32_t(xe::countof(kSwapPipelineDynamicStates)); - swap_pipeline_dynamic_state.pDynamicStates = kSwapPipelineDynamicStates; + swap_apply_gamma_pipeline_dynamic_state.pNext = nullptr; + swap_apply_gamma_pipeline_dynamic_state.flags = 0; + 
swap_apply_gamma_pipeline_dynamic_state.dynamicStateCount = + uint32_t(xe::countof(kSwapApplyGammaPipelineDynamicStates)); + swap_apply_gamma_pipeline_dynamic_state.pDynamicStates = + kSwapApplyGammaPipelineDynamicStates; - VkGraphicsPipelineCreateInfo swap_pipeline_create_info; - swap_pipeline_create_info.sType = + VkGraphicsPipelineCreateInfo swap_apply_gamma_pipeline_create_info; + swap_apply_gamma_pipeline_create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; - swap_pipeline_create_info.pNext = nullptr; - swap_pipeline_create_info.flags = 0; - swap_pipeline_create_info.stageCount = - uint32_t(xe::countof(swap_pipeline_stages)); - swap_pipeline_create_info.pStages = swap_pipeline_stages; - swap_pipeline_create_info.pVertexInputState = - &swap_pipeline_vertex_input_state; - swap_pipeline_create_info.pInputAssemblyState = - &swap_pipeline_input_assembly_state; - swap_pipeline_create_info.pTessellationState = nullptr; - swap_pipeline_create_info.pViewportState = &swap_pipeline_viewport_state; - swap_pipeline_create_info.pRasterizationState = - &swap_pipeline_rasterization_state; - swap_pipeline_create_info.pMultisampleState = - &swap_pipeline_multisample_state; - swap_pipeline_create_info.pDepthStencilState = nullptr; - swap_pipeline_create_info.pColorBlendState = &swap_pipeline_color_blend_state; - swap_pipeline_create_info.pDynamicState = &swap_pipeline_dynamic_state; - swap_pipeline_create_info.layout = swap_pipeline_layout_; - swap_pipeline_create_info.renderPass = swap_render_pass_; - swap_pipeline_create_info.subpass = 0; - swap_pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE; - swap_pipeline_create_info.basePipelineIndex = -1; - VkResult swap_pipeline_create_result = dfn.vkCreateGraphicsPipelines( - device, VK_NULL_HANDLE, 1, &swap_pipeline_create_info, nullptr, - &swap_pipeline_); - for (size_t i = 0; i < xe::countof(swap_pipeline_stages); ++i) { - dfn.vkDestroyShaderModule(device, swap_pipeline_stages[i].module, nullptr); + 
swap_apply_gamma_pipeline_create_info.pNext = nullptr; + swap_apply_gamma_pipeline_create_info.flags = 0; + swap_apply_gamma_pipeline_create_info.stageCount = + uint32_t(xe::countof(swap_apply_gamma_pipeline_stages)); + swap_apply_gamma_pipeline_create_info.pStages = + swap_apply_gamma_pipeline_stages; + swap_apply_gamma_pipeline_create_info.pVertexInputState = + &swap_apply_gamma_pipeline_vertex_input_state; + swap_apply_gamma_pipeline_create_info.pInputAssemblyState = + &swap_apply_gamma_pipeline_input_assembly_state; + swap_apply_gamma_pipeline_create_info.pTessellationState = nullptr; + swap_apply_gamma_pipeline_create_info.pViewportState = + &swap_apply_gamma_pipeline_viewport_state; + swap_apply_gamma_pipeline_create_info.pRasterizationState = + &swap_apply_gamma_pipeline_rasterization_state; + swap_apply_gamma_pipeline_create_info.pMultisampleState = + &swap_apply_gamma_pipeline_multisample_state; + swap_apply_gamma_pipeline_create_info.pDepthStencilState = nullptr; + swap_apply_gamma_pipeline_create_info.pColorBlendState = + &swap_apply_gamma_pipeline_color_blend_state; + swap_apply_gamma_pipeline_create_info.pDynamicState = + &swap_apply_gamma_pipeline_dynamic_state; + swap_apply_gamma_pipeline_create_info.layout = + swap_apply_gamma_pipeline_layout_; + swap_apply_gamma_pipeline_create_info.renderPass = + swap_apply_gamma_render_pass_; + swap_apply_gamma_pipeline_create_info.subpass = 0; + swap_apply_gamma_pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE; + swap_apply_gamma_pipeline_create_info.basePipelineIndex = -1; + swap_apply_gamma_pipeline_stages[1].module = + swap_apply_gamma_pixel_shaders[kSwapApplyGammaPixelShader256EntryTable]; + VkResult swap_apply_gamma_pipeline_256_entry_table_create_result = + dfn.vkCreateGraphicsPipelines( + device, VK_NULL_HANDLE, 1, &swap_apply_gamma_pipeline_create_info, + nullptr, &swap_apply_gamma_256_entry_table_pipeline_); + swap_apply_gamma_pipeline_stages[1].module = + 
swap_apply_gamma_pixel_shaders[kSwapApplyGammaPixelShaderPWL]; + VkResult swap_apply_gamma_pipeline_pwl_create_result = + dfn.vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, + &swap_apply_gamma_pipeline_create_info, + nullptr, &swap_apply_gamma_pwl_pipeline_); + dfn.vkDestroyShaderModule(device, swap_apply_gamma_pipeline_stages[0].module, + nullptr); + for (VkShaderModule swap_apply_gamma_pixel_shader : + swap_apply_gamma_pixel_shaders) { + assert_true(swap_apply_gamma_pixel_shader != VK_NULL_HANDLE); + dfn.vkDestroyShaderModule(device, swap_apply_gamma_pixel_shader, nullptr); } - if (swap_pipeline_create_result != VK_SUCCESS) { - XELOGE("Failed to create the Vulkan pipeline for presentation"); + if (swap_apply_gamma_pipeline_256_entry_table_create_result != VK_SUCCESS || + swap_apply_gamma_pipeline_pwl_create_result != VK_SUCCESS) { + XELOGE("Failed to create the gamma ramp application pipelines"); return false; } @@ -616,11 +991,36 @@ void VulkanCommandProcessor::ShutdownContext() { } ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device, - swap_pipeline_); - ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device, - swap_pipeline_layout_); + swap_apply_gamma_pwl_pipeline_); + ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyPipeline, device, + swap_apply_gamma_256_entry_table_pipeline_); ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyRenderPass, device, - swap_render_pass_); + swap_apply_gamma_render_pass_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device, + swap_apply_gamma_pipeline_layout_); + + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device, + swap_descriptor_pool_); + + ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyDescriptorSetLayout, device, + swap_descriptor_set_layout_uniform_texel_buffer_); + ui::vulkan::util::DestroyAndNullHandle( + dfn.vkDestroyDescriptorSetLayout, device, + swap_descriptor_set_layout_sampled_image_); + for 
(VkBufferView& gamma_ramp_buffer_view : gamma_ramp_buffer_views_) { + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBufferView, device, + gamma_ramp_buffer_view); + } + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, + gamma_ramp_upload_buffer_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, + gamma_ramp_upload_buffer_memory_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, + gamma_ramp_buffer_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, + gamma_ramp_buffer_memory_); ui::vulkan::util::DestroyAndNullHandle( dfn.vkDestroyDescriptorPool, device, @@ -782,6 +1182,14 @@ void VulkanCommandProcessor::SparseBindBuffer( sparse_bind_wait_stage_mask_ |= wait_stage_mask; } +void VulkanCommandProcessor::OnGammaRamp256EntryTableValueWritten() { + gamma_ramp_256_entry_table_current_frame_ = UINT32_MAX; +} + +void VulkanCommandProcessor::OnGammaRampPWLValueWritten() { + gamma_ramp_pwl_current_frame_ = UINT32_MAX; +} + void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, uint32_t frontbuffer_height) { @@ -796,14 +1204,29 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, return; } - // TODO(Triang3l): Resolution scale. - uint32_t resolution_scale = 1; - uint32_t scaled_width = frontbuffer_width * resolution_scale; - uint32_t scaled_height = frontbuffer_height * resolution_scale; + // In case the swap command is the only one in the frame. + if (!BeginSubmission(true)) { + return; + } + + // Obtaining the actual front buffer size to pass to RefreshGuestOutput, + // resolution-scaled if it's a resolve destination, or not otherwise. 
+ uint32_t frontbuffer_width_scaled, frontbuffer_height_scaled; + xenos::TextureFormat frontbuffer_format; + VkImageView swap_texture_view = texture_cache_->RequestSwapTexture( + frontbuffer_width_scaled, frontbuffer_height_scaled, frontbuffer_format); + if (swap_texture_view == VK_NULL_HANDLE) { + return; + } + + uint32_t draw_resolution_scale_max = + std::max(texture_cache_->draw_resolution_scale_x(), + texture_cache_->draw_resolution_scale_y()); presenter->RefreshGuestOutput( - scaled_width, scaled_height, 1280 * resolution_scale, - 720 * resolution_scale, - [this, scaled_width, scaled_height]( + frontbuffer_width_scaled, frontbuffer_height_scaled, + 1280 * draw_resolution_scale_max, 720 * draw_resolution_scale_max, + [this, frontbuffer_width_scaled, frontbuffer_height_scaled, + frontbuffer_format, swap_texture_view]( ui::Presenter::GuestOutputRefreshContext& context) -> bool { // In case the swap command is the only one in the frame. if (!BeginSubmission(true)) { @@ -819,6 +1242,105 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + uint32_t swap_frame_index = + uint32_t(frame_current_ % kMaxFramesInFlight); + + // This is according to D3D::InitializePresentationParameters from a + // game executable, which initializes the 256-entry table gamma ramp for + // 8_8_8_8 output and the PWL gamma ramp for 2_10_10_10. + // TODO(Triang3l): Choose between the table and PWL based on + // DC_LUTA_CONTROL, support both for all formats (and also different + // increments for PWL). + bool use_pwl_gamma_ramp = + frontbuffer_format == xenos::TextureFormat::k_2_10_10_10 || + frontbuffer_format == + xenos::TextureFormat::k_2_10_10_10_AS_16_16_16_16; + + // TODO(Triang3l): FXAA can result in more than 8 bits of precision. + context.SetIs8bpc(!use_pwl_gamma_ramp); + + // Update the gamma ramp if it's out of date. 
+ uint32_t& gamma_ramp_frame_index_ref = + use_pwl_gamma_ramp ? gamma_ramp_pwl_current_frame_ + : gamma_ramp_256_entry_table_current_frame_; + if (gamma_ramp_frame_index_ref == UINT32_MAX) { + constexpr uint32_t kGammaRampSize256EntryTable = + sizeof(uint32_t) * 256; + constexpr uint32_t kGammaRampSizePWL = sizeof(uint16_t) * 2 * 3 * 128; + constexpr uint32_t kGammaRampSize = + kGammaRampSize256EntryTable + kGammaRampSizePWL; + uint32_t gamma_ramp_offset_in_frame = + use_pwl_gamma_ramp ? kGammaRampSize256EntryTable : 0; + uint32_t gamma_ramp_upload_offset = + kGammaRampSize * swap_frame_index + gamma_ramp_offset_in_frame; + uint32_t gamma_ramp_size = use_pwl_gamma_ramp + ? kGammaRampSizePWL + : kGammaRampSize256EntryTable; + void* gamma_ramp_frame_upload = + reinterpret_cast(gamma_ramp_upload_mapping_) + + gamma_ramp_upload_offset; + if (std::endian::native != std::endian::little && + use_pwl_gamma_ramp) { + // R16G16 is first R16, where the shader expects the base, and + // second G16, where the delta should be, but gamma_ramp_pwl_rgb() + // is an array of 32-bit DC_LUT_PWL_DATA registers - swap 16 bits in + // each 32. + auto gamma_ramp_pwl_upload = + reinterpret_cast( + gamma_ramp_frame_upload); + const reg::DC_LUT_PWL_DATA* gamma_ramp_pwl = gamma_ramp_pwl_rgb(); + for (size_t i = 0; i < 128 * 3; ++i) { + reg::DC_LUT_PWL_DATA& gamma_ramp_pwl_upload_entry = + gamma_ramp_pwl_upload[i]; + reg::DC_LUT_PWL_DATA gamma_ramp_pwl_entry = gamma_ramp_pwl[i]; + gamma_ramp_pwl_upload_entry.base = gamma_ramp_pwl_entry.delta; + gamma_ramp_pwl_upload_entry.delta = gamma_ramp_pwl_entry.base; + } + } else { + std::memcpy( + gamma_ramp_frame_upload, + use_pwl_gamma_ramp + ? static_cast(gamma_ramp_pwl_rgb()) + : static_cast(gamma_ramp_256_entry_table()), + gamma_ramp_size); + } + bool gamma_ramp_has_upload_buffer = + gamma_ramp_upload_buffer_memory_ != VK_NULL_HANDLE; + ui::vulkan::util::FlushMappedMemoryRange( + provider, + gamma_ramp_has_upload_buffer ? 
gamma_ramp_upload_buffer_memory_ + : gamma_ramp_buffer_memory_, + gamma_ramp_upload_memory_type_, gamma_ramp_upload_offset, + gamma_ramp_upload_memory_size_, gamma_ramp_size); + if (gamma_ramp_has_upload_buffer) { + // Copy from the host-visible buffer to the device-local one. + PushBufferMemoryBarrier( + gamma_ramp_buffer_, gamma_ramp_offset_in_frame, gamma_ramp_size, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_SHADER_READ_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, false); + SubmitBarriers(true); + VkBufferCopy gamma_ramp_buffer_copy; + gamma_ramp_buffer_copy.srcOffset = gamma_ramp_upload_offset; + gamma_ramp_buffer_copy.dstOffset = gamma_ramp_offset_in_frame; + gamma_ramp_buffer_copy.size = gamma_ramp_size; + deferred_command_buffer_.CmdVkCopyBuffer(gamma_ramp_upload_buffer_, + gamma_ramp_buffer_, 1, + &gamma_ramp_buffer_copy); + PushBufferMemoryBarrier( + gamma_ramp_buffer_, gamma_ramp_offset_in_frame, gamma_ramp_size, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, false); + } + // The device-local, but not host-visible, gamma ramp buffer doesn't + // have per-frame sets of gamma ramps. + gamma_ramp_frame_index_ref = + gamma_ramp_has_upload_buffer ? 0 : swap_frame_index; + } + // Make sure a framebuffer is available for the current guest output // image version. 
size_t swap_framebuffer_index = SIZE_MAX; @@ -865,19 +1387,18 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, } new_swap_framebuffer.framebuffer = VK_NULL_HANDLE; } - VkImageView guest_output_image_view_srgb = - vulkan_context.image_view(); + VkImageView guest_output_image_view = vulkan_context.image_view(); VkFramebufferCreateInfo swap_framebuffer_create_info; swap_framebuffer_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; swap_framebuffer_create_info.pNext = nullptr; swap_framebuffer_create_info.flags = 0; - swap_framebuffer_create_info.renderPass = swap_render_pass_; + swap_framebuffer_create_info.renderPass = + swap_apply_gamma_render_pass_; swap_framebuffer_create_info.attachmentCount = 1; - swap_framebuffer_create_info.pAttachments = - &guest_output_image_view_srgb; - swap_framebuffer_create_info.width = scaled_width; - swap_framebuffer_create_info.height = scaled_height; + swap_framebuffer_create_info.pAttachments = &guest_output_image_view; + swap_framebuffer_create_info.width = frontbuffer_width_scaled; + swap_framebuffer_create_info.height = frontbuffer_height_scaled; swap_framebuffer_create_info.layers = 1; if (dfn.vkCreateFramebuffer( device, &swap_framebuffer_create_info, nullptr, @@ -891,7 +1412,6 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, new_swap_framebuffer.last_submission = 0; } - if (vulkan_context.image_ever_written_previously()) { // Insert a barrier after the last presenter's usage of the guest // output image. 
Will be overwriting all the contents, so oldLayout @@ -918,12 +1438,14 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, VkRenderPassBeginInfo render_pass_begin_info; render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; render_pass_begin_info.pNext = nullptr; - render_pass_begin_info.renderPass = swap_render_pass_; + render_pass_begin_info.renderPass = swap_apply_gamma_render_pass_; render_pass_begin_info.framebuffer = swap_framebuffer.framebuffer; render_pass_begin_info.renderArea.offset.x = 0; render_pass_begin_info.renderArea.offset.y = 0; - render_pass_begin_info.renderArea.extent.width = scaled_width; - render_pass_begin_info.renderArea.extent.height = scaled_height; + render_pass_begin_info.renderArea.extent.width = + frontbuffer_width_scaled; + render_pass_begin_info.renderArea.extent.height = + frontbuffer_height_scaled; render_pass_begin_info.clearValueCount = 0; render_pass_begin_info.pClearValues = nullptr; deferred_command_buffer_.CmdVkBeginRenderPass( @@ -932,19 +1454,58 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, VkViewport viewport; viewport.x = 0.0f; viewport.y = 0.0f; - viewport.width = float(scaled_width); - viewport.height = float(scaled_height); + viewport.width = float(frontbuffer_width_scaled); + viewport.height = float(frontbuffer_height_scaled); viewport.minDepth = 0.0f; viewport.maxDepth = 1.0f; SetViewport(viewport); VkRect2D scissor; scissor.offset.x = 0; scissor.offset.y = 0; - scissor.extent.width = scaled_width; - scissor.extent.height = scaled_height; + scissor.extent.width = frontbuffer_width_scaled; + scissor.extent.height = frontbuffer_height_scaled; SetScissor(scissor); - BindExternalGraphicsPipeline(swap_pipeline_); + BindExternalGraphicsPipeline( + use_pwl_gamma_ramp ? 
swap_apply_gamma_pwl_pipeline_ + : swap_apply_gamma_256_entry_table_pipeline_); + + VkDescriptorSet swap_descriptor_source = + swap_descriptors_source_[swap_frame_index]; + VkDescriptorImageInfo swap_descriptor_source_image_info; + swap_descriptor_source_image_info.sampler = VK_NULL_HANDLE; + swap_descriptor_source_image_info.imageView = swap_texture_view; + swap_descriptor_source_image_info.imageLayout = + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + VkWriteDescriptorSet swap_descriptor_source_write; + swap_descriptor_source_write.sType = + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + swap_descriptor_source_write.pNext = nullptr; + swap_descriptor_source_write.dstSet = swap_descriptor_source; + swap_descriptor_source_write.dstBinding = 0; + swap_descriptor_source_write.dstArrayElement = 0; + swap_descriptor_source_write.descriptorCount = 1; + swap_descriptor_source_write.descriptorType = + VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + swap_descriptor_source_write.pImageInfo = + &swap_descriptor_source_image_info; + swap_descriptor_source_write.pBufferInfo = nullptr; + swap_descriptor_source_write.pTexelBufferView = nullptr; + dfn.vkUpdateDescriptorSets(device, 1, &swap_descriptor_source_write, 0, + nullptr); + + std::array + swap_descriptor_sets{}; + swap_descriptor_sets[kSwapApplyGammaDescriptorSetRamp] = + swap_descriptors_gamma_ramp_[2 * gamma_ramp_frame_index_ref + + uint32_t(use_pwl_gamma_ramp)]; + swap_descriptor_sets[kSwapApplyGammaDescriptorSetSource] = + swap_descriptor_source; + // TODO(Triang3l): Red / blue swap without imageViewFormatSwizzle. 
+ deferred_command_buffer_.CmdVkBindDescriptorSets( + VK_PIPELINE_BIND_POINT_GRAPHICS, swap_apply_gamma_pipeline_layout_, + 0, uint32_t(swap_descriptor_sets.size()), + swap_descriptor_sets.data(), 0, nullptr); deferred_command_buffer_.CmdVkDraw(3, 1, 0, 0); diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 5c8cfecab..164e7b253 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -265,6 +265,9 @@ class VulkanCommandProcessor : public CommandProcessor { void WriteRegister(uint32_t index, uint32_t value) override; + void OnGammaRamp256EntryTableValueWritten() override; + void OnGammaRampPWLValueWritten() override; + void IssueSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, uint32_t frontbuffer_height) override; @@ -398,6 +401,21 @@ class VulkanCommandProcessor : public CommandProcessor { VkDescriptorSet set; }; + enum SwapApplyGammaDescriptorSet : uint32_t { + kSwapApplyGammaDescriptorSetRamp, + kSwapApplyGammaDescriptorSetSource, + + kSwapApplyGammaDescriptorSetCount, + }; + + // Framebuffer for the current presenter's guest output image revision, and + // its usage tracking. + struct SwapFramebuffer { + VkFramebuffer framebuffer = VK_NULL_HANDLE; + uint64_t version = UINT64_MAX; + uint64_t last_submission = 0; + }; + // BeginSubmission and EndSubmission may be called at any time. If there's an // open non-frame submission, BeginSubmission(true) will promote it to a // frame. EndSubmission(true) will close the frame no matter whether the @@ -554,24 +572,55 @@ class VulkanCommandProcessor : public CommandProcessor { VkDescriptorPool shared_memory_and_edram_descriptor_pool_ = VK_NULL_HANDLE; VkDescriptorSet shared_memory_and_edram_descriptor_set_; + // Bytes 0x0...0x3FF - 256-entry gamma ramp table with B10G10R10X2 data (read + // as R10G10B10X2 with swizzle). 
+ // Bytes 0x400...0x9FF - 128-entry PWL R16G16 gamma ramp (R - base, G - delta, + // low 6 bits of each are zero, 3 elements per entry). + // kMaxFramesInFlight pairs of gamma ramps if in host-visible memory and + // uploaded directly, one otherwise. + VkDeviceMemory gamma_ramp_buffer_memory_ = VK_NULL_HANDLE; + VkBuffer gamma_ramp_buffer_ = VK_NULL_HANDLE; + // kMaxFramesInFlight pairs, only when the gamma ramp buffer is not + // host-visible. + VkDeviceMemory gamma_ramp_upload_buffer_memory_ = VK_NULL_HANDLE; + VkBuffer gamma_ramp_upload_buffer_ = VK_NULL_HANDLE; + VkDeviceSize gamma_ramp_upload_memory_size_; + uint32_t gamma_ramp_upload_memory_type_; + // Mapping of either gamma_ramp_buffer_memory_ (if it's host-visible) or + // gamma_ramp_upload_buffer_memory_ (otherwise). + void* gamma_ramp_upload_mapping_; + std::array gamma_ramp_buffer_views_{}; + // UINT32_MAX if outdated. + uint32_t gamma_ramp_256_entry_table_current_frame_ = UINT32_MAX; + uint32_t gamma_ramp_pwl_current_frame_ = UINT32_MAX; + + VkDescriptorSetLayout swap_descriptor_set_layout_sampled_image_ = + VK_NULL_HANDLE; + VkDescriptorSetLayout swap_descriptor_set_layout_uniform_texel_buffer_ = + VK_NULL_HANDLE; + + // Descriptor pool for allocating descriptors needed for presentation, such as + // the destination images and the gamma ramps. + VkDescriptorPool swap_descriptor_pool_ = VK_NULL_HANDLE; + // Interleaved 256-entry table and PWL texel buffer descriptors. + // kMaxFramesInFlight pairs of gamma ramps if in host-visible memory and + // uploaded directly, one otherwise. + std::array + swap_descriptors_gamma_ramp_; + // Sampled images. + std::array swap_descriptors_source_; + + VkPipelineLayout swap_apply_gamma_pipeline_layout_ = VK_NULL_HANDLE; // Has no dependencies on specific pipeline stages on both ends to simplify // use in different scenarios with different pipelines - use explicit barriers - // for synchronization. Drawing to VK_FORMAT_R8G8B8A8_SRGB. 
- VkRenderPass swap_render_pass_ = VK_NULL_HANDLE; - VkPipelineLayout swap_pipeline_layout_ = VK_NULL_HANDLE; - VkPipeline swap_pipeline_ = VK_NULL_HANDLE; + // for synchronization. + VkRenderPass swap_apply_gamma_render_pass_ = VK_NULL_HANDLE; + VkPipeline swap_apply_gamma_256_entry_table_pipeline_ = VK_NULL_HANDLE; + VkPipeline swap_apply_gamma_pwl_pipeline_ = VK_NULL_HANDLE; - // Framebuffer for the current presenter's guest output image revision, and - // its usage tracking. - struct SwapFramebuffer { - VkFramebuffer framebuffer = VK_NULL_HANDLE; - uint64_t version = UINT64_MAX; - uint64_t last_submission = 0; - }; std::array swap_framebuffers_; - std::deque> swap_framebuffers_outdated_; // Pending pipeline barriers. std::vector pending_barriers_buffer_memory_barriers_; diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index 1806d7df3..ac7543330 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -589,6 +589,59 @@ VkImageView VulkanTextureCache::GetActiveBindingOrNullImageView( } } +VkImageView VulkanTextureCache::RequestSwapTexture( + uint32_t& width_scaled_out, uint32_t& height_scaled_out, + xenos::TextureFormat& format_out) { + const auto& regs = register_file(); + const auto& fetch = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0); + TextureKey key; + BindingInfoFromFetchConstant(fetch, key, nullptr); + if (!key.is_valid || key.base_page == 0 || + key.dimension != xenos::DataDimension::k2DOrStacked) { + return nullptr; + } + VulkanTexture* texture = + static_cast(FindOrCreateTexture(key)); + if (!texture) { + return VK_NULL_HANDLE; + } + VkImageView texture_view = texture->GetView( + false, GuestToHostSwizzle(fetch.swizzle, GetHostFormatSwizzle(key)), + false); + if (texture_view == VK_NULL_HANDLE) { + return VK_NULL_HANDLE; + } + if (!LoadTextureData(*texture)) { + return VK_NULL_HANDLE; + } + texture->MarkAsUsed(); + VulkanTexture::Usage 
old_usage = + texture->SetUsage(VulkanTexture::Usage::kSwapSampled); + if (old_usage != VulkanTexture::Usage::kSwapSampled) { + VkPipelineStageFlags src_stage_mask, dst_stage_mask; + VkAccessFlags src_access_mask, dst_access_mask; + VkImageLayout old_layout, new_layout; + GetTextureUsageMasks(old_usage, src_stage_mask, src_access_mask, + old_layout); + GetTextureUsageMasks(VulkanTexture::Usage::kSwapSampled, dst_stage_mask, + dst_access_mask, new_layout); + command_processor_.PushImageMemoryBarrier( + texture->image(), ui::vulkan::util::InitializeSubresourceRange(), + src_stage_mask, dst_stage_mask, src_access_mask, dst_access_mask, + old_layout, new_layout); + } + // Only texture->key, not the result of BindingInfoFromFetchConstant, contains + // whether the texture is scaled. + key = texture->key(); + width_scaled_out = + key.GetWidth() * (key.scaled_resolve ? draw_resolution_scale_x() : 1); + height_scaled_out = + key.GetHeight() * (key.scaled_resolve ? draw_resolution_scale_y() : 1); + format_out = key.format; + return texture_view; +} + bool VulkanTextureCache::IsSignedVersionSeparateForFormat( TextureKey key) const { const HostFormatPair& host_format_pair = GetHostFormatPair(key); @@ -1263,7 +1316,14 @@ VulkanTextureCache::VulkanTexture::~VulkanTexture() { } VkImageView VulkanTextureCache::VulkanTexture::GetView(bool is_signed, - uint32_t host_swizzle) { + uint32_t host_swizzle, + bool is_array) { + xenos::DataDimension dimension = key().dimension; + if (dimension == xenos::DataDimension::k3D || + dimension == xenos::DataDimension::kCube) { + is_array = false; + } + const VulkanTextureCache& vulkan_texture_cache = static_cast(texture_cache()); @@ -1297,6 +1357,8 @@ VkImageView VulkanTextureCache::VulkanTexture::GetView(bool is_signed, } view_key.host_swizzle = host_swizzle; + view_key.is_array = uint32_t(is_array); + // Try to find an existing view. 
auto it = views_.find(view_key); if (it != views_.end()) { @@ -1311,17 +1373,6 @@ VkImageView VulkanTextureCache::VulkanTexture::GetView(bool is_signed, view_create_info.pNext = nullptr; view_create_info.flags = 0; view_create_info.image = image(); - switch (key().dimension) { - case xenos::DataDimension::k3D: - view_create_info.viewType = VK_IMAGE_VIEW_TYPE_3D; - break; - case xenos::DataDimension::kCube: - view_create_info.viewType = VK_IMAGE_VIEW_TYPE_CUBE; - break; - default: - view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; - break; - } view_create_info.format = format; view_create_info.components.r = GetComponentSwizzle(host_swizzle, 0); view_create_info.components.g = GetComponentSwizzle(host_swizzle, 1); @@ -1329,6 +1380,22 @@ VkImageView VulkanTextureCache::VulkanTexture::GetView(bool is_signed, view_create_info.components.a = GetComponentSwizzle(host_swizzle, 3); view_create_info.subresourceRange = ui::vulkan::util::InitializeSubresourceRange(); + switch (dimension) { + case xenos::DataDimension::k3D: + view_create_info.viewType = VK_IMAGE_VIEW_TYPE_3D; + break; + case xenos::DataDimension::kCube: + view_create_info.viewType = VK_IMAGE_VIEW_TYPE_CUBE; + break; + default: + if (is_array) { + view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + } else { + view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_create_info.subresourceRange.layerCount = 1; + } + break; + } VkImageView view; if (dfn.vkCreateImageView(device, &view_create_info, nullptr, &view) != VK_SUCCESS) { @@ -2248,9 +2315,10 @@ void VulkanTextureCache::GetTextureUsageMasks(VulkanTexture::Usage usage, layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; break; case VulkanTexture::Usage::kSwapSampled: - // The swap texture is likely to be used only for the presentation compute - // shader, and not during emulation, where it'd be used in other stages. 
- stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + // The swap texture is likely to be used only for the presentation + // fragment shader, and not during emulation, where it'd be used in other + // stages. + stage_mask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; access_mask = VK_ACCESS_SHADER_READ_BIT; layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; break; diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.h b/src/xenia/gpu/vulkan/vulkan_texture_cache.h index 6b6c4814a..423131c55 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.h @@ -60,6 +60,13 @@ class VulkanTextureCache final : public TextureCache { xenos::FetchOpDimension dimension, bool is_signed) const; + // Returns the 2D view of the front buffer texture (for fragment shader + // reading - the barrier will be pushed in the command processor if needed), + // or VK_NULL_HANDLE in case of failure. May call LoadTextureData. + VkImageView RequestSwapTexture(uint32_t& width_scaled_out, + uint32_t& height_scaled_out, + xenos::TextureFormat& format_out); + protected: bool IsSignedVersionSeparateForFormat(TextureKey key) const override; uint32_t GetHostFormatSwizzle(TextureKey key) const override; @@ -136,7 +143,8 @@ class VulkanTextureCache final : public TextureCache { return old_usage; } - VkImageView GetView(bool is_signed, uint32_t host_swizzle); + VkImageView GetView(bool is_signed, uint32_t host_swizzle, + bool is_array = true); private: union ViewKey { @@ -144,6 +152,7 @@ class VulkanTextureCache final : public TextureCache { struct { uint32_t is_signed_separate_view : 1; uint32_t host_swizzle : 12; + uint32_t is_array : 1; }; ViewKey() : key(0) { static_assert_size(*this, sizeof(key)); } diff --git a/src/xenia/ui/vulkan/functions/device_1_0.inc b/src/xenia/ui/vulkan/functions/device_1_0.inc index 148d6dd52..ae640f61e 100644 --- a/src/xenia/ui/vulkan/functions/device_1_0.inc +++ b/src/xenia/ui/vulkan/functions/device_1_0.inc @@ -29,6 +29,7 @@ 
XE_UI_VULKAN_FUNCTION(vkCmdSetStencilReference) XE_UI_VULKAN_FUNCTION(vkCmdSetStencilWriteMask) XE_UI_VULKAN_FUNCTION(vkCmdSetViewport) XE_UI_VULKAN_FUNCTION(vkCreateBuffer) +XE_UI_VULKAN_FUNCTION(vkCreateBufferView) XE_UI_VULKAN_FUNCTION(vkCreateCommandPool) XE_UI_VULKAN_FUNCTION(vkCreateComputePipelines) XE_UI_VULKAN_FUNCTION(vkCreateDescriptorPool) @@ -44,6 +45,7 @@ XE_UI_VULKAN_FUNCTION(vkCreateSampler) XE_UI_VULKAN_FUNCTION(vkCreateSemaphore) XE_UI_VULKAN_FUNCTION(vkCreateShaderModule) XE_UI_VULKAN_FUNCTION(vkDestroyBuffer) +XE_UI_VULKAN_FUNCTION(vkDestroyBufferView) XE_UI_VULKAN_FUNCTION(vkDestroyCommandPool) XE_UI_VULKAN_FUNCTION(vkDestroyDescriptorPool) XE_UI_VULKAN_FUNCTION(vkDestroyDescriptorSetLayout) From c37c05d189c7f9c0d9b5dfe2cb5f6898b0df4c2f Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 25 Jun 2022 14:35:15 +0300 Subject: [PATCH 101/123] [Vulkan] Remove an outdated fullscreen shader comment [ci skip] --- src/xenia/gpu/vulkan/vulkan_command_processor.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 8b45446db..2a38b8612 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -42,7 +42,6 @@ namespace vulkan { // Generated with `xb buildshaders`. namespace shaders { -// TODO(Triang3l): Remove the texture coordinates. 
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/apply_gamma_pwl_fxaa_luma_ps.h" #include "xenia/gpu/shaders/bytecode/vulkan_spirv/apply_gamma_pwl_ps.h" #include "xenia/gpu/shaders/bytecode/vulkan_spirv/apply_gamma_table_fxaa_luma_ps.h" From 758db4ccb35c4ee439011cf0d4a1ce2b8d529f3a Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 25 Jun 2022 15:15:06 +0300 Subject: [PATCH 102/123] [Vulkan] Fix textures not loaded if using a shader for the first time --- .../gpu/vulkan/vulkan_command_processor.cc | 46 ++++++++++--------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 2a38b8612..72c8f0efa 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -2210,7 +2210,16 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, *pixel_shader, normalized_color_mask) : SpirvShaderTranslator::Modification(0); - // Translate the shaders. + // Set up the render targets - this may perform dispatches and draws. + if (!render_target_cache_->Update(is_rasterization_done, + normalized_depth_control, + normalized_color_mask, *vertex_shader)) { + return false; + } + + // Create the pipeline (for this, need the render pass from the render target + // cache), translating the shaders - doing this now to obtain the used + // textures. VulkanShader::VulkanTranslation* vertex_shader_translation = static_cast( vertex_shader->GetOrCreateTranslation( @@ -2220,27 +2229,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, pixel_shader->GetOrCreateTranslation( pixel_shader_modification.value)) : nullptr; - - // Update the textures before other work in the submission because samplers - // depend on this (and in case of sampler overflow in a submission, - // submissions must be split) - may perform dispatches. 
- uint32_t used_texture_mask = - vertex_shader->GetUsedTextureMaskAfterTranslation() | - (pixel_shader != nullptr - ? pixel_shader->GetUsedTextureMaskAfterTranslation() - : 0); - texture_cache_->RequestTextures(used_texture_mask); - - // Set up the render targets - this may perform dispatches and draws. - if (!render_target_cache_->Update(is_rasterization_done, - normalized_depth_control, - normalized_color_mask, *vertex_shader)) { - return false; - } - - // Update the graphics pipeline, and if the new graphics pipeline has a - // different layout, invalidate incompatible descriptor sets before updating - // current_guest_graphics_pipeline_layout_. VkPipeline pipeline; const VulkanPipelineCache::PipelineLayoutProvider* pipeline_layout_provider; if (!pipeline_cache_->ConfigurePipeline( @@ -2251,6 +2239,20 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, pipeline_layout_provider)) { return false; } + + // Update the textures before most other work in the submission because + // samplers depend on this (and in case of sampler overflow in a submission, + // submissions must be split) - may perform dispatches and copying. + uint32_t used_texture_mask = + vertex_shader->GetUsedTextureMaskAfterTranslation() | + (pixel_shader != nullptr + ? pixel_shader->GetUsedTextureMaskAfterTranslation() + : 0); + texture_cache_->RequestTextures(used_texture_mask); + + // Update the graphics pipeline, and if the new graphics pipeline has a + // different layout, invalidate incompatible descriptor sets before updating + // current_guest_graphics_pipeline_layout_. 
if (current_guest_graphics_pipeline_ != pipeline) { deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); From fdcbf67623c1ac5bad2de1d6a3e815f5a9652c0e Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 25 Jun 2022 15:46:02 +0300 Subject: [PATCH 103/123] [Vulkan] Enable VK_KHR_sampler_ycbcr_conversion --- src/xenia/ui/vulkan/vulkan_provider.cc | 10 ++++++++++ src/xenia/ui/vulkan/vulkan_provider.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index 1e84bb14a..95c544925 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -701,6 +701,7 @@ bool VulkanProvider::Initialize() { std::memset(&device_extensions_, 0, sizeof(device_extensions_)); if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) { device_extensions_.khr_dedicated_allocation = true; + device_extensions_.khr_sampler_ycbcr_conversion = true; if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) { device_extensions_.khr_image_format_list = true; device_extensions_.khr_shader_float_controls = true; @@ -723,6 +724,13 @@ bool VulkanProvider::Initialize() { offsetof(DeviceExtensions, khr_image_format_list)}, {"VK_KHR_portability_subset", offsetof(DeviceExtensions, khr_portability_subset)}, + // While vkGetPhysicalDeviceFormatProperties should be used to check the + // format support (device support for Y'CbCr formats is not required by + // this extension or by Vulkan 1.1), still adding + // VK_KHR_sampler_ycbcr_conversion to this list to enable this extension + // on the device on Vulkan 1.0. 
+ {"VK_KHR_sampler_ycbcr_conversion", + offsetof(DeviceExtensions, khr_sampler_ycbcr_conversion)}, {"VK_KHR_shader_float_controls", offsetof(DeviceExtensions, khr_shader_float_controls)}, {"VK_KHR_spirv_1_4", offsetof(DeviceExtensions, khr_spirv_1_4)}, @@ -982,6 +990,8 @@ bool VulkanProvider::Initialize() { XELOGVK(" * Triangle fans: {}", device_portability_subset_features_.triangleFans ? "yes" : "no"); } + XELOGVK("* VK_KHR_sampler_ycbcr_conversion: {}", + device_extensions_.khr_sampler_ycbcr_conversion ? "yes" : "no"); XELOGVK("* VK_KHR_shader_float_controls: {}", device_extensions_.khr_shader_float_controls ? "yes" : "no"); if (device_extensions_.khr_shader_float_controls) { diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h index fce603635..0680b5766 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.h +++ b/src/xenia/ui/vulkan/vulkan_provider.h @@ -139,6 +139,8 @@ class VulkanProvider : public GraphicsProvider { bool khr_image_format_list; // Requires the VK_KHR_get_physical_device_properties2 instance extension. bool khr_portability_subset; + // Core since 1.1.0. + bool khr_sampler_ycbcr_conversion; // Core since 1.2.0. bool khr_shader_float_controls; // Core since 1.2.0. 
From d8b0227cbd199dd66fa319385b84573595dfeec1 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 25 Jun 2022 16:25:29 +0300 Subject: [PATCH 104/123] [SPIR-V] Fix cubemap X axis --- src/xenia/gpu/spirv_shader_translator_alu.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xenia/gpu/spirv_shader_translator_alu.cc b/src/xenia/gpu/spirv_shader_translator_alu.cc index ec0051e7e..9dfbccb09 100644 --- a/src/xenia/gpu/spirv_shader_translator_alu.cc +++ b/src/xenia/gpu/spirv_shader_translator_alu.cc @@ -653,7 +653,7 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation( // tc = -y ma_x_result[0] = operand_neg[1]; // ma/2 = x - ma_x_result[2] = operand[2]; + ma_x_result[2] = operand[0]; if (used_result_components & 0b1010) { spv::Id x_is_neg = builder_->createBinOp( spv::OpFOrdLessThan, type_bool_, operand[0], const_float_0_); From 5dca11a89208b1a14a145adf9e515a09f0d75d44 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 25 Jun 2022 16:33:18 +0300 Subject: [PATCH 105/123] [SPIR-V] Fix fetch constant LOD bias signedness --- src/xenia/gpu/spirv_shader_translator_fetch.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xenia/gpu/spirv_shader_translator_fetch.cc b/src/xenia/gpu/spirv_shader_translator_fetch.cc index 98e9a5836..f9bf7c564 100644 --- a/src/xenia/gpu/spirv_shader_translator_fetch.cc +++ b/src/xenia/gpu/spirv_shader_translator_fetch.cc @@ -1600,7 +1600,7 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( spv::Id lod = builder_->createBinOp( spv::OpFMul, type_float_, builder_->createUnaryOp( - spv::OpConvertUToF, type_float_, + spv::OpConvertSToF, type_float_, builder_->createTriOp(spv::OpBitFieldSExtract, type_int_, fetch_constant_word_4_signed, builder_->makeUintConstant(12), From d30d59883aeeb0743068c651e38316d0c06e1099 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 25 Jun 2022 20:35:13 +0300 Subject: [PATCH 106/123] [Vulkan] Color exponent bias and gamma conversion --- 
src/xenia/gpu/spirv_shader_translator.cc | 25 +++-- src/xenia/gpu/spirv_shader_translator.h | 13 +++ src/xenia/gpu/spirv_shader_translator_rb.cc | 98 +++++++++++++++++++ .../gpu/vulkan/vulkan_command_processor.cc | 38 +++++++ .../gpu/vulkan/vulkan_render_target_cache.cc | 5 +- .../gpu/vulkan/vulkan_render_target_cache.h | 14 +++ 6 files changed, 180 insertions(+), 13 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index b59b637b6..b260f78fa 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -228,6 +228,8 @@ void SpirvShaderTranslator::StartTranslation() { offsetof(SystemConstants, texture_swizzled_signs), type_uint4_array_2}, {"texture_swizzles", offsetof(SystemConstants, texture_swizzles), type_uint4_array_4}, + {"color_exp_bias", offsetof(SystemConstants, color_exp_bias), + type_float4_}, }; id_vector_temp_.clear(); id_vector_temp_.reserve(xe::countof(system_constants)); @@ -403,6 +405,14 @@ void SpirvShaderTranslator::StartTranslation() { spv::NoPrecision, type_void_, "main", main_param_types, main_precisions, &function_main_entry); + // Load the flags system constant since it may be used in many places. + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantFlags)); + main_system_constant_flags_ = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision); + // Begin ucode translation. Initialize everything, even without defined // defaults, for safety. var_main_predicate_ = builder_->createVariable( @@ -580,6 +590,8 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { if (is_vertex_shader()) { CompleteVertexOrTessEvalShaderInMain(); + } else if (is_pixel_shader()) { + CompleteFragmentShaderInMain(); } // End the main function. 
@@ -1115,13 +1127,6 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { } void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { - id_vector_temp_.clear(); - id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantFlags)); - spv::Id system_constant_flags = builder_->createLoad( - builder_->createAccessChain(spv::StorageClassUniform, - uniform_system_constants_, id_vector_temp_), - spv::NoPrecision); - id_vector_temp_.clear(); id_vector_temp_.push_back( builder_->makeIntConstant(kOutputPerVertexMemberPosition)); @@ -1136,7 +1141,7 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { spv::Id is_w_not_reciprocal = builder_->createBinOp( spv::OpINotEqual, type_bool_, builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, system_constant_flags, + spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, builder_->makeUintConstant( static_cast(kSysFlag_WNotReciprocal))), const_uint_0_); @@ -1160,7 +1165,7 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { spv::Id is_xy_divided_by_w = builder_->createBinOp( spv::OpINotEqual, type_bool_, builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, system_constant_flags, + spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, builder_->makeUintConstant( static_cast(kSysFlag_XYDividedByW))), const_uint_0_); @@ -1180,7 +1185,7 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { spv::Id is_z_divided_by_w = builder_->createBinOp( spv::OpINotEqual, type_bool_, builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, system_constant_flags, + spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, builder_->makeUintConstant( static_cast(kSysFlag_ZDividedByW))), const_uint_0_); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 075279848..aa0265afb 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -61,12 +61,20 @@ class 
SpirvShaderTranslator : public ShaderTranslator { kSysFlag_XYDividedByW_Shift, kSysFlag_ZDividedByW_Shift, kSysFlag_WNotReciprocal_Shift, + kSysFlag_ConvertColor0ToGamma_Shift, + kSysFlag_ConvertColor1ToGamma_Shift, + kSysFlag_ConvertColor2ToGamma_Shift, + kSysFlag_ConvertColor3ToGamma_Shift, kSysFlag_Count, kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift, kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift, kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift, + kSysFlag_ConvertColor0ToGamma = 1u << kSysFlag_ConvertColor0ToGamma_Shift, + kSysFlag_ConvertColor1ToGamma = 1u << kSysFlag_ConvertColor1ToGamma_Shift, + kSysFlag_ConvertColor2ToGamma = 1u << kSysFlag_ConvertColor2ToGamma_Shift, + kSysFlag_ConvertColor3ToGamma = 1u << kSysFlag_ConvertColor3ToGamma_Shift, }; static_assert(kSysFlag_Count <= 32, "Too many flags in the system constants"); @@ -94,6 +102,8 @@ class SpirvShaderTranslator : public ShaderTranslator { // apply to the result directly in the shader code. In each uint32_t, // swizzles for 2 texture fetch constants (in bits 0:11 and 12:23). uint32_t texture_swizzles[16]; + + float color_exp_bias[4]; }; // The minimum limit for maxPerStageDescriptorStorageBuffers is 4, and for @@ -308,6 +318,7 @@ class SpirvShaderTranslator : public ShaderTranslator { void StartFragmentShaderBeforeMain(); void StartFragmentShaderInMain(); + void CompleteFragmentShaderInMain(); // Updates the current flow control condition (to be called in the beginning // of exec and in jumps), closing the previous conditionals if needed. 
@@ -509,6 +520,7 @@ class SpirvShaderTranslator : public ShaderTranslator { kSystemConstantNdcOffset, kSystemConstantTextureSwizzledSigns, kSystemConstantTextureSwizzles, + kSystemConstantColorExpBias, }; spv::Id uniform_system_constants_; spv::Id uniform_float_constants_; @@ -545,6 +557,7 @@ class SpirvShaderTranslator : public ShaderTranslator { std::vector main_interface_; spv::Function* function_main_; + spv::Id main_system_constant_flags_; // bool. spv::Id var_main_predicate_; // uint4. diff --git a/src/xenia/gpu/spirv_shader_translator_rb.cc b/src/xenia/gpu/spirv_shader_translator_rb.cc index 8282016b5..829b3f576 100644 --- a/src/xenia/gpu/spirv_shader_translator_rb.cc +++ b/src/xenia/gpu/spirv_shader_translator_rb.cc @@ -11,9 +11,11 @@ #include #include +#include #include "third_party/glslang/SPIRV/GLSL.std.450.h" #include "xenia/base/assert.h" +#include "xenia/base/math.h" namespace xe { namespace gpu { @@ -423,5 +425,101 @@ spv::Id SpirvShaderTranslator::Depth20e4To32(spv::Builder& builder, return f32; } +void SpirvShaderTranslator::CompleteFragmentShaderInMain() { + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantFlags)); + spv::Id system_constant_flags = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision); + + uint32_t color_targets_remaining = current_shader().writes_color_targets(); + uint32_t color_target_index; + while (xe::bit_scan_forward(color_targets_remaining, &color_target_index)) { + color_targets_remaining &= ~(UINT32_C(1) << color_target_index); + spv::Id color_variable = output_fragment_data_[color_target_index]; + spv::Id color = builder_->createLoad(color_variable, spv::NoPrecision); + + // Apply the exponent bias after the alpha test and alpha to coverage + // because they need the unbiased alpha from the shader. 
+ id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + id_vector_temp_.push_back( + builder_->makeIntConstant(kSystemConstantColorExpBias)); + id_vector_temp_.push_back( + builder_->makeIntConstant(int32_t(color_target_index))); + color = builder_->createBinOp( + spv::OpVectorTimesScalar, type_float4_, color, + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision)); + builder_->addDecoration(color, spv::DecorationNoContraction); + + // Convert to gamma space - this is incorrect, since it must be done after + // blending on the Xbox 360, but this is just one of many blending issues in + // the host render target path. + // TODO(Triang3l): Gamma as sRGB check. + spv::Id color_rgb; + { + std::unique_ptr color_rgb_shuffle_op = + std::make_unique( + builder_->getUniqueId(), type_float3_, spv::OpVectorShuffle); + color_rgb_shuffle_op->addIdOperand(color); + color_rgb_shuffle_op->addIdOperand(color); + color_rgb_shuffle_op->addImmediateOperand(0); + color_rgb_shuffle_op->addImmediateOperand(1); + color_rgb_shuffle_op->addImmediateOperand(2); + color_rgb = color_rgb_shuffle_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(color_rgb_shuffle_op)); + } + spv::Id is_gamma = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, + builder_->makeUintConstant(kSysFlag_ConvertColor0ToGamma + << color_target_index)), + const_uint_0_); + spv::Block& block_gamma_head = *builder_->getBuildPoint(); + spv::Block& block_gamma = builder_->makeNewBlock(); + spv::Block& block_gamma_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_gamma_merge.getId()); + builder_->createConditionalBranch(is_gamma, &block_gamma, + &block_gamma_merge); + builder_->setBuildPoint(&block_gamma); + spv::Id color_rgb_gamma = LinearToPWLGamma(color_rgb, false); + 
builder_->createBranch(&block_gamma_merge); + builder_->setBuildPoint(&block_gamma_merge); + { + std::unique_ptr gamma_phi_op = + std::make_unique(builder_->getUniqueId(), + type_float3_, spv::OpPhi); + gamma_phi_op->addIdOperand(color_rgb_gamma); + gamma_phi_op->addIdOperand(block_gamma.getId()); + gamma_phi_op->addIdOperand(color_rgb); + gamma_phi_op->addIdOperand(block_gamma_head.getId()); + color_rgb = gamma_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(gamma_phi_op)); + } + { + std::unique_ptr color_rgba_shuffle_op = + std::make_unique( + builder_->getUniqueId(), type_float4_, spv::OpVectorShuffle); + color_rgba_shuffle_op->addIdOperand(color_rgb); + color_rgba_shuffle_op->addIdOperand(color); + color_rgba_shuffle_op->addImmediateOperand(0); + color_rgba_shuffle_op->addImmediateOperand(1); + color_rgba_shuffle_op->addImmediateOperand(2); + color_rgba_shuffle_op->addImmediateOperand(3 + 3); + color = color_rgba_shuffle_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(color_rgba_shuffle_op)); + } + + builder_->createStore(color, color_variable); + } +} + } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 72c8f0efa..4ac850ce0 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -3267,6 +3267,13 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( auto pa_cl_vte_cntl = regs.Get(); int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32); + // Get the color info register values for each render target. + reg::RB_COLOR_INFO color_infos[xenos::kMaxColorRenderTargets]; + for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { + color_infos[i] = regs.Get( + reg::RB_COLOR_INFO::rt_register_indices[i]); + } + bool dirty = false; // Flags. 
@@ -3288,6 +3295,14 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( if (pa_cl_vte_cntl.vtx_w0_fmt) { flags |= SpirvShaderTranslator::kSysFlag_WNotReciprocal; } + // Gamma writing. + // TODO(Triang3l): Gamma as sRGB check. + for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { + if (color_infos[i].color_format == + xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA) { + flags |= SpirvShaderTranslator::kSysFlag_ConvertColor0ToGamma << i; + } + } dirty |= system_constants_.flags != flags; system_constants_.flags = flags; @@ -3356,6 +3371,29 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( } } + // Color exponent bias. + for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { + reg::RB_COLOR_INFO color_info = color_infos[i]; + // Exponent bias is in bits 20:25 of RB_COLOR_INFO. + int32_t color_exp_bias = color_info.color_exp_bias; + if (render_target_cache_->GetPath() == + RenderTargetCache::Path::kHostRenderTargets && + (color_info.color_format == xenos::ColorRenderTargetFormat::k_16_16 && + !render_target_cache_->IsFixedRG16TruncatedToMinus1To1() || + color_info.color_format == + xenos::ColorRenderTargetFormat::k_16_16_16_16 && + !render_target_cache_->IsFixedRGBA16TruncatedToMinus1To1())) { + // Remap from -32...32 to -1...1 by dividing the output values by 32, + // losing blending correctness, but getting the full range. 
+ color_exp_bias -= 5; + } + float color_exp_bias_scale; + *reinterpret_cast(&color_exp_bias_scale) = + UINT32_C(0x3F800000) + (color_exp_bias << 23); + dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale; + system_constants_.color_exp_bias[i] = color_exp_bias_scale; + } + if (dirty) { current_graphics_descriptor_set_values_up_to_date_ &= ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants); diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index 4d8545fd0..02547eaaa 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -784,11 +784,10 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory, bool draw_resolution_scaled = IsDrawResolutionScaled(); draw_util::ResolveInfo resolve_info; - // TODO(Triang3l): Truncation of fixed16 (but not fixed16 as float16) range to - // -1 to 1. if (!draw_util::GetResolveInfo( register_file(), memory, trace_writer_, draw_resolution_scale_x(), - draw_resolution_scale_y(), false, false, resolve_info)) { + draw_resolution_scale_y(), IsFixedRG16TruncatedToMinus1To1(), + IsFixedRGBA16TruncatedToMinus1To1(), resolve_info)) { return false; } diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h index 2857fde1f..18113bf9f 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h @@ -128,6 +128,20 @@ class VulkanRenderTargetCache final : public RenderTargetCache { return last_update_framebuffer_; } + // Using R16G16[B16A16]_SNORM, which are -1...1, not the needed -32...32. + // Persistent data doesn't depend on this, so can be overriden by per-game + // configuration. + bool IsFixedRG16TruncatedToMinus1To1() const { + // TODO(Triang3l): Not float16 condition. 
+ return GetPath() == Path::kHostRenderTargets && + !cvars::snorm16_render_target_full_range; + } + bool IsFixedRGBA16TruncatedToMinus1To1() const { + // TODO(Triang3l): Not float16 condition. + return GetPath() == Path::kHostRenderTargets && + !cvars::snorm16_render_target_full_range; + } + bool depth_float24_round() const { return depth_float24_round_; } bool msaa_2x_attachments_supported() const { From d8b2944caac86106e6f4157c7491ae1f6da9f21d Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 25 Jun 2022 20:46:52 +0300 Subject: [PATCH 107/123] [Vulkan] Handle unsupported fillModeNonSolid + fix portability subset feature checks --- src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 80 +++++++++++-------- 1 file changed, 46 insertions(+), 34 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 0a8a88f62..574e1ba36 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -525,37 +525,41 @@ bool VulkanPipelineCache::GetCurrentStateDescription( bool cull_back = pa_su_sc_mode_cntl.cull_back; description_out.cull_front = cull_front; description_out.cull_back = cull_back; - xenos::PolygonType polygon_type = xenos::PolygonType::kTriangles; - if (!cull_front) { - polygon_type = - std::min(polygon_type, pa_su_sc_mode_cntl.polymode_front_ptype); - } - if (!cull_back) { - polygon_type = - std::min(polygon_type, pa_su_sc_mode_cntl.polymode_back_ptype); - } - if (pa_su_sc_mode_cntl.poly_mode != xenos::PolygonModeEnable::kDualMode) { - polygon_type = xenos::PolygonType::kTriangles; - } - switch (polygon_type) { - case xenos::PolygonType::kPoints: - // When points are not supported, use lines instead, preserving - // debug-like purpose. - description_out.polygon_mode = - (!device_portability_subset_features || - device_portability_subset_features->pointPolygons) - ? 
PipelinePolygonMode::kPoint - : PipelinePolygonMode::kLine; - break; - case xenos::PolygonType::kLines: - description_out.polygon_mode = PipelinePolygonMode::kLine; - break; - case xenos::PolygonType::kTriangles: - description_out.polygon_mode = PipelinePolygonMode::kFill; - break; - default: - assert_unhandled_case(polygon_type); - return false; + if (device_features.fillModeNonSolid) { + xenos::PolygonType polygon_type = xenos::PolygonType::kTriangles; + if (!cull_front) { + polygon_type = + std::min(polygon_type, pa_su_sc_mode_cntl.polymode_front_ptype); + } + if (!cull_back) { + polygon_type = + std::min(polygon_type, pa_su_sc_mode_cntl.polymode_back_ptype); + } + if (pa_su_sc_mode_cntl.poly_mode != xenos::PolygonModeEnable::kDualMode) { + polygon_type = xenos::PolygonType::kTriangles; + } + switch (polygon_type) { + case xenos::PolygonType::kPoints: + // When points are not supported, use lines instead, preserving + // debug-like purpose. + description_out.polygon_mode = + (!device_portability_subset_features || + device_portability_subset_features->pointPolygons) + ? 
PipelinePolygonMode::kPoint + : PipelinePolygonMode::kLine; + break; + case xenos::PolygonType::kLines: + description_out.polygon_mode = PipelinePolygonMode::kLine; + break; + case xenos::PolygonType::kTriangles: + description_out.polygon_mode = PipelinePolygonMode::kFill; + break; + default: + assert_unhandled_case(polygon_type); + return false; + } + } else { + description_out.polygon_mode = PipelinePolygonMode::kFill; } description_out.front_face_clockwise = pa_su_sc_mode_cntl.face != 0; } else { @@ -701,7 +705,6 @@ bool VulkanPipelineCache::ArePipelineRequirementsMet( const ui::vulkan::VulkanProvider& provider = command_processor_.GetVulkanProvider(); - const VkPhysicalDeviceFeatures& device_features = provider.device_features(); const VkPhysicalDevicePortabilitySubsetFeaturesKHR* device_portability_subset_features = @@ -709,13 +712,15 @@ bool VulkanPipelineCache::ArePipelineRequirementsMet( if (device_portability_subset_features) { if (description.primitive_topology == PipelinePrimitiveTopology::kTriangleFan && - device_portability_subset_features->triangleFans) { + !device_portability_subset_features->triangleFans) { return false; } + if (description.polygon_mode == PipelinePolygonMode::kPoint && - device_portability_subset_features->pointPolygons) { + !device_portability_subset_features->pointPolygons) { return false; } + if (!device_portability_subset_features->constantAlphaColorBlendFactors) { uint32_t color_rts_remaining = description.render_pass_key.depth_and_color_used >> 1; @@ -738,11 +743,18 @@ bool VulkanPipelineCache::ArePipelineRequirementsMet( } } + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + if (!device_features.geometryShader && description.geometry_shader != PipelineGeometryShader::kNone) { return false; } + if (!device_features.fillModeNonSolid && + description.polygon_mode != PipelinePolygonMode::kFill) { + return false; + } + if (!device_features.independentBlend) { uint32_t color_rts_remaining = 
description.render_pass_key.depth_and_color_used >> 1; From a5c8df7a37cfe49928c071c49c262fcd43edd98f Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 25 Jun 2022 20:57:44 +0300 Subject: [PATCH 108/123] [Vulkan] Remove UB-based independent blend logic On Vulkan, unlike Direct3D, not writing to a color target in the fragment shader produces an undefined result. --- src/xenia/gpu/spirv_shader_translator.cc | 21 +++++++-------- src/xenia/gpu/spirv_shader_translator.h | 5 ---- src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 27 ------------------- 3 files changed, 9 insertions(+), 44 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index b260f78fa..146af6823 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -1270,19 +1270,16 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { "xe_out_fragment_data_2", "xe_out_fragment_data_3", }; - uint32_t fragment_data_outputs_written = - current_shader().writes_color_targets() & - ~GetSpirvShaderModification().pixel.color_outputs_disabled; - for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { - if (!(fragment_data_outputs_written & (uint32_t(1) << i))) { - continue; - } - spv::Id output_fragment_data_rt = - builder_->createVariable(spv::NoPrecision, spv::StorageClassOutput, - type_float4_, kFragmentDataNames[i]); - output_fragment_data_[i] = output_fragment_data_rt; + uint32_t color_targets_remaining = current_shader().writes_color_targets(); + uint32_t color_target_index; + while (xe::bit_scan_forward(color_targets_remaining, &color_target_index)) { + color_targets_remaining &= ~(UINT32_C(1) << color_target_index); + spv::Id output_fragment_data_rt = builder_->createVariable( + spv::NoPrecision, spv::StorageClassOutput, type_float4_, + kFragmentDataNames[color_target_index]); + output_fragment_data_[color_target_index] = output_fragment_data_rt; builder_->addDecoration(output_fragment_data_rt, 
spv::DecorationLocation, - int(i)); + int(color_target_index)); // Make invariant as pixel shaders may be used for various precise // computations. builder_->addDecoration(output_fragment_data_rt, spv::DecorationInvariant); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index aa0265afb..76caab044 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -46,11 +46,6 @@ class SpirvShaderTranslator : public ShaderTranslator { struct PixelShaderModification { // Dynamically indexable register count from SQ_PROGRAM_CNTL. uint32_t dynamic_addressable_register_count : 8; - // Color outputs removed from the shader to implement a zero color write - // mask when independent blending (and thus independent write masks) is - // not supported without switching to a render pass with some attachments - // actually excluded. - uint32_t color_outputs_disabled : 4; } pixel; uint64_t value = 0; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 574e1ba36..5e9fec78d 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -141,33 +141,6 @@ VulkanPipelineCache::GetCurrentPixelShaderModification( shader.GetDynamicAddressableRegisterCount( sq_program_cntl.ps_num_reg))); - const ui::vulkan::VulkanProvider& provider = - command_processor_.GetVulkanProvider(); - const VkPhysicalDeviceFeatures& device_features = provider.device_features(); - if (!device_features.independentBlend) { - // Since without independent blending, the write mask is common for all - // attachments, but the render pass may still include the attachments from - // previous draws (to prevent excessive render pass changes potentially - // doing stores and loads), disable writing to render targets with a - // completely empty write mask by removing the output from the shader. 
- // Only explicitly excluding render targets that the shader actually writes - // to, for better pipeline storage compatibility between devices with and - // without independent blending (so in the usual situation - the shader - // doesn't write to any render targets disabled via the color mask - no - // explicit disabling of shader outputs will be needed, and the disabled - // output mask will be 0). - uint32_t color_targets_remaining = shader.writes_color_targets(); - uint32_t color_target_index; - while (xe::bit_scan_forward(color_targets_remaining, &color_target_index)) { - color_targets_remaining &= ~(uint32_t(1) << color_target_index); - if (!(normalized_color_mask & - (uint32_t(0b1111) << (4 * color_target_index)))) { - modification.pixel.color_outputs_disabled |= uint32_t(1) - << color_target_index; - } - } - } - return modification; } From 6688b13773e0515a8a4d81c9ed51001f211c51df Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 26 Jun 2022 15:01:27 +0300 Subject: [PATCH 109/123] [Vulkan] PsParamGen --- src/xenia/gpu/spirv_shader_translator.cc | 166 +++++++++++++++++- src/xenia/gpu/spirv_shader_translator.h | 17 ++ .../gpu/vulkan/vulkan_command_processor.cc | 16 +- .../gpu/vulkan/vulkan_command_processor.h | 3 +- src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 16 +- 5 files changed, 212 insertions(+), 6 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 146af6823..a6830c20f 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -106,6 +106,9 @@ void SpirvShaderTranslator::Reset() { uniform_float_constants_ = spv::NoResult; + input_fragment_coord_ = spv::NoResult; + input_front_facing_ = spv::NoResult; + sampler_bindings_.clear(); texture_bindings_.clear(); @@ -1011,6 +1014,17 @@ spv::Id SpirvShaderTranslator::SpirvSmearScalarResultOrConstant( is_spec_constant); } +uint32_t SpirvShaderTranslator::GetPsParamGenInterpolator() const { + 
assert_true(is_pixel_shader()); + Modification modification = GetSpirvShaderModification(); + // param_gen_interpolator is already 4 bits, no need for an interpolator count + // safety check. + return (modification.pixel.param_gen_enable && + modification.pixel.param_gen_interpolator < register_count()) + ? modification.pixel.param_gen_interpolator + : UINT32_MAX; +} + void SpirvShaderTranslator::EnsureBuildPointAvailable() { if (!builder_->getBuildPoint()->isTerminated()) { return; @@ -1261,6 +1275,31 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { main_interface_.push_back(interpolator); } + bool param_gen_needed = GetPsParamGenInterpolator() != UINT32_MAX; + + // Fragment coordinates. + // TODO(Triang3l): More conditions - fragment shader interlock render backend, + // alpha to coverage (if RT 0 is written, and there's no early depth / + // stencil), depth writing in the fragment shader (per-sample if supported). + if (param_gen_needed) { + input_fragment_coord_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassInput, type_float4_, "gl_FragCoord"); + builder_->addDecoration(input_fragment_coord_, spv::DecorationBuiltIn, + spv::BuiltInFragCoord); + main_interface_.push_back(input_fragment_coord_); + } + + // Is front facing. + // TODO(Triang3l): Needed for stencil in the fragment shader interlock render + // backend. + if (param_gen_needed && !GetSpirvShaderModification().pixel.param_gen_point) { + input_front_facing_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassInput, type_bool_, "gl_FrontFacing"); + builder_->addDecoration(input_front_facing_, spv::DecorationBuiltIn, + spv::BuiltInFrontFacing); + main_interface_.push_back(input_front_facing_); + } + // Framebuffer attachment outputs. 
std::fill(output_fragment_data_.begin(), output_fragment_data_.end(), spv::NoResult); @@ -1288,12 +1327,16 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { } void SpirvShaderTranslator::StartFragmentShaderInMain() { + uint32_t param_gen_interpolator = GetPsParamGenInterpolator(); + // Copy the interpolators to general-purpose registers. // TODO(Triang3l): Centroid. - // TODO(Triang3l): ps_param_gen. uint32_t interpolator_count = std::min(xenos::kMaxInterpolators, register_count()); for (uint32_t i = 0; i < interpolator_count; ++i) { + if (i == param_gen_interpolator) { + continue; + } id_vector_temp_.clear(); // Register array element. id_vector_temp_.push_back(builder_->makeIntConstant(int(i))); @@ -1303,6 +1346,127 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() { var_main_registers_, id_vector_temp_)); } + // Pixel parameters. + if (param_gen_interpolator != UINT32_MAX) { + Modification modification = GetSpirvShaderModification(); + // Rounding the position down, and taking the absolute value, so in case the + // host GPU for some reason has quads used for derivative calculation at odd + // locations, the left and top edges will have correct derivative magnitude + // and LODs. + // Assuming that if PsParamGen is needed at all, param_gen_point is always + // set for point primitives, and is always disabled for other primitive + // types. + // OpFNegate requires sign bit flipping even for 0.0 (in this case, the + // first column or row of pixels) only since SPIR-V 1.5 revision 2 (not the + // base 1.5). + // TODO(Triang3l): When SPIR-V 1.6 is used in Xenia, see if OpFNegate can be + // used there, should be cheaper because it may be implemented as a hardware + // instruction modifier, though it respects the rule for subnormal numbers - + // see the actual hardware instructions in both OpBitwiseXor and OpFNegate + // cases. 
+ spv::Id const_sign_bit = builder_->makeUintConstant(UINT32_C(1) << 31); + // TODO(Triang3l): Resolution scale inversion. + // X - pixel X .0 in the magnitude, is back-facing in the sign bit. + assert_true(input_fragment_coord_ != spv::NoResult); + id_vector_temp_.clear(); + id_vector_temp_.push_back(const_int_0_); + spv::Id param_gen_x = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassInput, + input_fragment_coord_, id_vector_temp_), + spv::NoPrecision); + id_vector_temp_.clear(); + id_vector_temp_.push_back(param_gen_x); + param_gen_x = builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.push_back(param_gen_x); + param_gen_x = builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_); + if (!modification.pixel.param_gen_point) { + assert_true(input_front_facing_ != spv::NoResult); + param_gen_x = builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp( + spv::OpLogicalOr, type_bool_, + builder_->createBinOp( + spv::OpIEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + main_system_constant_flags_, + builder_->makeUintConstant(kSysFlag_PrimitivePolygonal)), + const_uint_0_), + builder_->createLoad(input_front_facing_, spv::NoPrecision)), + param_gen_x, + builder_->createUnaryOp( + spv::OpBitcast, type_float_, + builder_->createBinOp( + spv::OpBitwiseXor, type_uint_, + builder_->createUnaryOp(spv::OpBitcast, type_uint_, + param_gen_x), + const_sign_bit))); + } + // Y - pixel Y .0 in the magnitude, is point in the sign bit. 
+ id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeIntConstant(1)); + spv::Id param_gen_y = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassInput, + input_fragment_coord_, id_vector_temp_), + spv::NoPrecision); + id_vector_temp_.clear(); + id_vector_temp_.push_back(param_gen_y); + param_gen_y = builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.push_back(param_gen_y); + param_gen_y = builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_); + if (modification.pixel.param_gen_point) { + param_gen_y = builder_->createUnaryOp( + spv::OpBitcast, type_float_, + builder_->createBinOp( + spv::OpBitwiseXor, type_uint_, + builder_->createUnaryOp(spv::OpBitcast, type_uint_, param_gen_y), + const_sign_bit)); + } + // Z - point S in the magnitude, is line in the sign bit. + spv::Id param_gen_z; + if (modification.pixel.param_gen_point) { + // TODO(Triang3l): Point coordinates. + param_gen_z = const_float_0_; + } else { + param_gen_z = builder_->createUnaryOp( + spv::OpBitcast, type_float_, + builder_->createTriOp( + spv::OpSelect, type_uint_, + builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + main_system_constant_flags_, + builder_->makeUintConstant(kSysFlag_PrimitiveLine)), + const_uint_0_), + const_sign_bit, const_uint_0_)); + } + // W - point T in the magnitude. + // TODO(Triang3l): Point coordinates. + spv::Id param_gen_w = const_float_0_; + // Store the pixel parameters. 
+ id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + id_vector_temp_.push_back(param_gen_x); + id_vector_temp_.push_back(param_gen_y); + id_vector_temp_.push_back(param_gen_z); + id_vector_temp_.push_back(param_gen_w); + spv::Id param_gen = + builder_->createCompositeConstruct(type_float4_, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(int(param_gen_interpolator))); + builder_->createStore(param_gen, builder_->createAccessChain( + spv::StorageClassFunction, + var_main_registers_, id_vector_temp_)); + } + // Initialize the colors for safety. for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { spv::Id output_fragment_data_rt = output_fragment_data_[i]; diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 76caab044..0a94300a0 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -46,6 +46,12 @@ class SpirvShaderTranslator : public ShaderTranslator { struct PixelShaderModification { // Dynamically indexable register count from SQ_PROGRAM_CNTL. uint32_t dynamic_addressable_register_count : 8; + uint32_t param_gen_enable : 1; + uint32_t param_gen_interpolator : 4; + // If param_gen_enable is set, this must be set for point primitives, and + // must not be set for other primitive types - enables the point sprite + // coordinates input, and also effects the flag bits in PsParamGen. 
+ uint32_t param_gen_point : 1; } pixel; uint64_t value = 0; @@ -56,6 +62,8 @@ class SpirvShaderTranslator : public ShaderTranslator { kSysFlag_XYDividedByW_Shift, kSysFlag_ZDividedByW_Shift, kSysFlag_WNotReciprocal_Shift, + kSysFlag_PrimitivePolygonal_Shift, + kSysFlag_PrimitiveLine_Shift, kSysFlag_ConvertColor0ToGamma_Shift, kSysFlag_ConvertColor1ToGamma_Shift, kSysFlag_ConvertColor2ToGamma_Shift, @@ -66,6 +74,8 @@ class SpirvShaderTranslator : public ShaderTranslator { kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift, kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift, kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift, + kSysFlag_PrimitivePolygonal = 1u << kSysFlag_PrimitivePolygonal_Shift, + kSysFlag_PrimitiveLine = 1u << kSysFlag_PrimitiveLine_Shift, kSysFlag_ConvertColor0ToGamma = 1u << kSysFlag_ConvertColor0ToGamma_Shift, kSysFlag_ConvertColor1ToGamma = 1u << kSysFlag_ConvertColor1ToGamma_Shift, kSysFlag_ConvertColor2ToGamma = 1u << kSysFlag_ConvertColor2ToGamma_Shift, @@ -301,6 +311,9 @@ class SpirvShaderTranslator : public ShaderTranslator { GetSpirvShaderModification().vertex.host_vertex_shader_type); } + // Returns UINT32_MAX if PsParamGen doesn't need to be written. + uint32_t GetPsParamGenInterpolator() const; + // Must be called before emitting any SPIR-V operations that must be in a // block in translator callbacks to ensure that if the last instruction added // was something like OpBranch - in this case, an unreachable block is @@ -535,6 +548,10 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id input_vertex_index_; // VS as TES only - int. spv::Id input_primitive_id_; + // PS, only when needed - float4. + spv::Id input_fragment_coord_; + // PS, only when needed - bool. + spv::Id input_front_facing_; // In vertex or tessellation evaluation shaders - outputs, always // xenos::kMaxInterpolators. 
diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 4ac850ce0..29eebba8e 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -2336,7 +2336,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, normalized_depth_control); // Update system constants before uploading them. - UpdateSystemConstantValues(primitive_processing_result.host_index_endian, + UpdateSystemConstantValues(primitive_polygonal, + primitive_processing_result.host_index_endian, viewport_info, used_texture_mask); // Update uniform buffers and descriptor sets after binding the pipeline with @@ -3257,14 +3258,15 @@ void VulkanCommandProcessor::UpdateDynamicState( } void VulkanCommandProcessor::UpdateSystemConstantValues( - xenos::Endian index_endian, const draw_util::ViewportInfo& viewport_info, - uint32_t used_texture_mask) { + bool primitive_polygonal, xenos::Endian index_endian, + const draw_util::ViewportInfo& viewport_info, uint32_t used_texture_mask) { #if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES const RegisterFile& regs = *register_file_; auto pa_cl_vte_cntl = regs.Get(); + auto vgt_draw_initiator = regs.Get(); int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32); // Get the color info register values for each render target. @@ -3295,6 +3297,14 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( if (pa_cl_vte_cntl.vtx_w0_fmt) { flags |= SpirvShaderTranslator::kSysFlag_WNotReciprocal; } + // Whether the primitive is polygonal, and gl_FrontFacing matters. + if (primitive_polygonal) { + flags |= SpirvShaderTranslator::kSysFlag_PrimitivePolygonal; + } + // Primitive type. + if (draw_util::IsPrimitiveLine(regs)) { + flags |= SpirvShaderTranslator::kSysFlag_PrimitiveLine; + } // Gamma writing. // TODO(Triang3l): Gamma as sRGB check. 
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 164e7b253..dece6e02a 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -449,7 +449,8 @@ class VulkanCommandProcessor : public CommandProcessor { void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info, bool primitive_polygonal, reg::RB_DEPTHCONTROL normalized_depth_control); - void UpdateSystemConstantValues(xenos::Endian index_endian, + void UpdateSystemConstantValues(bool primitive_polygonal, + xenos::Endian index_endian, const draw_util::ViewportInfo& viewport_info, uint32_t used_texture_mask); bool UpdateBindings(const VulkanShader* vertex_shader, diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 5e9fec78d..d3049a561 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -134,13 +134,27 @@ VulkanPipelineCache::GetCurrentPixelShaderModification( assert_true(shader.type() == xenos::ShaderType::kPixel); assert_true(shader.is_ucode_analyzed()); const auto& regs = register_file_; - auto sq_program_cntl = regs.Get(); + SpirvShaderTranslator::Modification modification( shader_translator_->GetDefaultPixelShaderModification( shader.GetDynamicAddressableRegisterCount( sq_program_cntl.ps_num_reg))); + if (sq_program_cntl.param_gen) { + auto sq_context_misc = regs.Get(); + if (sq_context_misc.param_gen_pos < + std::min(std::max(modification.pixel.dynamic_addressable_register_count, + shader.register_static_address_bound()), + xenos::kMaxInterpolators)) { + modification.pixel.param_gen_enable = 1; + modification.pixel.param_gen_interpolator = sq_context_misc.param_gen_pos; + auto vgt_draw_initiator = regs.Get(); + modification.pixel.param_gen_point = uint32_t( + vgt_draw_initiator.prim_type == 
xenos::PrimitiveType::kPointList); + } + } + return modification; } From 05ef7a273afcb45e31152ec6c77720198c0b1693 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 28 Jun 2022 22:42:18 +0300 Subject: [PATCH 110/123] [Vulkan] Samplers (only 1.0 core features for now) --- .../gpu/vulkan/vulkan_command_processor.cc | 181 +++++++--- .../gpu/vulkan/vulkan_command_processor.h | 7 + src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 35 +- src/xenia/gpu/vulkan/vulkan_pipeline_cache.h | 2 + src/xenia/gpu/vulkan/vulkan_texture_cache.cc | 315 ++++++++++++++++++ src/xenia/gpu/vulkan/vulkan_texture_cache.h | 72 ++++ 6 files changed, 550 insertions(+), 62 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 29eebba8e..0a89bb122 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -2174,26 +2174,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, } // TODO(Triang3l): Memory export. - if (!BeginSubmission(true)) { - return false; - } - - // Process primitives. - PrimitiveProcessor::ProcessingResult primitive_processing_result; - if (!primitive_processor_->Process(primitive_processing_result)) { - return false; - } - if (!primitive_processing_result.host_draw_vertex_count) { - // Nothing to draw. - return true; - } - // TODO(Triang3l): Tessellation, geometry-type-specific vertex shader, vertex - // shader as compute. - if (primitive_processing_result.host_vertex_shader_type != - Shader::HostVertexShaderType::kVertex) { - return false; - } - reg::RB_DEPTHCONTROL normalized_depth_control = draw_util::GetNormalizedDepthControl(regs); uint32_t normalized_color_mask = @@ -2201,14 +2181,132 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, regs, pixel_shader->writes_color_targets()) : 0; - // Shader modifications. 
- SpirvShaderTranslator::Modification vertex_shader_modification = - pipeline_cache_->GetCurrentVertexShaderModification( - *vertex_shader, primitive_processing_result.host_vertex_shader_type); - SpirvShaderTranslator::Modification pixel_shader_modification = - pixel_shader ? pipeline_cache_->GetCurrentPixelShaderModification( - *pixel_shader, normalized_color_mask) - : SpirvShaderTranslator::Modification(0); + PrimitiveProcessor::ProcessingResult primitive_processing_result; + SpirvShaderTranslator::Modification vertex_shader_modification; + SpirvShaderTranslator::Modification pixel_shader_modification; + VulkanShader::VulkanTranslation* vertex_shader_translation; + VulkanShader::VulkanTranslation* pixel_shader_translation; + + // Two iterations because a submission (even the current one - in which case + // it needs to be ended, and a new one must be started) may need to be awaited + // in case of a sampler count overflow, and if that happens, all subsystem + // updates done previously must be performed again because the updates done + // before the awaiting may be referencing objects destroyed by + // CompletedSubmissionUpdated. + for (uint32_t i = 0; i < 2; ++i) { + if (!BeginSubmission(true)) { + return false; + } + + // Process primitives. + if (!primitive_processor_->Process(primitive_processing_result)) { + return false; + } + if (!primitive_processing_result.host_draw_vertex_count) { + // Nothing to draw. + return true; + } + // TODO(Triang3l): Tessellation, geometry-type-specific vertex shader, + // vertex shader as compute. + if (primitive_processing_result.host_vertex_shader_type != + Shader::HostVertexShaderType::kVertex) { + return false; + } + + // Shader modifications. + vertex_shader_modification = + pipeline_cache_->GetCurrentVertexShaderModification( + *vertex_shader, + primitive_processing_result.host_vertex_shader_type); + pixel_shader_modification = + pixel_shader ? 
pipeline_cache_->GetCurrentPixelShaderModification( + *pixel_shader, normalized_color_mask) + : SpirvShaderTranslator::Modification(0); + + // Translate the shaders now to obtain the sampler bindings. + vertex_shader_translation = static_cast( + vertex_shader->GetOrCreateTranslation( + vertex_shader_modification.value)); + pixel_shader_translation = + pixel_shader ? static_cast( + pixel_shader->GetOrCreateTranslation( + pixel_shader_modification.value)) + : nullptr; + if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader_translation, + pixel_shader_translation)) { + return false; + } + + // Obtain the samplers. Note that the bindings don't depend on the shader + // modification, so if on the second iteration of this loop it becomes + // different for some reason (like a race condition with the guest in index + // buffer processing in the primitive processor resulting in different host + // vertex shader types), the bindings will stay the same. + // TODO(Triang3l): Sampler caching and reuse for adjacent draws within one + // submission. + uint32_t samplers_overflowed_count = 0; + for (uint32_t j = 0; j < 2; ++j) { + std::vector>& + shader_samplers = + j ? current_samplers_pixel_ : current_samplers_vertex_; + if (!i) { + shader_samplers.clear(); + } + const VulkanShader* shader = j ? 
pixel_shader : vertex_shader; + if (!shader) { + continue; + } + const std::vector& shader_sampler_bindings = + shader->GetSamplerBindingsAfterTranslation(); + if (!i) { + shader_samplers.reserve(shader_sampler_bindings.size()); + for (const VulkanShader::SamplerBinding& shader_sampler_binding : + shader_sampler_bindings) { + shader_samplers.emplace_back( + texture_cache_->GetSamplerParameters(shader_sampler_binding), + VK_NULL_HANDLE); + } + } + for (std::pair& + shader_sampler_pair : shader_samplers) { + // UseSampler calls are needed even on the second iteration in case the + // submission was broken (and thus the last usage submission indices for + // the used samplers need to be updated) due to an overflow within one + // submission. Though sampler overflow is a very rare situation overall. + bool sampler_overflowed; + VkSampler shader_sampler = texture_cache_->UseSampler( + shader_sampler_pair.first, sampler_overflowed); + shader_sampler_pair.second = shader_sampler; + if (shader_sampler == VK_NULL_HANDLE) { + if (!sampler_overflowed || i) { + // If !sampler_overflowed, just failed to create a sampler for some + // reason. + // If i == 1, an overflow has happened twice, can't recover from it + // anymore (would enter an infinite loop otherwise if the number of + // attempts was not limited to 2). Possibly too many unique samplers + // in one draw, or failed to await submission completion. + return false; + } + ++samplers_overflowed_count; + } + } + } + if (!samplers_overflowed_count) { + break; + } + assert_zero(i); + // Free space for as many samplers as how many haven't been allocated + // successfully - obtain the submission index that needs to be awaited to + // reuse `samplers_overflowed_count` slots. This must be done after all the + // UseSampler calls, not inside the loop calling UseSampler, because earlier + // UseSampler calls may "mark for deletion" some samplers that later + // UseSampler calls in the loop may actually demand. 
+ uint64_t sampler_overflow_await_submission = + texture_cache_->GetSubmissionToAwaitOnSamplerOverflow( + samplers_overflowed_count); + assert_true(sampler_overflow_await_submission <= GetCurrentSubmission()); + CheckSubmissionFenceAndDeviceLoss(sampler_overflow_await_submission); + } // Set up the render targets - this may perform dispatches and draws. if (!render_target_cache_->Update(is_rasterization_done, @@ -2220,15 +2318,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // Create the pipeline (for this, need the render pass from the render target // cache), translating the shaders - doing this now to obtain the used // textures. - VulkanShader::VulkanTranslation* vertex_shader_translation = - static_cast( - vertex_shader->GetOrCreateTranslation( - vertex_shader_modification.value)); - VulkanShader::VulkanTranslation* pixel_shader_translation = - pixel_shader ? static_cast( - pixel_shader->GetOrCreateTranslation( - pixel_shader_modification.value)) - : nullptr; VkPipeline pipeline; const VulkanPipelineCache::PipelineLayoutProvider* pipeline_layout_provider; if (!pipeline_cache_->ConfigurePipeline( @@ -3532,18 +3621,15 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, (write_pixel_textures ? texture_count_pixel : 0)); size_t vertex_sampler_image_info_offset = descriptor_write_image_info_.size(); if (write_vertex_samplers) { - // TODO(Triang3l): Real samplers. 
- for (const VulkanShader::SamplerBinding& sampler_binding : - samplers_vertex) { + for (const std::pair& + sampler_pair : current_samplers_vertex_) { VkDescriptorImageInfo& descriptor_image_info = descriptor_write_image_info_.emplace_back(); - descriptor_image_info.sampler = provider.GetHostSampler( - ui::vulkan::VulkanProvider::HostSampler::kNearestClamp); + descriptor_image_info.sampler = sampler_pair.second; } } size_t vertex_texture_image_info_offset = descriptor_write_image_info_.size(); if (write_vertex_textures) { - // TODO(Triang3l): Real textures. for (const VulkanShader::TextureBinding& texture_binding : textures_vertex) { VkDescriptorImageInfo& descriptor_image_info = @@ -3558,18 +3644,15 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, } size_t pixel_sampler_image_info_offset = descriptor_write_image_info_.size(); if (write_pixel_samplers) { - // TODO(Triang3l): Real samplers. - for (const VulkanShader::SamplerBinding& sampler_binding : - *samplers_pixel) { + for (const std::pair& + sampler_pair : current_samplers_pixel_) { VkDescriptorImageInfo& descriptor_image_info = descriptor_write_image_info_.emplace_back(); - descriptor_image_info.sampler = provider.GetHostSampler( - ui::vulkan::VulkanProvider::HostSampler::kNearestClamp); + descriptor_image_info.sampler = sampler_pair.second; } } size_t pixel_texture_image_info_offset = descriptor_write_image_info_.size(); if (write_pixel_textures) { - // TODO(Triang3l): Real textures. 
for (const VulkanShader::TextureBinding& texture_binding : *textures_pixel) { VkDescriptorImageInfo& descriptor_image_info = diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index dece6e02a..cef27be57 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -158,6 +158,7 @@ class VulkanCommandProcessor : public CommandProcessor { return deferred_command_buffer_; } + bool submission_open() const { return submission_open_; } uint64_t GetCurrentSubmission() const { return submission_completed_ + uint64_t(submissions_in_flight_fences_.size()) + 1; @@ -676,6 +677,12 @@ class VulkanCommandProcessor : public CommandProcessor { bool dynamic_stencil_reference_front_update_needed_; bool dynamic_stencil_reference_back_update_needed_; + // Currently used samplers. + std::vector> + current_samplers_vertex_; + std::vector> + current_samplers_pixel_; + // Cache render pass currently started in the command buffer with the // framebuffer. 
VkRenderPass current_render_pass_; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index d3049a561..406a1a444 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -158,20 +158,9 @@ VulkanPipelineCache::GetCurrentPixelShaderModification( return modification; } -bool VulkanPipelineCache::ConfigurePipeline( +bool VulkanPipelineCache::EnsureShadersTranslated( VulkanShader::VulkanTranslation* vertex_shader, - VulkanShader::VulkanTranslation* pixel_shader, - const PrimitiveProcessor::ProcessingResult& primitive_processing_result, - reg::RB_DEPTHCONTROL normalized_depth_control, - uint32_t normalized_color_mask, - VulkanRenderTargetCache::RenderPassKey render_pass_key, - VkPipeline& pipeline_out, - const PipelineLayoutProvider*& pipeline_layout_out) { -#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES - SCOPE_profile_cpu_f("gpu"); -#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES - - // Ensure shaders are translated - needed now for GetCurrentStateDescription. + VulkanShader::VulkanTranslation* pixel_shader) { // Edge flags are not supported yet (because polygon primitives are not). 
assert_true(register_file_.Get().vs_export_mode != xenos::VertexShaderExportMode::kPosition2VectorsEdge && @@ -202,6 +191,26 @@ bool VulkanPipelineCache::ConfigurePipeline( return false; } } + return true; +} + +bool VulkanPipelineCache::ConfigurePipeline( + VulkanShader::VulkanTranslation* vertex_shader, + VulkanShader::VulkanTranslation* pixel_shader, + const PrimitiveProcessor::ProcessingResult& primitive_processing_result, + reg::RB_DEPTHCONTROL normalized_depth_control, + uint32_t normalized_color_mask, + VulkanRenderTargetCache::RenderPassKey render_pass_key, + VkPipeline& pipeline_out, + const PipelineLayoutProvider*& pipeline_layout_out) { +#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + SCOPE_profile_cpu_f("gpu"); +#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES + + // Ensure shaders are translated - needed now for GetCurrentStateDescription. + if (!EnsureShadersTranslated(vertex_shader, pixel_shader)) { + return false; + } PipelineDescription description; if (!GetCurrentStateDescription( diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h index 819bd6e16..141d756c8 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h @@ -74,6 +74,8 @@ class VulkanPipelineCache { SpirvShaderTranslator::Modification GetCurrentPixelShaderModification( const Shader& shader, uint32_t normalized_color_mask) const; + bool EnsureShadersTranslated(VulkanShader::VulkanTranslation* vertex_shader, + VulkanShader::VulkanTranslation* pixel_shader); // TODO(Triang3l): Return a deferred creation handle. 
bool ConfigurePipeline( VulkanShader::VulkanTranslation* vertex_shader, diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index ac7543330..51c1d1b09 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -18,6 +18,7 @@ #include "xenia/base/math.h" #include "xenia/base/profiling.h" #include "xenia/gpu/texture_info.h" +#include "xenia/gpu/texture_util.h" #include "xenia/gpu/vulkan/deferred_command_buffer.h" #include "xenia/gpu/vulkan/vulkan_command_processor.h" #include "xenia/ui/vulkan/vulkan_util.h" @@ -425,6 +426,15 @@ VulkanTextureCache::~VulkanTextureCache() { const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + for (const std::pair& sampler_pair : + samplers_) { + dfn.vkDestroySampler(device, sampler_pair.second.sampler, nullptr); + } + samplers_.clear(); + COUNT_profile_set("gpu/texture_cache/vulkan/samplers", 0); + sampler_used_last_ = nullptr; + sampler_used_first_ = nullptr; + if (null_image_view_3d_ != VK_NULL_HANDLE) { dfn.vkDestroyImageView(device, null_image_view_3d_, nullptr); } @@ -589,6 +599,266 @@ VkImageView VulkanTextureCache::GetActiveBindingOrNullImageView( } } +VulkanTextureCache::SamplerParameters VulkanTextureCache::GetSamplerParameters( + const VulkanShader::SamplerBinding& binding) const { + const auto& regs = register_file(); + const auto& fetch = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6); + + SamplerParameters parameters; + + xenos::ClampMode fetch_clamp_x, fetch_clamp_y, fetch_clamp_z; + texture_util::GetClampModesForDimension(fetch, fetch_clamp_x, fetch_clamp_y, + fetch_clamp_z); + parameters.clamp_x = NormalizeClampMode(fetch_clamp_x); + parameters.clamp_y = NormalizeClampMode(fetch_clamp_y); + parameters.clamp_z = NormalizeClampMode(fetch_clamp_z); + if (xenos::ClampModeUsesBorder(parameters.clamp_x) || + 
xenos::ClampModeUsesBorder(parameters.clamp_y) || + xenos::ClampModeUsesBorder(parameters.clamp_z)) { + parameters.border_color = fetch.border_color; + } else { + parameters.border_color = xenos::BorderColor::k_ABGR_Black; + } + + xenos::TextureFilter mag_filter = + binding.mag_filter == xenos::TextureFilter::kUseFetchConst + ? fetch.mag_filter + : binding.mag_filter; + parameters.mag_linear = mag_filter == xenos::TextureFilter::kLinear; + xenos::TextureFilter min_filter = + binding.min_filter == xenos::TextureFilter::kUseFetchConst + ? fetch.min_filter + : binding.min_filter; + parameters.min_linear = min_filter == xenos::TextureFilter::kLinear; + xenos::TextureFilter mip_filter = + binding.mip_filter == xenos::TextureFilter::kUseFetchConst + ? fetch.mip_filter + : binding.mip_filter; + parameters.mip_linear = mip_filter == xenos::TextureFilter::kLinear; + if (parameters.mag_linear || parameters.min_linear || parameters.mip_linear) { + // Check if the texture is actually filterable on the host. + bool linear_filterable = true; + TextureKey texture_key; + uint8_t texture_swizzled_signs; + BindingInfoFromFetchConstant(fetch, texture_key, &texture_swizzled_signs); + if (texture_key.is_valid) { + const HostFormatPair& host_format_pair = GetHostFormatPair(texture_key); + if ((texture_util::IsAnySignNotSigned(texture_swizzled_signs) && + !host_format_pair.format_unsigned.linear_filterable) || + (texture_util::IsAnySignSigned(texture_swizzled_signs) && + !host_format_pair.format_signed.linear_filterable)) { + linear_filterable = false; + } + } else { + linear_filterable = false; + } + if (!linear_filterable) { + parameters.mag_linear = 0; + parameters.min_linear = 0; + parameters.mip_linear = 0; + } + } + xenos::AnisoFilter aniso_filter = + binding.aniso_filter == xenos::AnisoFilter::kUseFetchConst + ? 
fetch.aniso_filter + : binding.aniso_filter; + parameters.aniso_filter = std::min(aniso_filter, max_anisotropy_); + parameters.mip_base_map = mip_filter == xenos::TextureFilter::kBaseMap; + + uint32_t mip_min_level; + texture_util::GetSubresourcesFromFetchConstant(fetch, nullptr, nullptr, + nullptr, nullptr, nullptr, + &mip_min_level, nullptr); + parameters.mip_min_level = mip_min_level; + + return parameters; +} + +VkSampler VulkanTextureCache::UseSampler(SamplerParameters parameters, + bool& has_overflown_out) { + assert_true(command_processor_.submission_open()); + uint64_t submission_current = command_processor_.GetCurrentSubmission(); + + // Try to find an existing sampler. + auto it_existing = samplers_.find(parameters); + if (it_existing != samplers_.end()) { + std::pair& sampler = *it_existing; + assert_true(sampler.second.last_usage_submission <= submission_current); + // This is called very frequently, don't relink unless needed for caching. + if (sampler.second.last_usage_submission < submission_current) { + // Move to the front of the LRU queue. + sampler.second.last_usage_submission = submission_current; + if (sampler.second.used_next) { + if (sampler.second.used_previous) { + sampler.second.used_previous->second.used_next = + sampler.second.used_next; + } else { + sampler_used_first_ = sampler.second.used_next; + } + sampler.second.used_next->second.used_previous = + sampler.second.used_previous; + sampler.second.used_previous = sampler_used_last_; + sampler.second.used_next = nullptr; + sampler_used_last_->second.used_next = &sampler; + sampler_used_last_ = &sampler; + } + } + has_overflown_out = false; + return sampler.second.sampler; + } + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + // See if an existing sampler can be destroyed to create space for the new + // one. 
+ if (samplers_.size() >= sampler_max_count_) { + assert_not_null(sampler_used_first_); + if (!sampler_used_first_) { + has_overflown_out = false; + return VK_NULL_HANDLE; + } + if (sampler_used_first_->second.last_usage_submission > + command_processor_.GetCompletedSubmission()) { + has_overflown_out = true; + return VK_NULL_HANDLE; + } + auto it_reuse = samplers_.find(sampler_used_first_->first); + dfn.vkDestroySampler(device, sampler_used_first_->second.sampler, nullptr); + if (sampler_used_first_->second.used_next) { + sampler_used_first_->second.used_next->second.used_previous = + sampler_used_first_->second.used_previous; + } else { + sampler_used_last_ = sampler_used_first_->second.used_previous; + } + sampler_used_first_ = sampler_used_first_->second.used_next; + assert_true(it_reuse != samplers_.end()); + if (it_reuse != samplers_.end()) { + // This destroys the Sampler object. + samplers_.erase(it_reuse); + COUNT_profile_set("gpu/texture_cache/vulkan/samplers", samplers_.size()); + } else { + has_overflown_out = false; + return VK_NULL_HANDLE; + } + } + + // Create a new sampler and make it the least recently used. + // The values are normalized, and unsupported ones are excluded, in + // GetSamplerParameters. + VkSamplerCreateInfo sampler_create_info = {}; + sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + // TODO(Triang3l): VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT if + // VK_EXT_non_seamless_cube_map and the nonSeamlessCubeMap feature are + // supported. + sampler_create_info.magFilter = + parameters.mag_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; + sampler_create_info.minFilter = + parameters.mag_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; + sampler_create_info.mipmapMode = parameters.mag_linear + ? 
VK_SAMPLER_MIPMAP_MODE_LINEAR + : VK_SAMPLER_MIPMAP_MODE_NEAREST; + static const VkSamplerAddressMode kAddressModeMap[] = { + // kRepeat + VK_SAMPLER_ADDRESS_MODE_REPEAT, + // kMirroredRepeat + VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT, + // kClampToEdge + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + // kMirrorClampToEdge + VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR, + // kClampToHalfway + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + // kMirrorClampToHalfway + VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR, + // kClampToBorder + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + // kMirrorClampToBorder + VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR, + }; + sampler_create_info.addressModeU = + kAddressModeMap[uint32_t(parameters.clamp_x)]; + sampler_create_info.addressModeV = + kAddressModeMap[uint32_t(parameters.clamp_y)]; + sampler_create_info.addressModeW = + kAddressModeMap[uint32_t(parameters.clamp_z)]; + // LOD biasing is performed in shaders. + if (parameters.aniso_filter != xenos::AnisoFilter::kDisabled) { + sampler_create_info.anisotropyEnable = VK_TRUE; + sampler_create_info.maxAnisotropy = + float(UINT32_C(1) << (uint32_t(parameters.aniso_filter) - + uint32_t(xenos::AnisoFilter::kMax_1_1))); + } + sampler_create_info.minLod = float(parameters.mip_min_level); + if (parameters.mip_base_map) { + assert_false(parameters.mip_linear); + sampler_create_info.maxLod = sampler_create_info.minLod + 0.25f; + } else { + sampler_create_info.maxLod = VK_LOD_CLAMP_NONE; + } + // TODO(Triang3l): Custom border colors for CrYCb / YCrCb. 
+ switch (parameters.border_color) { + case xenos::BorderColor::k_ABGR_White: + sampler_create_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + break; + default: + sampler_create_info.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + break; + } + VkSampler vulkan_sampler; + if (dfn.vkCreateSampler(device, &sampler_create_info, nullptr, + &vulkan_sampler) != VK_SUCCESS) { + XELOGE( + "VulkanTextureCache: Failed to create the sampler for parameters " + "0x{:08X}", + parameters.value); + has_overflown_out = false; + return VK_NULL_HANDLE; + } + std::pair& new_sampler = + *(samplers_ + .emplace(std::piecewise_construct, + std::forward_as_tuple(parameters), std::forward_as_tuple()) + .first); + COUNT_profile_set("gpu/texture_cache/vulkan/samplers", samplers_.size()); + new_sampler.second.sampler = vulkan_sampler; + new_sampler.second.last_usage_submission = submission_current; + new_sampler.second.used_previous = sampler_used_last_; + new_sampler.second.used_next = nullptr; + if (sampler_used_last_) { + sampler_used_last_->second.used_next = &new_sampler; + } else { + sampler_used_first_ = &new_sampler; + } + sampler_used_last_ = &new_sampler; + return vulkan_sampler; +} + +uint64_t VulkanTextureCache::GetSubmissionToAwaitOnSamplerOverflow( + uint32_t overflowed_sampler_count) const { + if (!overflowed_sampler_count) { + return 0; + } + std::pair* sampler_used = + sampler_used_first_; + if (!sampler_used_first_) { + return 0; + } + for (uint32_t samplers_remaining = overflowed_sampler_count - 1; + samplers_remaining; --samplers_remaining) { + std::pair* sampler_used_next = + sampler_used->second.used_next; + if (!sampler_used_next) { + break; + } + sampler_used = sampler_used_next; + } + return sampler_used->second.last_usage_submission; +} + VkImageView VulkanTextureCache::RequestSwapTexture( uint32_t& width_scaled_out, uint32_t& height_scaled_out, xenos::TextureFormat& format_out) { @@ -2278,6 +2548,32 @@ bool VulkanTextureCache::Initialize() { 
null_images_cleared_ = false; + // Samplers. + + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + const VkPhysicalDeviceLimits& device_limits = + provider.device_properties().limits; + + // Some MoltenVK devices have a maximum of 2048, 1024, or even 96 samplers, + // below Vulkan's minimum requirement of 4000. + // Assuming that the current VulkanTextureCache is the only one on this + // VkDevice (true in a regular emulation scenario), so taking over all the + // allocation slots exclusively. + // Also leaving a few slots for use by things like overlay applications. + sampler_max_count_ = + device_limits.maxSamplerAllocationCount - + uint32_t(ui::vulkan::VulkanProvider::HostSampler::kCount) - 16; + + if (device_features.samplerAnisotropy) { + max_anisotropy_ = xenos::AnisoFilter( + uint32_t(xenos::AnisoFilter::kMax_1_1) + + (31 - + xe::lzcnt(uint32_t(std::min( + 16.0f, std::max(1.0f, device_limits.maxSamplerAnisotropy)))))); + } else { + max_anisotropy_ = xenos::AnisoFilter::kDisabled; + } + return true; } @@ -2325,6 +2621,25 @@ void VulkanTextureCache::GetTextureUsageMasks(VulkanTexture::Usage usage, } } +xenos::ClampMode VulkanTextureCache::NormalizeClampMode( + xenos::ClampMode clamp_mode) const { + if (clamp_mode == xenos::ClampMode::kClampToHalfway) { + // No GL_CLAMP (clamp to half edge, half border) equivalent in Vulkan, but + // there's no Direct3D 9 equivalent anyway, and too weird to be suitable for + // intentional real usage. + return xenos::ClampMode::kClampToEdge; + } + if (clamp_mode == xenos::ClampMode::kMirrorClampToEdge || + clamp_mode == xenos::ClampMode::kMirrorClampToHalfway || + clamp_mode == xenos::ClampMode::kMirrorClampToBorder) { + // TODO(Triang3l): VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR if + // VK_KHR_sampler_mirror_clamp_to_edge (or Vulkan 1.2) and the + // samplerMirrorClampToEdge feature are supported. 
+ return xenos::ClampMode::kMirroredRepeat; + } + return clamp_mode; +} + } // namespace vulkan } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.h b/src/xenia/gpu/vulkan/vulkan_texture_cache.h index 423131c55..79e5db493 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.h @@ -17,6 +17,7 @@ #include "xenia/base/hash.h" #include "xenia/gpu/texture_cache.h" +#include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/vulkan/vulkan_shared_memory.h" #include "xenia/ui/vulkan/vulkan_provider.h" @@ -28,6 +29,39 @@ class VulkanCommandProcessor; class VulkanTextureCache final : public TextureCache { public: + // Sampler parameters that can be directly converted to a host sampler or used + // for checking whether samplers bindings are up to date. + union SamplerParameters { + uint32_t value; + struct { + xenos::ClampMode clamp_x : 3; // 3 + xenos::ClampMode clamp_y : 3; // 6 + xenos::ClampMode clamp_z : 3; // 9 + xenos::BorderColor border_color : 2; // 11 + uint32_t mag_linear : 1; // 12 + uint32_t min_linear : 1; // 13 + uint32_t mip_linear : 1; // 14 + xenos::AnisoFilter aniso_filter : 3; // 17 + uint32_t mip_min_level : 4; // 21 + uint32_t mip_base_map : 1; // 22 + // Maximum mip level is in the texture resource itself, but mip_base_map + // can be used to limit fetching to mip_min_level. + }; + + SamplerParameters() : value(0) { static_assert_size(*this, sizeof(value)); } + struct Hasher { + size_t operator()(const SamplerParameters& parameters) const { + return std::hash{}(parameters.value); + } + }; + bool operator==(const SamplerParameters& parameters) const { + return value == parameters.value; + } + bool operator!=(const SamplerParameters& parameters) const { + return value != parameters.value; + } + }; + // Transient descriptor set layouts must be initialized in the command // processor. 
static std::unique_ptr Create( @@ -60,6 +94,26 @@ class VulkanTextureCache final : public TextureCache { xenos::FetchOpDimension dimension, bool is_signed) const; + SamplerParameters GetSamplerParameters( + const VulkanShader::SamplerBinding& binding) const; + + // Must be called for every used sampler at least once in a single submission, + // and a submission must be open for this to be callable. + // Returns: + // - The sampler, if obtained successfully - and increases its last usage + // submission index - and has_overflown_out = false. + // - VK_NULL_HANDLE and has_overflown_out = true if there's a total sampler + // count overflow in a submission that potentially hasn't completed yet. + // - VK_NULL_HANDLE and has_overflown_out = false in case of a general failure + // to create a sampler. + VkSampler UseSampler(SamplerParameters parameters, bool& has_overflown_out); + // Returns the submission index to await (may be the current submission in + // case of an overflow within a single submission - in this case, it must be + // ended, and a new one must be started) in case of sampler count overflow, so + // samplers may be freed, and UseSamplers may take their slots. + uint64_t GetSubmissionToAwaitOnSamplerOverflow( + uint32_t overflowed_sampler_count) const; + // Returns the 2D view of the front buffer texture (for fragment shader // reading - the barrier will be pushed in the command processor if needed), // or VK_NULL_HANDLE in case of failure. May call LoadTextureData. 
@@ -220,6 +274,13 @@ class VulkanTextureCache final : public TextureCache { } }; + struct Sampler { + VkSampler sampler; + uint64_t last_usage_submission; + std::pair* used_previous; + std::pair* used_next; + }; + static constexpr bool AreDimensionsCompatible( xenos::FetchOpDimension binding_dimension, xenos::DataDimension resource_dimension) { @@ -251,6 +312,8 @@ class VulkanTextureCache final : public TextureCache { VkPipelineStageFlags& stage_mask, VkAccessFlags& access_mask, VkImageLayout& layout); + xenos::ClampMode NormalizeClampMode(xenos::ClampMode clamp_mode) const; + VulkanCommandProcessor& command_processor_; VkPipelineStageFlags guest_shader_pipeline_stages_; @@ -275,6 +338,15 @@ class VulkanTextureCache final : public TextureCache { std::array vulkan_texture_bindings_; + + uint32_t sampler_max_count_; + + xenos::AnisoFilter max_anisotropy_; + + std::unordered_map + samplers_; + std::pair* sampler_used_first_ = nullptr; + std::pair* sampler_used_last_ = nullptr; }; } // namespace vulkan From d5815d9e6a29e198e0cf21c0dfda4bddaae0e72e Mon Sep 17 00:00:00 2001 From: Triang3l Date: Wed, 29 Jun 2022 13:14:00 +0300 Subject: [PATCH 111/123] [Vulkan] Float24 depth range remapping fixes --- src/xenia/gpu/spirv_shader_translator_rb.cc | 2 +- src/xenia/gpu/vulkan/vulkan_command_processor.cc | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator_rb.cc b/src/xenia/gpu/spirv_shader_translator_rb.cc index 829b3f576..82a58dfec 100644 --- a/src/xenia/gpu/spirv_shader_translator_rb.cc +++ b/src/xenia/gpu/spirv_shader_translator_rb.cc @@ -296,7 +296,7 @@ spv::Id SpirvShaderTranslator::PreClampedDepthTo20e4( // normal_biased_f32 = f32 - (112 << 23) spv::Id normal_biased_f32 = builder.createBinOp( spv::OpISub, type_uint, f32_scalar, - builder.makeUintConstant((UINT32_C(112) + remap_bias) << 23)); + builder.makeUintConstant((UINT32_C(112) - remap_bias) << 23)); // Select the needed conversion depending on whether the 
number is too small // to be represented as normalized 20e4. diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 0a89bb122..c9e3cb76a 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -2397,6 +2397,9 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, const VkPhysicalDeviceLimits& device_limits = provider.device_properties().limits; + bool host_render_targets_used = render_target_cache_->GetPath() == + RenderTargetCache::Path::kHostRenderTargets; + // Get dynamic rasterizer state. draw_util::ViewportInfo viewport_info; // Just handling maxViewportDimensions is enough - viewportBoundsRange[1] must @@ -2418,7 +2421,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, draw_util::GetHostViewportInfo( regs, 1, 1, false, device_limits.maxViewportDimensions[0], device_limits.maxViewportDimensions[1], true, normalized_depth_control, - false, false, false, viewport_info); + false, host_render_targets_used, + pixel_shader && pixel_shader->writes_depth(), viewport_info); // Update dynamic graphics pipeline state. 
UpdateDynamicState(viewport_info, primitive_polygonal, From 7c2df552095b6b9160e616242a73f09a9909f9c5 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Wed, 29 Jun 2022 13:24:45 +0300 Subject: [PATCH 112/123] [Vulkan] Cache clear: shared memory, scratch buffer --- .../gpu/vulkan/vulkan_command_processor.cc | 34 ++++++++++++++----- .../gpu/vulkan/vulkan_command_processor.h | 2 ++ 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index c9e3cb76a..6eb614088 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -979,10 +979,7 @@ void VulkanCommandProcessor::ShutdownContext() { const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, - scratch_buffer_); - ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, - scratch_buffer_memory_); + DestroyScratchBuffer(); for (SwapFramebuffer& swap_framebuffer : swap_framebuffers_) { ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyFramebuffer, device, @@ -3064,6 +3061,13 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { if (cache_clear_requested_ && AwaitAllQueueOperationsCompletion()) { cache_clear_requested_ = false; + DestroyScratchBuffer(); + + for (SwapFramebuffer& swap_framebuffer : swap_framebuffers_) { + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyFramebuffer, device, + swap_framebuffer.framebuffer); + } + assert_true(command_buffers_submitted_.empty()); for (const CommandBuffer& command_buffer : command_buffers_writable_) { dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr); @@ -3083,10 +3087,7 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { primitive_processor_->ClearCache(); - for (SwapFramebuffer& swap_framebuffer : swap_framebuffers_) { - 
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyFramebuffer, device, - swap_framebuffer.framebuffer); - } + shared_memory_->ClearCache(); } } @@ -3130,6 +3131,23 @@ void VulkanCommandProcessor::SplitPendingBarrier() { pending_image_memory_barrier_count; } +void VulkanCommandProcessor::DestroyScratchBuffer() { + assert_false(scratch_buffer_used_); + + const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + scratch_buffer_last_usage_submission_ = 0; + scratch_buffer_last_access_mask_ = 0; + scratch_buffer_last_stage_mask_ = 0; + scratch_buffer_size_ = 0; + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, + scratch_buffer_); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device, + scratch_buffer_memory_); +} + void VulkanCommandProcessor::UpdateDynamicState( const draw_util::ViewportInfo& viewport_info, bool primitive_polygonal, reg::RB_DEPTHCONTROL normalized_depth_control) { diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index cef27be57..1186310f2 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -447,6 +447,8 @@ class VulkanCommandProcessor : public CommandProcessor { void SplitPendingBarrier(); + void DestroyScratchBuffer(); + void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info, bool primitive_polygonal, reg::RB_DEPTHCONTROL normalized_depth_control); From c0c3666e121ae377b2cfab7ce115f6e4c512450b Mon Sep 17 00:00:00 2001 From: Triang3l Date: Wed, 29 Jun 2022 23:41:32 +0300 Subject: [PATCH 113/123] [Vulkan] Align texture extents in loading to vector size accessed by the shader Fixes loading of the 1x1 linear 8_8_8_8 texture containing just a single #FFFFFFFF texel in 4D5307E6, which is used for screen fade and the lobby map loading bar background --- 
src/xenia/gpu/vulkan/vulkan_texture_cache.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index 51c1d1b09..f2dc4ea29 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -1256,6 +1256,11 @@ bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, } // TODO(Triang3l): Use a single 512 MB shared memory binding if possible. // TODO(Triang3l): Scaled resolve buffer bindings. + // Aligning because if the data for a vector in a storage buffer is provided + // partially, the value read may still be (0, 0, 0, 0), and small (especially + // linear) textures won't be loaded correctly. + uint32_t source_length_alignment = UINT32_C(1) + << load_shader_info.source_bpe_log2; VkDescriptorSet descriptor_set_source_base = VK_NULL_HANDLE; VkDescriptorSet descriptor_set_source_mips = VK_NULL_HANDLE; VkDescriptorBufferInfo write_descriptor_set_source_base_buffer_info; @@ -1273,7 +1278,7 @@ bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, write_descriptor_set_source_base_buffer_info.offset = texture_key.base_page << 12; write_descriptor_set_source_base_buffer_info.range = - vulkan_texture.GetGuestBaseSize(); + xe::align(vulkan_texture.GetGuestBaseSize(), source_length_alignment); VkWriteDescriptorSet& write_descriptor_set_source_base = write_descriptor_sets[write_descriptor_set_count++]; write_descriptor_set_source_base.sType = @@ -1303,7 +1308,7 @@ bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, write_descriptor_set_source_mips_buffer_info.offset = texture_key.mip_page << 12; write_descriptor_set_source_mips_buffer_info.range = - vulkan_texture.GetGuestMipsSize(); + xe::align(vulkan_texture.GetGuestMipsSize(), source_length_alignment); VkWriteDescriptorSet& write_descriptor_set_source_mips = 
write_descriptor_sets[write_descriptor_set_count++]; write_descriptor_set_source_mips.sType = From f8b351138e33c45f16287e045f14edf50605b02c Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 30 Jun 2022 22:20:51 +0300 Subject: [PATCH 114/123] [Vulkan] Alpha test --- src/xenia/gpu/spirv_shader_translator.cc | 6 + src/xenia/gpu/spirv_shader_translator.h | 31 ++++- src/xenia/gpu/spirv_shader_translator_rb.cc | 123 ++++++++++++++++++ .../gpu/vulkan/vulkan_command_processor.cc | 12 ++ src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 11 ++ 5 files changed, 182 insertions(+), 1 deletion(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index a6830c20f..cfbbd28e4 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -231,6 +231,8 @@ void SpirvShaderTranslator::StartTranslation() { offsetof(SystemConstants, texture_swizzled_signs), type_uint4_array_2}, {"texture_swizzles", offsetof(SystemConstants, texture_swizzles), type_uint4_array_4}, + {"alpha_test_reference", offsetof(SystemConstants, alpha_test_reference), + type_float_}, {"color_exp_bias", offsetof(SystemConstants, color_exp_bias), type_float4_}, }; @@ -606,6 +608,10 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { execution_model = spv::ExecutionModelFragment; builder_->addExecutionMode(function_main_, spv::ExecutionModeOriginUpperLeft); + if (IsExecutionModeEarlyFragmentTests()) { + builder_->addExecutionMode(function_main_, + spv::ExecutionModeEarlyFragmentTests); + } } else { assert_true(is_vertex_shader()); execution_model = IsSpirvTessEvalShader() diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 0a94300a0..aca23efe5 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -34,7 +34,16 @@ class SpirvShaderTranslator : public ShaderTranslator { // TODO(Triang3l): Change to 0xYYYYMMDD once it's out of the 
rapid // prototyping stage (easier to do small granular updates with an // incremental counter). - static constexpr uint32_t kVersion = 3; + static constexpr uint32_t kVersion = 4; + + enum class DepthStencilMode : uint32_t { + kNoModifiers, + // Early fragment tests - enable if alpha test and alpha to coverage are + // disabled; ignored if anything in the shader blocks early Z writing. + kEarlyHint, + // TODO(Triang3l): Unorm24 (rounding) and float24 (truncating and + // rounding) output modes. + }; struct { // Dynamically indexable register count from SQ_PROGRAM_CNTL. @@ -52,6 +61,8 @@ class SpirvShaderTranslator : public ShaderTranslator { // must not be set for other primitive types - enables the point sprite // coordinates input, and also effects the flag bits in PsParamGen. uint32_t param_gen_point : 1; + // For host render targets - depth / stencil output mode. + DepthStencilMode depth_stencil_mode : 3; } pixel; uint64_t value = 0; @@ -64,6 +75,9 @@ class SpirvShaderTranslator : public ShaderTranslator { kSysFlag_WNotReciprocal_Shift, kSysFlag_PrimitivePolygonal_Shift, kSysFlag_PrimitiveLine_Shift, + kSysFlag_AlphaPassIfLess_Shift, + kSysFlag_AlphaPassIfEqual_Shift, + kSysFlag_AlphaPassIfGreater_Shift, kSysFlag_ConvertColor0ToGamma_Shift, kSysFlag_ConvertColor1ToGamma_Shift, kSysFlag_ConvertColor2ToGamma_Shift, @@ -76,6 +90,9 @@ class SpirvShaderTranslator : public ShaderTranslator { kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift, kSysFlag_PrimitivePolygonal = 1u << kSysFlag_PrimitivePolygonal_Shift, kSysFlag_PrimitiveLine = 1u << kSysFlag_PrimitiveLine_Shift, + kSysFlag_AlphaPassIfLess = 1u << kSysFlag_AlphaPassIfLess_Shift, + kSysFlag_AlphaPassIfEqual = 1u << kSysFlag_AlphaPassIfEqual_Shift, + kSysFlag_AlphaPassIfGreater = 1u << kSysFlag_AlphaPassIfGreater_Shift, kSysFlag_ConvertColor0ToGamma = 1u << kSysFlag_ConvertColor0ToGamma_Shift, kSysFlag_ConvertColor1ToGamma = 1u << kSysFlag_ConvertColor1ToGamma_Shift, kSysFlag_ConvertColor2ToGamma = 
1u << kSysFlag_ConvertColor2ToGamma_Shift, @@ -108,6 +125,9 @@ class SpirvShaderTranslator : public ShaderTranslator { // swizzles for 2 texture fetch constants (in bits 0:11 and 12:23). uint32_t texture_swizzles[16]; + float alpha_test_reference; + float padding_alpha_test_reference[3]; + float color_exp_bias[4]; }; @@ -311,6 +331,14 @@ class SpirvShaderTranslator : public ShaderTranslator { GetSpirvShaderModification().vertex.host_vertex_shader_type); } + bool IsExecutionModeEarlyFragmentTests() const { + // TODO(Triang3l): Not applicable to fragment shader interlock. + return is_pixel_shader() && + GetSpirvShaderModification().pixel.depth_stencil_mode == + Modification::DepthStencilMode::kEarlyHint && + current_shader().implicit_early_z_write_allowed(); + } + // Returns UINT32_MAX if PsParamGen doesn't need to be written. uint32_t GetPsParamGenInterpolator() const; @@ -528,6 +556,7 @@ class SpirvShaderTranslator : public ShaderTranslator { kSystemConstantNdcOffset, kSystemConstantTextureSwizzledSigns, kSystemConstantTextureSwizzles, + kSystemConstantAlphaTestReference, kSystemConstantColorExpBias, }; spv::Id uniform_system_constants_; diff --git a/src/xenia/gpu/spirv_shader_translator_rb.cc b/src/xenia/gpu/spirv_shader_translator_rb.cc index 82a58dfec..c594a902f 100644 --- a/src/xenia/gpu/spirv_shader_translator_rb.cc +++ b/src/xenia/gpu/spirv_shader_translator_rb.cc @@ -433,6 +433,129 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { uniform_system_constants_, id_vector_temp_), spv::NoPrecision); + if (current_shader().writes_color_target(0) && + !IsExecutionModeEarlyFragmentTests()) { + // Alpha test. + // TODO(Triang3l): Check how alpha test works with NaN on Direct3D 9. + // Extract the comparison function (less, equal, greater bits). 
+ spv::Id alpha_test_function = builder_->createTriOp( + spv::OpBitFieldUExtract, type_uint_, main_system_constant_flags_, + builder_->makeUintConstant(kSysFlag_AlphaPassIfLess_Shift), + builder_->makeUintConstant(3)); + // Check if the comparison function is not "always" - that should pass even + // for NaN likely, unlike "less, equal or greater". + spv::Id alpha_test_function_is_non_always = builder_->createBinOp( + spv::OpINotEqual, type_bool_, alpha_test_function, + builder_->makeUintConstant(uint32_t(xenos::CompareFunction::kAlways))); + spv::Block& block_alpha_test = builder_->makeNewBlock(); + spv::Block& block_alpha_test_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_alpha_test_merge.getId(), + spv::SelectionControlDontFlattenMask); + builder_->createConditionalBranch(alpha_test_function_is_non_always, + &block_alpha_test, + &block_alpha_test_merge); + builder_->setBuildPoint(&block_alpha_test); + { + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeIntConstant(3)); + spv::Id alpha_test_alpha = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassOutput, + output_fragment_data_[0], id_vector_temp_), + spv::NoPrecision); + id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(kSystemConstantAlphaTestReference)); + spv::Id alpha_test_reference = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision); + // The comparison function is not "always" - perform the alpha test. + // Handle "not equal" specially (specifically as "not equal" so it's true + // for NaN, not "less or greater" which is false for NaN). 
+ spv::Id alpha_test_function_is_not_equal = builder_->createBinOp( + spv::OpIEqual, type_bool_, alpha_test_function, + builder_->makeUintConstant( + uint32_t(xenos::CompareFunction::kNotEqual))); + spv::Block& block_alpha_test_not_equal = builder_->makeNewBlock(); + spv::Block& block_alpha_test_non_not_equal = builder_->makeNewBlock(); + spv::Block& block_alpha_test_not_equal_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_alpha_test_not_equal_merge.getId(), + spv::SelectionControlDontFlattenMask); + builder_->createConditionalBranch(alpha_test_function_is_not_equal, + &block_alpha_test_not_equal, + &block_alpha_test_non_not_equal); + spv::Id alpha_test_result_not_equal, alpha_test_result_non_not_equal; + builder_->setBuildPoint(&block_alpha_test_not_equal); + { + // "Not equal" function. + alpha_test_result_not_equal = + builder_->createBinOp(spv::OpFUnordNotEqual, type_bool_, + alpha_test_alpha, alpha_test_reference); + builder_->createBranch(&block_alpha_test_not_equal_merge); + } + builder_->setBuildPoint(&block_alpha_test_non_not_equal); + { + // Function other than "not equal". 
+ static const spv::Op kAlphaTestOps[] = { + spv::OpFOrdLessThan, spv::OpFOrdEqual, spv::OpFOrdGreaterThan}; + for (uint32_t i = 0; i < 3; ++i) { + spv::Id alpha_test_comparison_result = builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, + builder_->createBinOp(kAlphaTestOps[i], type_bool_, + alpha_test_alpha, alpha_test_reference), + builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, alpha_test_function, + builder_->makeUintConstant(UINT32_C(1) << i)), + const_uint_0_)); + if (i) { + alpha_test_result_non_not_equal = builder_->createBinOp( + spv::OpLogicalOr, type_bool_, alpha_test_result_non_not_equal, + alpha_test_comparison_result); + } else { + alpha_test_result_non_not_equal = alpha_test_comparison_result; + } + } + builder_->createBranch(&block_alpha_test_not_equal_merge); + } + builder_->setBuildPoint(&block_alpha_test_not_equal_merge); + spv::Id alpha_test_result; + { + std::unique_ptr alpha_test_result_phi_op = + std::make_unique(builder_->getUniqueId(), + type_bool_, spv::OpPhi); + alpha_test_result_phi_op->addIdOperand(alpha_test_result_not_equal); + alpha_test_result_phi_op->addIdOperand( + block_alpha_test_not_equal.getId()); + alpha_test_result_phi_op->addIdOperand(alpha_test_result_non_not_equal); + alpha_test_result_phi_op->addIdOperand( + block_alpha_test_non_not_equal.getId()); + alpha_test_result = alpha_test_result_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(alpha_test_result_phi_op)); + } + // Discard the pixel if the alpha test has failed. Creating a merge block + // even though it will contain just one OpBranch since SPIR-V requires + // structured control flow in shaders. 
+ spv::Block& block_alpha_test_kill = builder_->makeNewBlock(); + spv::Block& block_alpha_test_kill_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_alpha_test_kill_merge.getId(), + spv::SelectionControlDontFlattenMask); + builder_->createConditionalBranch(alpha_test_result, + &block_alpha_test_kill_merge, + &block_alpha_test_kill); + builder_->setBuildPoint(&block_alpha_test_kill); + builder_->createNoResultOp(spv::OpKill); + // OpKill terminates the block. + builder_->setBuildPoint(&block_alpha_test_kill_merge); + builder_->createBranch(&block_alpha_test_merge); + } + builder_->setBuildPoint(&block_alpha_test_merge); + } + uint32_t color_targets_remaining = current_shader().writes_color_targets(); uint32_t color_target_index; while (xe::bit_scan_forward(color_targets_remaining, &color_target_index)) { diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 6eb614088..e38dceb1b 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -3377,6 +3377,8 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( const RegisterFile& regs = *register_file_; auto pa_cl_vte_cntl = regs.Get(); + float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; + auto rb_colorcontrol = regs.Get(); auto vgt_draw_initiator = regs.Get(); int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32); @@ -3416,6 +3418,12 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( if (draw_util::IsPrimitiveLine(regs)) { flags |= SpirvShaderTranslator::kSysFlag_PrimitiveLine; } + // Alpha test. + xenos::CompareFunction alpha_test_function = + rb_colorcontrol.alpha_test_enable ? rb_colorcontrol.alpha_func + : xenos::CompareFunction::kAlways; + flags |= uint32_t(alpha_test_function) + << SpirvShaderTranslator::kSysFlag_AlphaPassIfLess_Shift; // Gamma writing. // TODO(Triang3l): Gamma as sRGB check. 
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { @@ -3492,6 +3500,10 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( } } + // Alpha test. + dirty |= system_constants_.alpha_test_reference != rb_alpha_ref; + system_constants_.alpha_test_reference = rb_alpha_ref; + // Color exponent bias. for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { reg::RB_COLOR_INFO color_info = color_infos[i]; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 406a1a444..39decc091 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -155,6 +155,17 @@ VulkanPipelineCache::GetCurrentPixelShaderModification( } } + using DepthStencilMode = + SpirvShaderTranslator::Modification::DepthStencilMode; + if (shader.implicit_early_z_write_allowed() && + (!shader.writes_color_target(0) || + !draw_util::DoesCoverageDependOnAlpha( + regs.Get()))) { + modification.pixel.depth_stencil_mode = DepthStencilMode::kEarlyHint; + } else { + modification.pixel.depth_stencil_mode = DepthStencilMode::kNoModifiers; + } + return modification; } From c8a4a9504ffc10761c4577e0eb486132d77a5930 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Fri, 1 Jul 2022 12:52:12 +0300 Subject: [PATCH 115/123] [Vulkan] Remove an unneeded scale from RefreshGuestOutput aspect ratio --- src/xenia/gpu/vulkan/vulkan_command_processor.cc | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index e38dceb1b..cc1e56b37 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -1215,12 +1215,8 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, return; } - uint32_t draw_resolution_scale_max = - std::max(texture_cache_->draw_resolution_scale_x(), - texture_cache_->draw_resolution_scale_y()); 
presenter->RefreshGuestOutput( - frontbuffer_width_scaled, frontbuffer_height_scaled, - 1280 * draw_resolution_scale_max, 720 * draw_resolution_scale_max, + frontbuffer_width_scaled, frontbuffer_height_scaled, 1280, 720, [this, frontbuffer_width_scaled, frontbuffer_height_scaled, frontbuffer_format, swap_texture_view]( ui::Presenter::GuestOutputRefreshContext& context) -> bool { From 636585e0aacb2bd4ca184dbef4e388a6b36d33a6 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Fri, 1 Jul 2022 19:53:41 +0300 Subject: [PATCH 116/123] [Vulkan] Trace viewer --- src/xenia/gpu/vulkan/premake5.lua | 62 ++++++++++++++++++ .../gpu/vulkan/vulkan_trace_viewer_main.cc | 64 +++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 src/xenia/gpu/vulkan/vulkan_trace_viewer_main.cc diff --git a/src/xenia/gpu/vulkan/premake5.lua b/src/xenia/gpu/vulkan/premake5.lua index e704547ee..f68b886a7 100644 --- a/src/xenia/gpu/vulkan/premake5.lua +++ b/src/xenia/gpu/vulkan/premake5.lua @@ -23,6 +23,65 @@ project("xenia-gpu-vulkan") "../shaders/bytecode/vulkan_spirv/*.h", }) +group("src") +project("xenia-gpu-vulkan-trace-viewer") + uuid("86a1dddc-a26a-4885-8c55-cf745225d93e") + kind("WindowedApp") + language("C++") + links({ + "xenia-apu", + "xenia-apu-nop", + "xenia-base", + "xenia-core", + "xenia-cpu", + "xenia-cpu-backend-x64", + "xenia-gpu", + "xenia-gpu-vulkan", + "xenia-hid", + "xenia-hid-nop", + "xenia-kernel", + "xenia-ui", + "xenia-ui-vulkan", + "xenia-vfs", + }) + links({ + "aes_128", + "capstone", + "fmt", + "glslang-spirv", + "imgui", + "libavcodec", + "libavutil", + "mspack", + "snappy", + "xxhash", + }) + includedirs({ + project_root.."/third_party/Vulkan-Headers/include", + }) + files({ + "vulkan_trace_viewer_main.cc", + "../../ui/windowed_app_main_"..platform_suffix..".cc", + }) + + filter("platforms:Linux") + links({ + "X11", + "xcb", + "X11-xcb", + }) + + filter("platforms:Windows") + -- Only create the .user file if it doesn't already exist. 
+ local user_file = project_root.."/build/xenia-gpu-vulkan-trace-viewer.vcxproj.user" + if not os.isfile(user_file) then + debugdir(project_root) + debugargs({ + "2>&1", + "1>scratch/stdout-trace-viewer.txt", + }) + end + group("src") project("xenia-gpu-vulkan-trace-dump") uuid("0dd0dd1c-b321-494d-ab9a-6c062f0c65cc") @@ -56,6 +115,9 @@ project("xenia-gpu-vulkan-trace-dump") "snappy", "xxhash", }) + includedirs({ + project_root.."/third_party/Vulkan-Headers/include", + }) files({ "vulkan_trace_dump_main.cc", "../../base/console_app_main_"..platform_suffix..".cc", diff --git a/src/xenia/gpu/vulkan/vulkan_trace_viewer_main.cc b/src/xenia/gpu/vulkan/vulkan_trace_viewer_main.cc new file mode 100644 index 000000000..1c0616052 --- /dev/null +++ b/src/xenia/gpu/vulkan/vulkan_trace_viewer_main.cc @@ -0,0 +1,64 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include +#include + +#include "xenia/base/logging.h" +#include "xenia/gpu/trace_viewer.h" +#include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include "xenia/gpu/vulkan/vulkan_graphics_system.h" + +namespace xe { +namespace gpu { +namespace vulkan { + +class VulkanTraceViewer final : public TraceViewer { + public: + static std::unique_ptr Create( + xe::ui::WindowedAppContext& app_context) { + return std::unique_ptr(new VulkanTraceViewer(app_context)); + } + + std::unique_ptr CreateGraphicsSystem() override { + return std::unique_ptr(new VulkanGraphicsSystem()); + } + + uintptr_t GetColorRenderTarget( + uint32_t pitch, xenos::MsaaSamples samples, uint32_t base, + xenos::ColorRenderTargetFormat format) override { + // TODO(Triang3l): EDRAM viewer. + return 0; + } + + uintptr_t GetDepthRenderTarget( + uint32_t pitch, xenos::MsaaSamples samples, uint32_t base, + xenos::DepthRenderTargetFormat format) override { + // TODO(Triang3l): EDRAM viewer. + return 0; + } + + uintptr_t GetTextureEntry(const TextureInfo& texture_info, + const SamplerInfo& sampler_info) override { + // TODO(Triang3l): Textures, but from a fetch constant rather than + // TextureInfo/SamplerInfo which are going away. 
+ return 0; + } + + private: + explicit VulkanTraceViewer(xe::ui::WindowedAppContext& app_context) + : TraceViewer(app_context, "xenia-gpu-vulkan-trace-viewer") {} +}; + +} // namespace vulkan +} // namespace gpu +} // namespace xe + +XE_DEFINE_WINDOWED_APP(xenia_gpu_vulkan_trace_viewer, + xe::gpu::vulkan::VulkanTraceViewer::Create); From 001f64852c044b7a0903e7c24d1360834c688449 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 3 Jul 2022 19:40:48 +0300 Subject: [PATCH 117/123] [Vulkan] VMA for textures --- .gitmodules | 3 + src/xenia/gpu/vulkan/vulkan_texture_cache.cc | 46 +++++--- src/xenia/gpu/vulkan/vulkan_texture_cache.h | 11 +- src/xenia/ui/vulkan/functions/device_1_0.inc | 1 + .../functions/device_khr_bind_memory2.inc | 4 + .../device_khr_get_memory_requirements2.inc | 6 + .../functions/device_khr_maintenance4.inc | 6 + ...ce_khr_get_physical_device_properties2.inc | 2 + src/xenia/ui/vulkan/vulkan_mem_alloc.cc | 108 ++++++++++++++++++ src/xenia/ui/vulkan/vulkan_mem_alloc.h | 39 +++++++ src/xenia/ui/vulkan/vulkan_provider.cc | 60 ++++++++++ src/xenia/ui/vulkan/vulkan_provider.h | 13 +++ third_party/VulkanMemoryAllocator | 1 + 13 files changed, 283 insertions(+), 17 deletions(-) create mode 100644 src/xenia/ui/vulkan/functions/device_khr_bind_memory2.inc create mode 100644 src/xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc create mode 100644 src/xenia/ui/vulkan/functions/device_khr_maintenance4.inc create mode 100644 src/xenia/ui/vulkan/vulkan_mem_alloc.cc create mode 100644 src/xenia/ui/vulkan/vulkan_mem_alloc.h create mode 160000 third_party/VulkanMemoryAllocator diff --git a/.gitmodules b/.gitmodules index 352566414..a73061e22 100644 --- a/.gitmodules +++ b/.gitmodules @@ -82,3 +82,6 @@ [submodule "third_party/SPIRV-Tools"] path = third_party/SPIRV-Tools url = https://github.com/KhronosGroup/SPIRV-Tools.git +[submodule "third_party/VulkanMemoryAllocator"] + path = third_party/VulkanMemoryAllocator + url = 
https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator.git diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index f2dc4ea29..580696a30 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -21,6 +21,7 @@ #include "xenia/gpu/texture_util.h" #include "xenia/gpu/vulkan/deferred_command_buffer.h" #include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include "xenia/ui/vulkan/vulkan_mem_alloc.h" #include "xenia/ui/vulkan/vulkan_util.h" namespace xe { @@ -468,6 +469,14 @@ VulkanTextureCache::~VulkanTextureCache() { if (load_pipeline_layout_ != VK_NULL_HANDLE) { dfn.vkDestroyPipelineLayout(device, load_pipeline_layout_, nullptr); } + + // Textures memory is allocated using the Vulkan Memory Allocator, destroy all + // textures before destroying VMA. + DestroyAllTextures(true); + + if (vma_allocator_ != VK_NULL_HANDLE) { + vmaDestroyAllocator(vma_allocator_); + } } void VulkanTextureCache::BeginSubmission(uint64_t new_submission_index) { @@ -1052,21 +1061,19 @@ std::unique_ptr VulkanTextureCache::CreateTexture( image_format_list_create_info.viewFormatCount = 2; image_format_list_create_info.pViewFormats = formats; } - // TODO(Triang3l): Suballocate due to the low memory allocation count limit on - // Windows (use VMA or a custom allocator, possibly based on two-level - // segregated fit just like VMA). 
+ + VmaAllocationCreateInfo allocation_create_info = {}; + allocation_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + VkImage image; - VkDeviceMemory memory; - VkDeviceSize memory_size; - if (!ui::vulkan::util::CreateDedicatedAllocationImage( - provider, image_create_info, - ui::vulkan::util::MemoryPurpose::kDeviceLocal, image, memory, nullptr, - &memory_size)) { + VmaAllocation allocation; + if (vmaCreateImage(vma_allocator_, &image_create_info, + &allocation_create_info, &image, &allocation, nullptr)) { return nullptr; } return std::unique_ptr( - new VulkanTexture(*this, key, image, memory, memory_size)); + new VulkanTexture(*this, key, image, allocation)); } bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, @@ -1571,9 +1578,12 @@ void VulkanTextureCache::UpdateTextureBindingsImpl( VulkanTextureCache::VulkanTexture::VulkanTexture( VulkanTextureCache& texture_cache, const TextureKey& key, VkImage image, - VkDeviceMemory memory, VkDeviceSize memory_size) - : Texture(texture_cache, key), image_(image), memory_(memory) { - SetHostMemoryUsage(uint64_t(memory_size)); + VmaAllocation allocation) + : Texture(texture_cache, key), image_(image), allocation_(allocation) { + VmaAllocationInfo allocation_info; + vmaGetAllocationInfo(texture_cache.vma_allocator_, allocation_, + &allocation_info); + SetHostMemoryUsage(uint64_t(allocation_info.size)); } VulkanTextureCache::VulkanTexture::~VulkanTexture() { @@ -1586,8 +1596,7 @@ VulkanTextureCache::VulkanTexture::~VulkanTexture() { for (const auto& view_pair : views_) { dfn.vkDestroyImageView(device, view_pair.second, nullptr); } - dfn.vkDestroyImage(device, image_, nullptr); - dfn.vkFreeMemory(device, memory_, nullptr); + vmaDestroyImage(vulkan_texture_cache.vma_allocator_, image_, allocation_); } VkImageView VulkanTextureCache::VulkanTexture::GetView(bool is_signed, @@ -1708,6 +1717,13 @@ bool VulkanTextureCache::Initialize() { device_portability_subset_features = 
provider.device_portability_subset_features(); + // Vulkan Memory Allocator. + + vma_allocator_ = ui::vulkan::CreateVmaAllocator(provider, true); + if (vma_allocator_ == VK_NULL_HANDLE) { + return false; + } + // Image formats. // Initialize to the best formats. diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.h b/src/xenia/gpu/vulkan/vulkan_texture_cache.h index 79e5db493..448e74d03 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.h @@ -19,6 +19,7 @@ #include "xenia/gpu/texture_cache.h" #include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/vulkan/vulkan_shared_memory.h" +#include "xenia/ui/vulkan/vulkan_mem_alloc.h" #include "xenia/ui/vulkan/vulkan_provider.h" namespace xe { @@ -185,7 +186,7 @@ class VulkanTextureCache final : public TextureCache { // Takes ownership of the image and its memory. explicit VulkanTexture(VulkanTextureCache& texture_cache, const TextureKey& key, VkImage image, - VkDeviceMemory memory, VkDeviceSize memory_size); + VmaAllocation allocation); ~VulkanTexture(); VkImage image() const { return image_; } @@ -255,7 +256,7 @@ class VulkanTextureCache final : public TextureCache { } VkImage image_; - VkDeviceMemory memory_; + VmaAllocation allocation_; Usage usage_ = Usage::kUndefined; @@ -317,6 +318,12 @@ class VulkanTextureCache final : public TextureCache { VulkanCommandProcessor& command_processor_; VkPipelineStageFlags guest_shader_pipeline_stages_; + // Using the Vulkan Memory Allocator because texture count in games is + // naturally pretty much unbounded, while Vulkan implementations, especially + // on Windows versions before 10, may have an allocation count limit as low as + // 4096. 
+ VmaAllocator vma_allocator_ = VK_NULL_HANDLE; + static const HostFormatPair kBestHostFormats[64]; static const HostFormatPair kHostFormatGBGRUnaligned; static const HostFormatPair kHostFormatBGRGUnaligned; diff --git a/src/xenia/ui/vulkan/functions/device_1_0.inc b/src/xenia/ui/vulkan/functions/device_1_0.inc index ae640f61e..4e9eaa83a 100644 --- a/src/xenia/ui/vulkan/functions/device_1_0.inc +++ b/src/xenia/ui/vulkan/functions/device_1_0.inc @@ -66,6 +66,7 @@ XE_UI_VULKAN_FUNCTION(vkGetBufferMemoryRequirements) XE_UI_VULKAN_FUNCTION(vkGetDeviceQueue) XE_UI_VULKAN_FUNCTION(vkGetFenceStatus) XE_UI_VULKAN_FUNCTION(vkGetImageMemoryRequirements) +XE_UI_VULKAN_FUNCTION(vkInvalidateMappedMemoryRanges) XE_UI_VULKAN_FUNCTION(vkMapMemory) XE_UI_VULKAN_FUNCTION(vkResetCommandPool) XE_UI_VULKAN_FUNCTION(vkResetDescriptorPool) diff --git a/src/xenia/ui/vulkan/functions/device_khr_bind_memory2.inc b/src/xenia/ui/vulkan/functions/device_khr_bind_memory2.inc new file mode 100644 index 000000000..ebefbc50f --- /dev/null +++ b/src/xenia/ui/vulkan/functions/device_khr_bind_memory2.inc @@ -0,0 +1,4 @@ +// VK_KHR_bind_memory2 functions used in Xenia. +// Promoted to Vulkan 1.1 core. +XE_UI_VULKAN_FUNCTION_PROMOTED(vkBindBufferMemory2KHR, vkBindBufferMemory2) +XE_UI_VULKAN_FUNCTION_PROMOTED(vkBindImageMemory2KHR, vkBindImageMemory2) diff --git a/src/xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc b/src/xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc new file mode 100644 index 000000000..11068c485 --- /dev/null +++ b/src/xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc @@ -0,0 +1,6 @@ +// VK_KHR_get_memory_requirements2 functions used in Xenia. +// Promoted to Vulkan 1.1 core. 
+XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetBufferMemoryRequirements2KHR, + vkGetBufferMemoryRequirements2) +XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetImageMemoryRequirements2KHR, + vkGetImageMemoryRequirements2) diff --git a/src/xenia/ui/vulkan/functions/device_khr_maintenance4.inc b/src/xenia/ui/vulkan/functions/device_khr_maintenance4.inc new file mode 100644 index 000000000..11c078792 --- /dev/null +++ b/src/xenia/ui/vulkan/functions/device_khr_maintenance4.inc @@ -0,0 +1,6 @@ +// VK_KHR_maintenance4 functions used in Xenia. +// Promoted to Vulkan 1.3 core. +XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetDeviceBufferMemoryRequirementsKHR, + vkGetDeviceBufferMemoryRequirements) +XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetDeviceImageMemoryRequirementsKHR, + vkGetDeviceImageMemoryRequirements) diff --git a/src/xenia/ui/vulkan/functions/instance_khr_get_physical_device_properties2.inc b/src/xenia/ui/vulkan/functions/instance_khr_get_physical_device_properties2.inc index 05b2fe800..45153db06 100644 --- a/src/xenia/ui/vulkan/functions/instance_khr_get_physical_device_properties2.inc +++ b/src/xenia/ui/vulkan/functions/instance_khr_get_physical_device_properties2.inc @@ -1,4 +1,6 @@ // VK_KHR_get_physical_device_properties2 functions used in Xenia. // Promoted to Vulkan 1.1 core. +XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetPhysicalDeviceMemoryProperties2KHR, + vkGetPhysicalDeviceMemoryProperties2) XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetPhysicalDeviceProperties2KHR, vkGetPhysicalDeviceProperties2) diff --git a/src/xenia/ui/vulkan/vulkan_mem_alloc.cc b/src/xenia/ui/vulkan/vulkan_mem_alloc.cc new file mode 100644 index 000000000..d3be16c5f --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_mem_alloc.cc @@ -0,0 +1,108 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. 
* + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +// Implementing VMA in this translation unit. +#define VMA_IMPLEMENTATION +#include "xenia/ui/vulkan/vulkan_mem_alloc.h" + +#include + +#include "xenia/base/logging.h" +#include "xenia/ui/vulkan/vulkan_provider.h" + +namespace xe { +namespace ui { +namespace vulkan { + +VmaAllocator CreateVmaAllocator(const VulkanProvider& provider, + bool externally_synchronized) { + const VulkanProvider::LibraryFunctions& lfn = provider.lfn(); + const VulkanProvider::InstanceFunctions& ifn = provider.ifn(); + const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + const VulkanProvider::InstanceExtensions& instance_extensions = + provider.instance_extensions(); + const VulkanProvider::DeviceExtensions& device_extensions = + provider.device_extensions(); + + VmaVulkanFunctions vma_vulkan_functions = {}; + VmaAllocatorCreateInfo allocator_create_info = {}; + + vma_vulkan_functions.vkGetInstanceProcAddr = lfn.vkGetInstanceProcAddr; + vma_vulkan_functions.vkGetDeviceProcAddr = ifn.vkGetDeviceProcAddr; + vma_vulkan_functions.vkGetPhysicalDeviceProperties = + ifn.vkGetPhysicalDeviceProperties; + vma_vulkan_functions.vkGetPhysicalDeviceMemoryProperties = + ifn.vkGetPhysicalDeviceMemoryProperties; + vma_vulkan_functions.vkAllocateMemory = dfn.vkAllocateMemory; + vma_vulkan_functions.vkFreeMemory = dfn.vkFreeMemory; + vma_vulkan_functions.vkMapMemory = dfn.vkMapMemory; + vma_vulkan_functions.vkUnmapMemory = dfn.vkUnmapMemory; + vma_vulkan_functions.vkFlushMappedMemoryRanges = + dfn.vkFlushMappedMemoryRanges; + vma_vulkan_functions.vkInvalidateMappedMemoryRanges = + dfn.vkInvalidateMappedMemoryRanges; + vma_vulkan_functions.vkBindBufferMemory = dfn.vkBindBufferMemory; + vma_vulkan_functions.vkBindImageMemory = dfn.vkBindImageMemory; + vma_vulkan_functions.vkGetBufferMemoryRequirements = + 
dfn.vkGetBufferMemoryRequirements; + vma_vulkan_functions.vkGetImageMemoryRequirements = + dfn.vkGetImageMemoryRequirements; + vma_vulkan_functions.vkCreateBuffer = dfn.vkCreateBuffer; + vma_vulkan_functions.vkDestroyBuffer = dfn.vkDestroyBuffer; + vma_vulkan_functions.vkCreateImage = dfn.vkCreateImage; + vma_vulkan_functions.vkDestroyImage = dfn.vkDestroyImage; + vma_vulkan_functions.vkCmdCopyBuffer = dfn.vkCmdCopyBuffer; + if (device_extensions.khr_get_memory_requirements2) { + vma_vulkan_functions.vkGetBufferMemoryRequirements2KHR = + dfn.vkGetBufferMemoryRequirements2KHR; + vma_vulkan_functions.vkGetImageMemoryRequirements2KHR = + dfn.vkGetImageMemoryRequirements2KHR; + if (device_extensions.khr_dedicated_allocation) { + allocator_create_info.flags |= + VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT; + } + } + if (device_extensions.khr_bind_memory2) { + vma_vulkan_functions.vkBindBufferMemory2KHR = dfn.vkBindBufferMemory2KHR; + vma_vulkan_functions.vkBindImageMemory2KHR = dfn.vkBindImageMemory2KHR; + allocator_create_info.flags |= VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT; + } + if (instance_extensions.khr_get_physical_device_properties2) { + vma_vulkan_functions.vkGetPhysicalDeviceMemoryProperties2KHR = + ifn.vkGetPhysicalDeviceMemoryProperties2KHR; + if (device_extensions.ext_memory_budget) { + allocator_create_info.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT; + } + } + if (device_extensions.khr_maintenance4) { + vma_vulkan_functions.vkGetDeviceImageMemoryRequirements = + dfn.vkGetDeviceImageMemoryRequirementsKHR; + } + + if (externally_synchronized) { + allocator_create_info.flags |= + VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT; + } + allocator_create_info.physicalDevice = provider.physical_device(); + allocator_create_info.device = provider.device(); + allocator_create_info.pVulkanFunctions = &vma_vulkan_functions; + allocator_create_info.instance = provider.instance(); + allocator_create_info.vulkanApiVersion = + 
provider.device_properties().apiVersion; + VmaAllocator allocator; + if (vmaCreateAllocator(&allocator_create_info, &allocator) != VK_SUCCESS) { + XELOGE("Failed to create a Vulkan Memory Allocator instance"); + return VK_NULL_HANDLE; + } + return allocator; +} + +} // namespace vulkan +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/vulkan/vulkan_mem_alloc.h b/src/xenia/ui/vulkan/vulkan_mem_alloc.h new file mode 100644 index 000000000..9ae9db16e --- /dev/null +++ b/src/xenia/ui/vulkan/vulkan_mem_alloc.h @@ -0,0 +1,39 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_UI_VULKAN_VULKAN_MEM_ALLOC_H_ +#define XENIA_UI_VULKAN_VULKAN_MEM_ALLOC_H_ + +// Make sure vulkan.h is included from third_party (rather than from the system +// include directory) before vk_mem_alloc.h. + +#include "xenia/ui/vulkan/vulkan_provider.h" + +#define VMA_STATIC_VULKAN_FUNCTIONS 0 +// Work around the pointer nullability completeness warnings on Clang. 
+#ifndef VMA_NULLABLE +#define VMA_NULLABLE +#endif +#ifndef VMA_NOT_NULL +#define VMA_NOT_NULL +#endif +#include "third_party/VulkanMemoryAllocator/include/vk_mem_alloc.h" + +namespace xe { +namespace ui { +namespace vulkan { + +VmaAllocator CreateVmaAllocator(const VulkanProvider& provider, + bool externally_synchronized); + +} // namespace vulkan +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_VULKAN_VULKAN_MEM_ALLOC_H_ \ No newline at end of file diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index 95c544925..a08fa7b51 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -700,12 +700,17 @@ bool VulkanProvider::Initialize() { } std::memset(&device_extensions_, 0, sizeof(device_extensions_)); if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) { + device_extensions_.khr_bind_memory2 = true; device_extensions_.khr_dedicated_allocation = true; + device_extensions_.khr_get_memory_requirements2 = true; device_extensions_.khr_sampler_ycbcr_conversion = true; if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) { device_extensions_.khr_image_format_list = true; device_extensions_.khr_shader_float_controls = true; device_extensions_.khr_spirv_1_4 = true; + if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) { + device_extensions_.khr_maintenance4 = true; + } } } device_extensions_enabled.clear(); @@ -716,12 +721,17 @@ bool VulkanProvider::Initialize() { static const std::pair kUsedDeviceExtensions[] = { {"VK_EXT_fragment_shader_interlock", offsetof(DeviceExtensions, ext_fragment_shader_interlock)}, + {"VK_EXT_memory_budget", offsetof(DeviceExtensions, ext_memory_budget)}, {"VK_EXT_shader_stencil_export", offsetof(DeviceExtensions, ext_shader_stencil_export)}, + {"VK_KHR_bind_memory2", offsetof(DeviceExtensions, khr_bind_memory2)}, {"VK_KHR_dedicated_allocation", offsetof(DeviceExtensions, 
khr_dedicated_allocation)}, + {"VK_KHR_get_memory_requirements2", + offsetof(DeviceExtensions, khr_get_memory_requirements2)}, {"VK_KHR_image_format_list", offsetof(DeviceExtensions, khr_image_format_list)}, + {"VK_KHR_maintenance4", offsetof(DeviceExtensions, khr_maintenance4)}, {"VK_KHR_portability_subset", offsetof(DeviceExtensions, khr_portability_subset)}, // While vkGetPhysicalDeviceFormatProperties should be used to check the @@ -922,6 +932,48 @@ bool VulkanProvider::Initialize() { } } // Extensions - disable the specific extension if failed to get its functions. + if (device_extensions_.khr_bind_memory2) { + bool functions_loaded = true; + if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) { +#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_PROMOTE +#include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc" +#undef XE_UI_VULKAN_FUNCTION_PROMOTED + } else { +#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_DONT_PROMOTE +#include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc" +#undef XE_UI_VULKAN_FUNCTION_PROMOTED + } + device_extensions_.khr_bind_memory2 = functions_loaded; + } + if (device_extensions_.khr_get_memory_requirements2) { + bool functions_loaded = true; + if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) { +#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_PROMOTE +#include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc" +#undef XE_UI_VULKAN_FUNCTION_PROMOTED + } else { +#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_DONT_PROMOTE +#include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc" +#undef XE_UI_VULKAN_FUNCTION_PROMOTED + } + device_extensions_.khr_get_memory_requirements2 = functions_loaded; + // VK_KHR_dedicated_allocation can still work without the dedicated + // allocation preference getter even though it requires + // VK_KHR_get_memory_requirements2 to be supported and enabled. 
+ } + if (device_extensions_.khr_maintenance4) { + bool functions_loaded = true; + if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) { +#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_PROMOTE +#include "xenia/ui/vulkan/functions/device_khr_maintenance4.inc" +#undef XE_UI_VULKAN_FUNCTION_PROMOTED + } else { +#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_DONT_PROMOTE +#include "xenia/ui/vulkan/functions/device_khr_maintenance4.inc" +#undef XE_UI_VULKAN_FUNCTION_PROMOTED + } + device_extensions_.khr_maintenance4 = functions_loaded; + } if (device_extensions_.khr_swapchain) { bool functions_loaded = true; #include "xenia/ui/vulkan/functions/device_khr_swapchain.inc" @@ -956,12 +1008,20 @@ bool VulkanProvider::Initialize() { XELOGVK("Vulkan device extensions:"); XELOGVK("* VK_EXT_fragment_shader_interlock: {}", device_extensions_.ext_fragment_shader_interlock ? "yes" : "no"); + XELOGVK("* VK_EXT_memory_budget: {}", + device_extensions_.ext_memory_budget ? "yes" : "no"); XELOGVK("* VK_EXT_shader_stencil_export: {}", device_extensions_.ext_shader_stencil_export ? "yes" : "no"); + XELOGVK("* VK_KHR_bind_memory2: {}", + device_extensions_.khr_bind_memory2 ? "yes" : "no"); XELOGVK("* VK_KHR_dedicated_allocation: {}", device_extensions_.khr_dedicated_allocation ? "yes" : "no"); + XELOGVK("* VK_KHR_get_memory_requirements2: {}", + device_extensions_.khr_get_memory_requirements2 ? "yes" : "no"); XELOGVK("* VK_KHR_image_format_list: {}", device_extensions_.khr_image_format_list ? "yes" : "no"); + XELOGVK("* VK_KHR_maintenance4: {}", + device_extensions_.khr_maintenance4 ? "yes" : "no"); XELOGVK("* VK_KHR_portability_subset: {}", device_extensions_.khr_portability_subset ? 
"yes" : "no"); if (device_extensions_.khr_portability_subset) { diff --git a/src/xenia/ui/vulkan/vulkan_provider.h b/src/xenia/ui/vulkan/vulkan_provider.h index 0680b5766..8dc83283c 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.h +++ b/src/xenia/ui/vulkan/vulkan_provider.h @@ -132,11 +132,18 @@ class VulkanProvider : public GraphicsProvider { } struct DeviceExtensions { bool ext_fragment_shader_interlock; + bool ext_memory_budget; bool ext_shader_stencil_export; // Core since 1.1.0. + bool khr_bind_memory2; + // Core since 1.1.0. bool khr_dedicated_allocation; + // Core since 1.1.0. + bool khr_get_memory_requirements2; // Core since 1.2.0. bool khr_image_format_list; + // Core since 1.3.0. + bool khr_maintenance4; // Requires the VK_KHR_get_physical_device_properties2 instance extension. bool khr_portability_subset; // Core since 1.1.0. @@ -217,8 +224,14 @@ class VulkanProvider : public GraphicsProvider { VkDevice device() const { return device_; } struct DeviceFunctions { #define XE_UI_VULKAN_FUNCTION(name) PFN_##name name; +#define XE_UI_VULKAN_FUNCTION_PROMOTED(extension_name, core_name) \ + PFN_##extension_name extension_name; #include "xenia/ui/vulkan/functions/device_1_0.inc" +#include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc" +#include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc" +#include "xenia/ui/vulkan/functions/device_khr_maintenance4.inc" #include "xenia/ui/vulkan/functions/device_khr_swapchain.inc" +#undef XE_UI_VULKAN_FUNCTION_PROMOTED #undef XE_UI_VULKAN_FUNCTION }; const DeviceFunctions& dfn() const { return dfn_; } diff --git a/third_party/VulkanMemoryAllocator b/third_party/VulkanMemoryAllocator new file mode 160000 index 000000000..51c8b5601 --- /dev/null +++ b/third_party/VulkanMemoryAllocator @@ -0,0 +1 @@ +Subproject commit 51c8b56011303e94840370089f816b19dbe7edf0 From f7ef051025349324516dc0a82540ff80073ea7a2 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 3 Jul 2022 19:42:22 +0300 Subject: [PATCH 
118/123] [Vulkan] Disable validation by default --- src/xenia/ui/vulkan/vulkan_provider.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index a08fa7b51..29b94616f 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -29,9 +29,8 @@ #include "xenia/base/platform_win.h" #endif -// TODO(Triang3l): Disable Vulkan validation before releasing a stable version. DEFINE_bool( - vulkan_validation, true, + vulkan_validation, false, "Enable Vulkan validation (VK_LAYER_KHRONOS_validation). Messages will be " "written to the OS debug log without vulkan_debug_messenger or to the " "Xenia log with it.", From ee84f4e267a4726f552510cf605eae18552c1d1d Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 3 Jul 2022 19:45:48 +0300 Subject: [PATCH 119/123] [Vulkan] Update title bar warning --- src/xenia/gpu/vulkan/vulkan_graphics_system.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/xenia/gpu/vulkan/vulkan_graphics_system.h b/src/xenia/gpu/vulkan/vulkan_graphics_system.h index 89b97b06c..ae81e144c 100644 --- a/src/xenia/gpu/vulkan/vulkan_graphics_system.h +++ b/src/xenia/gpu/vulkan/vulkan_graphics_system.h @@ -26,7 +26,9 @@ class VulkanGraphicsSystem : public GraphicsSystem { static bool IsAvailable() { return true; } - std::string name() const override { return "Vulkan Prototype - DO NOT USE"; } + std::string name() const override { + return "Vulkan - HEAVILY INCOMPLETE, early development"; + } X_STATUS Setup(cpu::Processor* processor, kernel::KernelState* kernel_state, ui::WindowedAppContext* app_context, From ed61e15fc3636927a7d79e0098d21f9f64cae610 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 3 Jul 2022 19:49:11 +0300 Subject: [PATCH 120/123] [App] Make D3D12 the default GPU backend on Windows again --- src/xenia/app/xenia_main.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/src/xenia/app/xenia_main.cc b/src/xenia/app/xenia_main.cc index 44cc6a855..ed6f1e9f1 100644 --- a/src/xenia/app/xenia_main.cc +++ b/src/xenia/app/xenia_main.cc @@ -260,11 +260,10 @@ std::unique_ptr EmulatorApp::CreateAudioSystem( std::unique_ptr EmulatorApp::CreateGraphicsSystem() { Factory factory; - factory.Add("vulkan"); - // TODO(Triang3l): Move D3D12 back to the top. #if XE_PLATFORM_WIN32 factory.Add("d3d12"); #endif // XE_PLATFORM_WIN32 + factory.Add("vulkan"); factory.Add("null"); return factory.Create(cvars::gpu); } From bbae909fd7a6a16ce9dccb80d4111b85f2f0dd10 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 3 Jul 2022 20:39:44 +0300 Subject: [PATCH 121/123] [GPU] Reasons to keep non-Vulkan backends [ci skip] --- src/xenia/app/xenia_main.cc | 71 +++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/src/xenia/app/xenia_main.cc b/src/xenia/app/xenia_main.cc index ed6f1e9f1..864993b6e 100644 --- a/src/xenia/app/xenia_main.cc +++ b/src/xenia/app/xenia_main.cc @@ -259,6 +259,77 @@ std::unique_ptr EmulatorApp::CreateAudioSystem( } std::unique_ptr EmulatorApp::CreateGraphicsSystem() { + // While Vulkan is supported by a large variety of operating systems (Windows, + // GNU/Linux, Android, also via the MoltenVK translation layer on top of Metal + // on macOS and iOS), please don't remove platform-specific GPU backends from + // Xenia. + // + // Regardless of the operating system, having multiple options provides more + // stability to users. In case of driver issues, users may try switching + // between the available backends. 
For example, in June 2022, on Nvidia Ampere + // (RTX 30xx), Xenia had synchronization issues that resulted in flickering, + // most prominently in 4D5307E6, on Direct3D 12 - but the same issue was not + // reproducible in the Vulkan backend, however, it used ImageSampleExplicitLod + // with explicit gradients for cubemaps, which triggered a different driver + // bug on Nvidia (every 1 out of 2x2 pixels receiving junk). + // + // Specifically on Microsoft platforms, there are a few reasons why supporting + // Direct3D 12 is desirable rather than limiting Xenia to Vulkan only: + // - Wider hardware support for Direct3D 12 on x86 Windows desktops. + // Direct3D 12 requires the minimum of Nvidia Fermi, or, with a pre-2021 + // driver version, Intel HD Graphics 4200. Vulkan, however, is supported + // only starting with Nvidia Kepler and a much more recent Intel UHD + // Graphics generation. + // - Wider hardware support on other kinds of Microsoft devices. The Xbox One + // and the Xbox Series X|S only support Direct3D as the GPU API in their UWP + // runtime, and only version 12 can be granted expanded resource access. + // Qualcomm, as of June 2022, also doesn't provide a Vulkan implementation + // for their Arm-based Windows devices, while Direct3D 12 is available. + // - Both older Intel GPUs and the Xbox One apparently, as well as earlier + // Windows 10 versions, also require Shader Model 5.1 DXBC shaders rather + // than Shader Model 6 DXIL ones, so a DXBC shader translator should be + // available in Xenia too, a DXIL one doesn't fully replace it. + // - As of June 2022, AMD also refuses to implement the + // VK_EXT_fragment_shader_interlock Vulkan extension in their drivers, as + // well as its OpenGL counterpart, which is heavily utilized for accurate + // support of Xenos render target formats that don't have PC equivalents + // (8_8_8_8_GAMMA, 2_10_10_10_FLOAT, 16_16 and 16_16_16_16 with -32 to 32 + // range, D24FS8) with correct blending. 
Direct3D 12, however, requires + // support for similar functionality (rasterizer-ordered views) on the + // feature level 12_1, and the AMD driver implements it on Direct3D, as well + // as raster order groups in their Metal driver. + // + // Additionally, different host GPU APIs receive feature support at different + // paces. VK_EXT_fragment_shader_interlock first appeared in 2019, for + // instance, while Xenia had been taking advantage of rasterizer-ordered views + // on Direct3D 12 for over half a year at that point (they have existed in + // Direct3D 12 since the first version). + // + // MoltenVK on top of Metal also has its flaws and limitations. Metal, for + // instance, as of June 2022, doesn't provide a switch for primitive restart, + // while Vulkan does - so MoltenVK is not completely transparent to Xenia, + // many of its issues that may not be very obvious (unlike when the Metal API + // is used directly) should be taken into account in Xenia. Also, as of June + // 2022, MoltenVK translates SPIR-V shaders into the C++-based Metal Shading + // Language rather than AIR directly, which likely massively increases + // pipeline object creation time - and Xenia translates shaders and creates + // pipelines when they're first actually used for a draw command by the game, + // thus it can't precompile anything that hasn't ever been encountered before - + // there's already no time to waste. + // + // Very old hardware (Direct3D 10 level) is also not supported by most Vulkan + // drivers. However, in the future, Xenia may be ported to it using the + // Direct3D 11 API with the feature level 10_1 or 10_0.
OpenGL, however, had + // been lagging behind Direct3D prior to versions 4.x, and didn't receive + // compute shaders until a 4.2 extension (while 4.2 already corresponds + // roughly to Direct3D 11 features) - and replacing Xenia compute shaders with + // transform feedback / stream output is not always trivial (in particular, + // will need to rely on GL_ARB_transform_feedback3 for skipping over memory + // locations that shouldn't be overwritten). + // + // For maintainability, as much implementation code as possible should be + // placed in `xe::gpu` and shared between the backends rather than duplicated + // between them. Factory factory; #if XE_PLATFORM_WIN32 factory.Add("d3d12"); From 83e99845396ee289f204009f51a04de713bbd5f8 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 3 Jul 2022 20:54:34 +0300 Subject: [PATCH 122/123] [Vulkan] Remove required feature checks Fallbacks for those will be added more or less soon, the stable version won't hard-require anything beyond 1.0 and the portability subset --- src/xenia/ui/vulkan/vulkan_provider.cc | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/src/xenia/ui/vulkan/vulkan_provider.cc b/src/xenia/ui/vulkan/vulkan_provider.cc index 29b94616f..3a30220fb 100644 --- a/src/xenia/ui/vulkan/vulkan_provider.cc +++ b/src/xenia/ui/vulkan/vulkan_provider.cc @@ -543,22 +543,10 @@ bool VulkanProvider::Initialize() { ++i) { VkPhysicalDevice physical_device_current = physical_devices[i]; - // Get physical device features and check if the needed ones are supported. - // Need this before obtaining the queues as sparse binding is an optional - // feature. + // Get physical device features. Need this before obtaining the queues as + // sparse binding is an optional feature. 
ifn_.vkGetPhysicalDeviceFeatures(physical_device_current, &device_features_); - // Passing indices directly from guest memory, where they are big-endian; a - // workaround using fetch from shared memory for 32-bit indices that need - // swapping isn't implemented yet. Not supported only Qualcomm Adreno 4xx. - if (!device_features_.fullDrawIndexUint32) { - continue; - } - // TODO(Triang3l): Make geometry shaders optional by providing compute - // shader fallback (though that would require vertex shader stores). - if (!device_features_.geometryShader) { - continue; - } // Get the needed queues: // - Graphics and compute. From 2621dabf0f77a61cbc055e6ed86e5c709d5934d1 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 3 Jul 2022 21:21:17 +0300 Subject: [PATCH 123/123] [Vulkan] Native 24-bit unorm depth where available --- .../gpu/vulkan/vulkan_render_target_cache.cc | 30 ++++++++++++++++++- .../gpu/vulkan/vulkan_render_target_cache.h | 9 +++--- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index 02547eaaa..46e261ac5 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -183,9 +183,22 @@ VulkanRenderTargetCache::~VulkanRenderTargetCache() { Shutdown(true); } bool VulkanRenderTargetCache::Initialize() { const ui::vulkan::VulkanProvider& provider = command_processor_.GetVulkanProvider(); + const ui::vulkan::VulkanProvider::InstanceFunctions& ifn = provider.ifn(); + VkPhysicalDevice physical_device = provider.physical_device(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + // Format support. 
+ constexpr VkFormatFeatureFlags kUsedDepthFormatFeatures = + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; + VkFormatProperties depth_unorm24_properties; + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, VK_FORMAT_D24_UNORM_S8_UINT, &depth_unorm24_properties); + depth_unorm24_vulkan_format_supported_ = + (depth_unorm24_properties.optimalTilingFeatures & + kUsedDepthFormatFeatures) == kUsedDepthFormatFeatures; + // Descriptor set layouts. VkDescriptorSetLayoutBinding descriptor_set_layout_bindings[2]; descriptor_set_layout_bindings[0].binding = 0; @@ -1235,7 +1248,10 @@ VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) { VkFormat VulkanRenderTargetCache::GetDepthVulkanFormat( xenos::DepthRenderTargetFormat format) const { - // TODO(Triang3l): Conditional 24-bit depth. + if (format == xenos::DepthRenderTargetFormat::kD24S8 && + depth_unorm24_vulkan_format_supported()) { + return VK_FORMAT_D24_UNORM_S8_UINT; + } return VK_FORMAT_D32_SFLOAT_S8_UINT; } @@ -1582,6 +1598,18 @@ RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget( descriptor_set_index_transfer_source); } +bool VulkanRenderTargetCache::IsHostDepthEncodingDifferent( + xenos::DepthRenderTargetFormat format) const { + // TODO(Triang3l): Conversion directly in shaders. 
+ switch (format) { + case xenos::DepthRenderTargetFormat::kD24S8: + return !depth_unorm24_vulkan_format_supported(); + case xenos::DepthRenderTargetFormat::kD24FS8: + return true; + } + return false; +} + void VulkanRenderTargetCache::GetEdramBufferUsageMasks( EdramBufferUsage usage, VkPipelineStageFlags& stage_mask_out, VkAccessFlags& access_mask_out) { diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h index 18113bf9f..d15ba2abc 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h @@ -142,6 +142,9 @@ class VulkanRenderTargetCache final : public RenderTargetCache { !cvars::snorm16_render_target_full_range; } + bool depth_unorm24_vulkan_format_supported() const { + return depth_unorm24_vulkan_format_supported_; + } bool depth_float24_round() const { return depth_float24_round_; } bool msaa_2x_attachments_supported() const { @@ -172,11 +175,8 @@ class VulkanRenderTargetCache final : public RenderTargetCache { RenderTarget* CreateRenderTarget(RenderTargetKey key) override; - // TODO(Triang3l): Check actual unorm24 support. bool IsHostDepthEncodingDifferent( - xenos::DepthRenderTargetFormat format) const override { - return true; - } + xenos::DepthRenderTargetFormat format) const override; private: enum class EdramBufferUsage { @@ -840,6 +840,7 @@ class VulkanRenderTargetCache final : public RenderTargetCache { bool gamma_render_target_as_srgb_ = false; + bool depth_unorm24_vulkan_format_supported_ = false; bool depth_float24_round_ = false; bool msaa_2x_attachments_supported_ = false;