shader_recompiler: add gl_Layer translation GS for older hardware

2024-07-04 23:31:19 +01:00 · 2022-11-30 17:16:00 -05:00 · 2022-11-30 17:16:00 -05:00 · 3ef006b5ab
commit 3ef006b5ab
parent d6b63239ae
9 changed files with 230 additions and 6 deletions
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@ -221,6 +221,7 @@ add_library(shader_recompiler STATIC
    ir_opt/dual_vertex_pass.cpp
    ir_opt/global_memory_to_storage_buffer_pass.cpp
    ir_opt/identity_removal_pass.cpp
    ir_opt/layer_pass.cpp
    ir_opt/lower_fp16_to_fp32.cpp
    ir_opt/lower_int64_to_int32.cpp
    ir_opt/passes.h
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@ -9,6 +9,7 @@
 #include "common/settings.h"
 #include "shader_recompiler/exception.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
 #include "shader_recompiler/frontend/ir/ir_emitter.h"
 #include "shader_recompiler/frontend/ir/post_order.h"
 #include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
 #include "shader_recompiler/frontend/maxwell/translate/translate.h"
@ -233,6 +234,8 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
        Optimization::VerificationPass(program);
    }
    Optimization::CollectShaderInfoPass(env, program);
    Optimization::LayerPass(program, host_info);
    CollectInterpolationInfo(env, program);
    AddNVNStorageBuffers(program);
    return program;
@ -331,4 +334,82 @@ void ConvertLegacyToGeneric(IR::Program& program, const Shader::RuntimeInfo& run
    }
 }
 IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
                                        ObjectPool<IR::Block>& block_pool,
                                        const HostTranslateInfo& host_info,
                                        IR::Program& source_program,
                                        Shader::OutputTopology output_topology) {
    // Builds a single-block geometry program that forwards its input vertices and writes
    // Layer from the attribute recorded in source_program.info.emulated_layer.
    const auto& source_info{source_program.info};
    IR::Program program;
    program.stage = Stage::Geometry;
    program.output_topology = output_topology;
    // Vertices emitted per primitive: 1 for point lists, 2 for line strips, 3 otherwise
    if (output_topology == OutputTopology::PointList) {
        program.output_vertices = 1;
    } else if (output_topology == OutputTopology::LineStrip) {
        program.output_vertices = 2;
    } else {
        program.output_vertices = 3;
    }
    program.is_geometry_passthrough = false;
    // Loads and stores both start from what the source program stores; the stores then gain
    // Layer and lose the attribute that carried the emulated layer
    program.info.loads.mask = source_info.stores.mask;
    program.info.stores.mask = source_info.stores.mask;
    program.info.stores.Set(IR::Attribute::Layer, true);
    program.info.stores.Set(source_info.emulated_layer, false);
    // Body block: holds every attribute copy and the vertex/primitive emission
    IR::Block* body_block = block_pool.Create(inst_pool);
    auto& body_node{program.syntax_list.emplace_back()};
    body_node.type = IR::AbstractSyntaxNode::Type::Block;
    body_node.data.block = body_block;
    IR::IREmitter ir{*body_block};
    // Copies the four consecutive components starting at `base` from input vertex `vertex`
    const auto copy_vec4 = [&ir](IR::Attribute base, u32 vertex) {
        for (u32 component = 0; component < 4; ++component) {
            const IR::Attribute attribute = base + component;
            ir.SetAttribute(attribute, ir.GetAttribute(attribute, ir.Imm32(vertex)), ir.Imm32(0));
        }
    };
    for (u32 vertex = 0; vertex < program.output_vertices; ++vertex) {
        // Forward every generic that is still marked as stored
        for (u32 generic = 0; generic < 32; ++generic) {
            if (program.info.stores.Generic(generic)) {
                copy_vec4(IR::Attribute::Generic0X + (generic * 4), vertex);
            }
        }
        // Forward the position
        copy_vec4(IR::Attribute::PositionX, vertex);
        // Write the layer from the emulated attribute
        ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(source_info.emulated_layer),
                        ir.Imm32(0));
        ir.EmitVertex(ir.Imm32(0));
    }
    ir.EndPrimitive(ir.Imm32(0));
    // Return block: only contains the epilogue, reached from the body block
    IR::Block* return_block{block_pool.Create(inst_pool)};
    IR::IREmitter{*return_block}.Epilogue();
    body_block->AddBranch(return_block);
    auto& return_node{program.syntax_list.emplace_back()};
    return_node.type = IR::AbstractSyntaxNode::Type::Block;
    return_node.data.block = return_block;
    program.syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
    // Derive the block lists from the syntax list and run the SSA rewrite
    program.blocks = GenerateBlocks(program.syntax_list);
    program.post_order_blocks = PostOrder(program.syntax_list.front());
    Optimization::SsaRewritePass(program);
    return program;
 }
 } // namespace Shader::Maxwell
--- a/src/shader_recompiler/frontend/maxwell/translate_program.h
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.h
@ -25,4 +25,13 @@ namespace Shader::Maxwell {
 void ConvertLegacyToGeneric(IR::Program& program, const RuntimeInfo& runtime_info);
 // Maxwell v1 and older Nvidia cards don't support setting gl_Layer from non-geometry stages.
 // This creates a workaround by setting the layer as a generic output and creating a
 // passthrough geometry shader that reads the generic and sets the layer.
 //
 // inst_pool / block_pool: pools the generated program's blocks are allocated from.
 // source_program: program whose info.stores mask and info.emulated_layer describe what the
 //                 passthrough shader reads and which attribute carries the layer value.
 // output_topology: topology of the generated shader; it emits 1 vertex for PointList,
 //                  2 for LineStrip and 3 for anything else.
 // Returns the generated geometry-stage program.
 [[nodiscard]] IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
                                                      ObjectPool<IR::Block>& block_pool,
                                                      const HostTranslateInfo& host_info,
                                                      IR::Program& source_program,
                                                      Shader::OutputTopology output_topology);
 } // namespace Shader::Maxwell
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@ -13,7 +13,8 @@ struct HostTranslateInfo {
    bool support_float16{};      ///< True when the device supports 16-bit floats
    bool support_int64{};        ///< True when the device supports 64-bit integers
    bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
-    bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
+    bool support_snorm_render_buffer{};  ///< True when the device supports SNORM render buffers
    bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
 };
 } // namespace Shader
--- a/src/shader_recompiler/ir_opt/layer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/layer_pass.cpp
@ -0,0 +1,68 @@
 // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 #include <algorithm>
 #include <bit>
 #include <optional>
 #include <boost/container/small_vector.hpp>
 #include "shader_recompiler/environment.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
 #include "shader_recompiler/frontend/ir/breadth_first_search.h"
 #include "shader_recompiler/frontend/ir/ir_emitter.h"
 #include "shader_recompiler/host_translate_info.h"
 #include "shader_recompiler/ir_opt/passes.h"
 #include "shader_recompiler/shader_info.h"
 namespace Shader::Optimization {
 static IR::Attribute EmulatedLayerAttribute(VaryingState& stores) {
    // Picks the X component of the first generic (0..31) that is not marked in `stores`.
    // When all 32 generics are taken, Layer itself is returned.
    u32 index = 0;
    while (index < 32 && stores.Generic(index)) {
        ++index;
    }
    if (index == 32) {
        return IR::Attribute::Layer;
    }
    return IR::Attribute::Generic0X + (index * 4);
 }
 static bool PermittedProgramStage(Stage stage) {
    // Vertex (A and B) and tessellation (control and eval) stages are accepted; every other
    // stage is rejected.
    return stage == Stage::VertexA || stage == Stage::VertexB ||
           stage == Stage::TessellationControl || stage == Stage::TessellationEval;
 }
 void LayerPass(IR::Program& program, const HostTranslateInfo& host_info) {
    // Redirects every SetAttribute of Layer to a substitute attribute and records the
    // substitution in program.info. Skipped when the host reports
    // support_viewport_index_layer or when the stage is not a permitted one.
    if (host_info.support_viewport_index_layer) {
        return;
    }
    if (!PermittedProgramStage(program.stage)) {
        return;
    }
    const IR::Attribute replacement{EmulatedLayerAttribute(program.info.stores)};
    bool rewrote_any{false};
    for (const auto& block : program.post_order_blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (inst.GetOpcode() != IR::Opcode::SetAttribute) {
                continue;
            }
            if (inst.Arg(0).Attribute() != IR::Attribute::Layer) {
                continue;
            }
            inst.SetArg(0, IR::Value{replacement});
            rewrote_any = true;
        }
    }
    if (!rewrote_any) {
        // The program never writes Layer, so its info is left untouched
        return;
    }
    program.info.requires_layer_emulation = true;
    program.info.emulated_layer = replacement;
    program.info.stores.Set(IR::Attribute::Layer, false);
    program.info.stores.Set(replacement, true);
 }
 } // namespace Shader::Optimization
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@ -23,6 +23,7 @@ void RescalingPass(IR::Program& program);
 void SsaRewritePass(IR::Program& program);
 void PositionPass(Environment& env, IR::Program& program);
 void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info);
 void LayerPass(IR::Program& program, const HostTranslateInfo& host_info);
 void VerificationPass(const IR::Program& program);
 // Dual Vertex
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@ -204,6 +204,9 @@ struct Info {
    u32 nvn_buffer_base{};
    std::bitset<16> nvn_buffer_used{};
    bool requires_layer_emulation{};
    IR::Attribute emulated_layer{};
    boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
        constant_buffer_descriptors;
    boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors;
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@ -39,6 +39,7 @@ using Shader::Backend::GLASM::EmitGLASM;
 using Shader::Backend::GLSL::EmitGLSL;
 using Shader::Backend::SPIRV::EmitSPIRV;
 using Shader::Maxwell::ConvertLegacyToGeneric;
 using Shader::Maxwell::GenerateGeometryPassthrough;
 using Shader::Maxwell::MergeDualVertexPrograms;
 using Shader::Maxwell::TranslateProgram;
 using VideoCommon::ComputeEnvironment;
@ -56,6 +57,17 @@ auto MakeSpan(Container& container) {
    return std::span(container.data(), container.size());
 }
 Shader::OutputTopology MaxwellToOutputTopology(Maxwell::PrimitiveTopology topology) {
    // Points map to PointList and LineStrip maps to LineStrip; every other topology maps to
    // TriangleStrip.
    if (topology == Maxwell::PrimitiveTopology::Points) {
        return Shader::OutputTopology::PointList;
    }
    if (topology == Maxwell::PrimitiveTopology::LineStrip) {
        return Shader::OutputTopology::LineStrip;
    }
    return Shader::OutputTopology::TriangleStrip;
 }
 Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
                                    const Shader::IR::Program& program,
                                    const Shader::IR::Program* previous_program,
@ -220,6 +232,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
          .support_int64 = device.HasShaderInt64(),
          .needs_demote_reorder = device.IsAmd(),
          .support_snorm_render_buffer = false,
          .support_viewport_index_layer = device.HasVertexViewportLayer(),
      } {
    if (use_asynchronous_shaders) {
        workers = CreateWorkers();
@ -314,9 +327,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
    const auto& regs{maxwell3d->regs};
    graphics_key.raw = 0;
    graphics_key.early_z.Assign(regs.mandated_early_z != 0 ? 1 : 0);
-    graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
+    graphics_key.gs_input_topology.Assign(regs.draw.topology.Value());
                                              ? regs.draw.topology.Value()
                                              : Maxwell::PrimitiveTopology{});
    graphics_key.tessellation_primitive.Assign(regs.tessellation.params.domain_type.Value());
    graphics_key.tessellation_spacing.Assign(regs.tessellation.params.spacing.Value());
    graphics_key.tessellation_clockwise.Assign(
@ -415,7 +426,19 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
    std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
    const bool uses_vertex_a{key.unique_hashes[0] != 0};
    const bool uses_vertex_b{key.unique_hashes[1] != 0};
    // Layer passthrough generation for devices without GL_ARB_shader_viewport_layer_array
    Shader::IR::Program* layer_source_program{};
    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
        const bool is_emulated_stage = layer_source_program != nullptr &&
                                       index == static_cast<u32>(Maxwell::ShaderType::Geometry);
        if (key.unique_hashes[index] == 0 && is_emulated_stage) {
            auto topology = MaxwellToOutputTopology(key.gs_input_topology);
            programs[index] = GenerateGeometryPassthrough(pools.inst, pools.block, host_info,
                                                          *layer_source_program, topology);
            continue;
        }
        if (key.unique_hashes[index] == 0) {
            continue;
        }
@ -443,6 +466,10 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
                Shader::NumDescriptors(program_vb.info.storage_buffers_descriptors);
            programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
        }
        if (programs[index].info.requires_layer_emulation) {
            layer_source_program = &programs[index];
        }
    }
    const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()};
    const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit};
@ -456,7 +483,9 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
    const bool use_glasm{device.UseAssemblyShaders()};
    const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0;
    for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) {
-        if (key.unique_hashes[index] == 0) {
+        const bool is_emulated_stage = layer_source_program != nullptr &&
                                       index == static_cast<u32>(Maxwell::ShaderType::Geometry);
        if (key.unique_hashes[index] == 0 && !is_emulated_stage) {
            continue;
        }
        UNIMPLEMENTED_IF(index == 0);
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@ -46,6 +46,7 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache);
 namespace {
 using Shader::Backend::SPIRV::EmitSPIRV;
 using Shader::Maxwell::ConvertLegacyToGeneric;
 using Shader::Maxwell::GenerateGeometryPassthrough;
 using Shader::Maxwell::MergeDualVertexPrograms;
 using Shader::Maxwell::TranslateProgram;
 using VideoCommon::ComputeEnvironment;
@ -60,6 +61,17 @@ auto MakeSpan(Container& container) {
    return std::span(container.data(), container.size());
 }
 Shader::OutputTopology MaxwellToOutputTopology(Maxwell::PrimitiveTopology topology) {
    // Points map to PointList and LineStrip maps to LineStrip; every other topology maps to
    // TriangleStrip.
    if (topology == Maxwell::PrimitiveTopology::Points) {
        return Shader::OutputTopology::PointList;
    }
    if (topology == Maxwell::PrimitiveTopology::LineStrip) {
        return Shader::OutputTopology::LineStrip;
    }
    return Shader::OutputTopology::TriangleStrip;
 }
 Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) {
    switch (comparison) {
    case Maxwell::ComparisonOp::Never_D3D:
@ -327,6 +339,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
        .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR ||
                                driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR,
        .support_snorm_render_buffer = true,
        .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
    };
 }
@ -509,7 +522,19 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
    std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
    const bool uses_vertex_a{key.unique_hashes[0] != 0};
    const bool uses_vertex_b{key.unique_hashes[1] != 0};
    // Layer passthrough generation for devices without VK_EXT_shader_viewport_index_layer
    Shader::IR::Program* layer_source_program{};
    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
        const bool is_emulated_stage = layer_source_program != nullptr &&
                                       index == static_cast<u32>(Maxwell::ShaderType::Geometry);
        if (key.unique_hashes[index] == 0 && is_emulated_stage) {
            auto topology = MaxwellToOutputTopology(key.state.topology);
            programs[index] = GenerateGeometryPassthrough(pools.inst, pools.block, host_info,
                                                          *layer_source_program, topology);
            continue;
        }
        if (key.unique_hashes[index] == 0) {
            continue;
        }
@ -530,6 +555,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
            auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
            programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
        }
        if (programs[index].info.requires_layer_emulation) {
            layer_source_program = &programs[index];
        }
    }
    std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
    std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;
@ -538,7 +567,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
    Shader::Backend::Bindings binding;
    for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram;
         ++index) {
-        if (key.unique_hashes[index] == 0) {
+        const bool is_emulated_stage = layer_source_program != nullptr &&
                                       index == static_cast<u32>(Maxwell::ShaderType::Geometry);
        if (key.unique_hashes[index] == 0 && !is_emulated_stage) {
            continue;
        }
        UNIMPLEMENTED_IF(index == 0);