shader: Accelerate pipeline transitions and use dirty flags for shaders

2024-07-04 23:31:19 +01:00 · 2021-04-24 18:27:25 -03:00 · 2021-04-24 18:27:25 -03:00 · f4ace63957
commit f4ace63957
parent 20e86fd615
9 changed files with 114 additions and 64 deletions
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -58,6 +58,11 @@ void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) {
        FillBlock(table, OFF(zeta), NUM(zeta), flag);
    }
 }
+
+void SetupDirtyShaders(Maxwell3D::DirtyState::Tables& tables) {
+    FillBlock(tables[0], OFF(shader_config[0]),
+              NUM(shader_config[0]) * Maxwell3D::Regs::MaxShaderProgram, Shaders);
+}
 } // Anonymous namespace

 void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
@@ -65,6 +70,7 @@ void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
    SetupIndexBuffer(tables);
    SetupDirtyDescriptors(tables);
    SetupDirtyRenderTargets(tables);
+    SetupDirtyShaders(tables);
 }

 } // namespace VideoCommon::Dirty
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -36,6 +36,8 @@ enum : u8 {

    IndexBuffer,

+    Shaders,
+
    LastCommonEntry,
 };

--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -635,7 +635,7 @@ void RasterizerOpenGL::SyncDepthClamp() {

 void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
    auto& flags = maxwell3d.dirty.flags;
-    if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
+    if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) {
        return;
    }
    flags[Dirty::ClipDistances] = false;
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -83,11 +83,6 @@ void SetupDirtyScissors(Tables& tables) {
    FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
 }

-void SetupDirtyShaders(Tables& tables) {
-    FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram,
-              Shaders);
-}
-
 void SetupDirtyPolygonModes(Tables& tables) {
    tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
    tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
@@ -217,7 +212,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
    SetupDirtyScissors(tables);
    SetupDirtyVertexInstances(tables);
    SetupDirtyVertexFormat(tables);
-    SetupDirtyShaders(tables);
    SetupDirtyPolygonModes(tables);
    SetupDirtyDepthTest(tables);
    SetupDirtyStencilTest(tables);
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -52,7 +52,6 @@ enum : u8 {
    BlendState0,
    BlendState7 = BlendState0 + 7,

-    Shaders,
    ClipDistances,

    PolygonModes,
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -125,13 +125,12 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_,
                                   VKUpdateDescriptorQueue& update_descriptor_queue_,
                                   Common::ThreadWorker* worker_thread,
                                   RenderPassCache& render_pass_cache,
-                                   const FixedPipelineState& state_,
+                                   const GraphicsPipelineCacheKey& key_,
                                   std::array<vk::ShaderModule, NUM_STAGES> stages,
                                   const std::array<const Shader::Info*, NUM_STAGES>& infos)
-    : maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_},
+    : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_},
      buffer_cache{buffer_cache_}, scheduler{scheduler_},
-      update_descriptor_queue{update_descriptor_queue_}, state{state_}, spv_modules{
-                                                                            std::move(stages)} {
+      update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} {
    std::ranges::transform(infos, stage_infos.begin(),
                           [](const Shader::Info* info) { return info ? *info : Shader::Info{}; });

@@ -144,7 +143,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_,
        pipeline_layout = builder.CreatePipelineLayout(set_layout);
        descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout);

-        const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))};
+        const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))};
        MakePipeline(device, render_pass);

        std::lock_guard lock{build_mutex};
@@ -158,6 +157,11 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_,
    }
 }

+void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
+    transition_keys.push_back(transition->key);
+    transitions.push_back(transition);
+}
+
 void GraphicsPipeline::Configure(bool is_indexed) {
    static constexpr size_t max_images_elements = 64;
    std::array<ImageId, max_images_elements> image_view_ids;
@@ -294,12 +298,12 @@ void GraphicsPipeline::Configure(bool is_indexed) {
 void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) {
    FixedPipelineState::DynamicState dynamic{};
    if (!device.IsExtExtendedDynamicStateSupported()) {
-        dynamic = state.dynamic_state;
+        dynamic = key.state.dynamic_state;
    }
    static_vector<VkVertexInputBindingDescription, 32> vertex_bindings;
    static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors;
    for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        const bool instanced = state.binding_divisors[index] != 0;
+        const bool instanced = key.state.binding_divisors[index] != 0;
        const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
        vertex_bindings.push_back({
            .binding = static_cast<u32>(index),
@@ -309,14 +313,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
        if (instanced) {
            vertex_binding_divisors.push_back({
                .binding = static_cast<u32>(index),
-                .divisor = state.binding_divisors[index],
+                .divisor = key.state.binding_divisors[index],
            });
        }
    }
    static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes;
    const auto& input_attributes = stage_infos[0].input_generics;
-    for (size_t index = 0; index < state.attributes.size(); ++index) {
-        const auto& attribute = state.attributes[index];
+    for (size_t index = 0; index < key.state.attributes.size(); ++index) {
+        const auto& attribute = key.state.attributes[index];
        if (!attribute.enabled || !input_attributes[index].used) {
            continue;
        }
@@ -345,7 +349,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
    if (!vertex_binding_divisors.empty()) {
        vertex_input_ci.pNext = &input_divisor_ci;
    }
-    auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology);
+    auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology);
    if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) {
        if (!spv_modules[1] && !spv_modules[2]) {
            LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points");
@@ -357,14 +361,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
        .pNext = nullptr,
        .flags = 0,
        .topology = input_assembly_topology,
-        .primitiveRestartEnable = state.primitive_restart_enable != 0 &&
+        .primitiveRestartEnable = key.state.primitive_restart_enable != 0 &&
                                  SupportsPrimitiveRestart(input_assembly_topology),
    };
    const VkPipelineTessellationStateCreateInfo tessellation_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
-        .patchControlPoints = state.patch_control_points_minus_one.Value() + 1,
+        .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1,
    };
    VkPipelineViewportStateCreateInfo viewport_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
@@ -376,7 +380,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
        .pScissors = nullptr,
    };
    std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
-    std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
+    std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
    VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
        .pNext = nullptr,
@@ -393,15 +397,15 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
        .pNext = nullptr,
        .flags = 0,
        .depthClampEnable =
-            static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
+            static_cast<VkBool32>(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
        .rasterizerDiscardEnable =
-            static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
+            static_cast<VkBool32>(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
        .polygonMode =
-            MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(state.polygon_mode)),
+            MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)),
        .cullMode = static_cast<VkCullModeFlags>(
            dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
        .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
-        .depthBiasEnable = state.depth_bias_enable,
+        .depthBiasEnable = key.state.depth_bias_enable,
        .depthBiasConstantFactor = 0.0f,
        .depthBiasClamp = 0.0f,
        .depthBiasSlopeFactor = 0.0f,
@@ -411,7 +415,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
        .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
-        .rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode),
+        .rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode),
        .sampleShadingEnable = VK_FALSE,
        .minSampleShading = 0.0f,
        .pSampleMask = nullptr,
@@ -435,7 +439,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
        .maxDepthBounds = 0.0f,
    };
    static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
-    const size_t num_attachments{NumAttachments(state)};
+    const size_t num_attachments{NumAttachments(key.state)};
    for (size_t index = 0; index < num_attachments; ++index) {
        static constexpr std::array mask_table{
            VK_COLOR_COMPONENT_R_BIT,
@@ -443,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
            VK_COLOR_COMPONENT_B_BIT,
            VK_COLOR_COMPONENT_A_BIT,
        };
-        const auto& blend{state.attachments[index]};
+        const auto& blend{key.state.attachments[index]};
        const std::array mask{blend.Mask()};
        VkColorComponentFlags write_mask{};
        for (size_t i = 0; i < mask_table.size(); ++i) {
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -4,10 +4,12 @@

 #pragma once

+#include <algorithm>
 #include <array>
 #include <atomic>
 #include <condition_variable>
 #include <mutex>
+#include <type_traits>

 #include "common/thread_worker.h"
 #include "shader_recompiler/shader_info.h"
@@ -20,6 +22,39 @@

 namespace Vulkan {

+struct GraphicsPipelineCacheKey {
+    std::array<u128, 6> unique_hashes;
+    FixedPipelineState state;
+
+    size_t Hash() const noexcept;
+
+    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
+
+    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
+        return !operator==(rhs);
+    }
+
+    size_t Size() const noexcept {
+        return sizeof(unique_hashes) + state.Size();
+    }
+};
+static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
+
+} // namespace Vulkan
+
+namespace std {
+template <>
+struct hash<Vulkan::GraphicsPipelineCacheKey> {
+    size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
+        return k.Hash();
+    }
+};
+} // namespace std
+
+namespace Vulkan {
+
 class Device;
 class RenderPassCache;
 class VKScheduler;
@@ -35,7 +70,8 @@ public:
                              const Device& device, VKDescriptorPool& descriptor_pool,
                              VKUpdateDescriptorQueue& update_descriptor_queue,
                              Common::ThreadWorker* worker_thread,
-                              RenderPassCache& render_pass_cache, const FixedPipelineState& state,
+                              RenderPassCache& render_pass_cache,
+                              const GraphicsPipelineCacheKey& key,
                              std::array<vk::ShaderModule, NUM_STAGES> stages,
                              const std::array<const Shader::Info*, NUM_STAGES>& infos);

@@ -47,16 +83,30 @@ public:
    GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
    GraphicsPipeline(const GraphicsPipeline&) = delete;

+    void AddTransition(GraphicsPipeline* transition);
+
+    GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept {
+        if (key == current_key) {
+            return this;
+        }
+        const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)};
+        return it != transition_keys.end() ? transitions[std::distance(transition_keys.begin(), it)]
+                                           : nullptr;
+    }
+
 private:
    void MakePipeline(const Device& device, VkRenderPass render_pass);

+    const GraphicsPipelineCacheKey key;
    Tegra::Engines::Maxwell3D& maxwell3d;
    Tegra::MemoryManager& gpu_memory;
    TextureCache& texture_cache;
    BufferCache& buffer_cache;
    VKScheduler& scheduler;
    VKUpdateDescriptorQueue& update_descriptor_queue;
-    const FixedPipelineState state;
+
+    std::vector<GraphicsPipelineCacheKey> transition_keys;
+    std::vector<GraphicsPipeline*> transitions;

    std::array<vk::ShaderModule, NUM_STAGES> spv_modules;
    std::array<Shader::Info, NUM_STAGES> stage_infos;
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -21,6 +21,7 @@
 #include "shader_recompiler/frontend/maxwell/control_flow.h"
 #include "shader_recompiler/frontend/maxwell/program.h"
 #include "shader_recompiler/program_header.h"
+#include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
@@ -700,17 +701,28 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
    MICROPROFILE_SCOPE(Vulkan_PipelineCache);

    if (!RefreshStages()) {
+        current_pipeline = nullptr;
        return nullptr;
    }
    graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported());

+    if (current_pipeline) {
+        GraphicsPipeline* const next{current_pipeline->Next(graphics_key)};
+        if (next) {
+            current_pipeline = next;
+            return current_pipeline;
+        }
+    }
    const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
    auto& pipeline{pair->second};
-    if (!is_new) {
-        return pipeline.get();
-    }
+    if (is_new) {
        pipeline = CreateGraphicsPipeline();
-    return pipeline.get();
+    }
+    if (current_pipeline) {
+        current_pipeline->AddTransition(pipeline.get());
+    }
+    current_pipeline = pipeline.get();
+    return current_pipeline;
 }

 ComputePipeline* PipelineCache::CurrentComputePipeline() {
@@ -743,6 +755,12 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
 }

 bool PipelineCache::RefreshStages() {
+    auto& dirty{maxwell3d.dirty.flags};
+    if (!dirty[VideoCommon::Dirty::Shaders]) {
+        return last_valid_shaders;
+    }
+    dirty[VideoCommon::Dirty::Shaders] = false;
+
    const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
@@ -755,6 +773,7 @@ bool PipelineCache::RefreshStages() {
        const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
        if (!cpu_shader_addr) {
            LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr);
+            last_valid_shaders = false;
            return false;
        }
        const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
@@ -766,6 +785,7 @@ bool PipelineCache::RefreshStages() {
        shader_infos[index] = shader_info;
        graphics_key.unique_hashes[index] = shader_info->unique_hash;
    }
+    last_valid_shaders = true;
    return true;
 }

@@ -832,8 +852,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
    Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
    return std::make_unique<GraphicsPipeline>(
        maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool,
-        update_descriptor_queue, thread_worker, render_pass_cache, key.state, std::move(modules),
-        infos);
+        update_descriptor_queue, thread_worker, render_pass_cache, key, std::move(modules), infos);
 }

 std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -58,26 +58,6 @@ static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>)
 static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>);
 static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);

-struct GraphicsPipelineCacheKey {
-    std::array<u128, 6> unique_hashes;
-    FixedPipelineState state;
-
-    size_t Hash() const noexcept;
-
-    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
-
-    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
-        return !operator==(rhs);
-    }
-
-    size_t Size() const noexcept {
-        return sizeof(unique_hashes) + state.Size();
-    }
-};
-static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
-
 } // namespace Vulkan

 namespace std {
@@ -89,13 +69,6 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
    }
 };

-template <>
-struct hash<Vulkan::GraphicsPipelineCacheKey> {
-    size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
-        return k.Hash();
-    }
-};
-
 } // namespace std

 namespace Vulkan {
@@ -181,7 +154,10 @@ private:
    TextureCache& texture_cache;

    GraphicsPipelineCacheKey graphics_key{};
+    GraphicsPipeline* current_pipeline{};
+
    std::array<const ShaderInfo*, 6> shader_infos{};
+    bool last_valid_shaders{};

    std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
    std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;