Texture Cache: Fix downscaling and correct memory consumption.

2024-07-04 23:31:19 +01:00 · 2021-10-17 18:01:18 +02:00 · 2021-10-17 18:01:18 +02:00 · 425ab9ef4b
commit 425ab9ef4b
parent b60966041c
8 changed files with 146 additions and 35 deletions
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@ -876,7 +876,7 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b
    }
 }

-bool Image::Scale() {
+bool Image::Scale(bool up_scale) {
    const auto format_type = GetFormatType(info.format);
    const GLenum attachment = [format_type] {
        switch (format_type) {
@ -944,14 +944,25 @@ bool Image::Scale() {
    const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle;
    for (s32 layer = 0; layer < info.resources.layers; ++layer) {
        for (s32 level = 0; level < info.resources.levels; ++level) {
-            const u32 src_level_width = std::max(1u, original_width >> level);
-            const u32 src_level_height = std::max(1u, original_height >> level);
-            const u32 dst_level_width = std::max(1u, scaled_width >> level);
-            const u32 dst_level_height = std::max(1u, scaled_height >> level);
+            const u32 src_level_width =
+                std::max(1u, (up_scale ? original_width : scaled_width) >> level);
+            const u32 src_level_height =
+                std::max(1u, (up_scale ? original_height : scaled_height) >> level);
+            const u32 dst_level_width =
+                std::max(1u, (up_scale ? scaled_width : original_width) >> level);
+            const u32 dst_level_height =
+                std::max(1u, (up_scale ? scaled_height : original_height) >> level);
+
+            if (up_scale) {
+                glNamedFramebufferTextureLayer(read_fbo, attachment, texture.handle, level, layer);
+                glNamedFramebufferTextureLayer(draw_fbo, attachment, upscaled_backup.handle, level,
+                                               layer);
+            } else {
+                glNamedFramebufferTextureLayer(read_fbo, attachment, upscaled_backup.handle, level,
+                                               layer);
+                glNamedFramebufferTextureLayer(draw_fbo, attachment, texture.handle, level, layer);
+            }

-            glNamedFramebufferTextureLayer(read_fbo, attachment, texture.handle, level, layer);
-            glNamedFramebufferTextureLayer(draw_fbo, attachment, upscaled_backup.handle, level,
-                                           layer);
            glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0,
                                   0, dst_level_width, dst_level_height, mask, filter);
        }
@ -959,7 +970,12 @@ bool Image::Scale() {
    if (scissor_test != GL_FALSE) {
        glEnablei(GL_SCISSOR_TEST, 0);
    }
-    current_texture = upscaled_backup.handle;
+    if (up_scale) {
+        current_texture = upscaled_backup.handle;
+    } else {
+        current_texture = texture.handle;
+    }
+
    return true;
 }

@ -981,6 +997,7 @@ bool Image::ScaleUp() {
        flags &= ~ImageFlagBits::Rescaled;
        return false;
    }
+    scale_count++;
    if (!Scale()) {
        flags &= ~ImageFlagBits::Rescaled;
        return false;
@ -996,7 +1013,11 @@ bool Image::ScaleDown() {
    if (!runtime->resolution.active) {
        return false;
    }
-    current_texture = texture.handle;
+    scale_count++;
+    if (!Scale(false)) {
+        flags &= ~ImageFlagBits::Rescaled;
+        return false;
+    }
    return true;
 }

--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@ -205,7 +205,7 @@ private:

    void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);

-    bool Scale();
+    bool Scale(bool up_scale = true);

    OGLTexture texture;
    OGLTexture upscaled_backup;
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@ -592,7 +592,8 @@ struct RangedBarrierRange {
 }

 void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info,
-               VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution) {
+               VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution,
+               bool up_scaling = true) {
    const bool is_2d = info.type == ImageType::e2D;
    const auto resources = info.resources;
    const VkExtent2D extent{
@ -605,14 +606,16 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con

    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, is_2d,
-                      vk_filter](vk::CommandBuffer cmdbuf) {
+                      vk_filter, up_scaling](vk::CommandBuffer cmdbuf) {
        const VkOffset2D src_size{
-            .x = static_cast<s32>(extent.width),
-            .y = static_cast<s32>(extent.height),
+            .x = static_cast<s32>(up_scaling ? extent.width : resolution.ScaleUp(extent.width)),
+            .y = static_cast<s32>(is_2d && up_scaling ? extent.height
+                                                      : resolution.ScaleUp(extent.height)),
        };
        const VkOffset2D dst_size{
-            .x = static_cast<s32>(resolution.ScaleUp(extent.width)),
-            .y = static_cast<s32>(is_2d ? resolution.ScaleUp(extent.height) : extent.height),
+            .x = static_cast<s32>(up_scaling ? resolution.ScaleUp(extent.width) : extent.width),
+            .y = static_cast<s32>(is_2d && up_scaling ? resolution.ScaleUp(extent.height)
+                                                      : extent.height),
        };
        boost::container::small_vector<VkImageBlit, 4> regions;
        regions.reserve(resources.levels);
@ -1134,6 +1137,7 @@ bool Image::ScaleUp() {
    if (!resolution.active) {
        return false;
    }
+    scale_count++;
    const auto& device = runtime->device;
    const bool is_2d = info.type == ImageType::e2D;
    const u32 scaled_width = resolution.ScaleUp(info.size.width);
@ -1161,8 +1165,10 @@ bool Image::ScaleUp() {
        using namespace VideoCommon;
        static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy;

-        const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format);
-        scale_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this);
+        if (!scale_view) {
+            const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format);
+            scale_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this);
+        }
        auto* view_ptr = scale_view.get();

        const Region2D src_region{
@ -1178,7 +1184,10 @@ bool Image::ScaleUp() {
            .height = scaled_height,
        };
        if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
-            scale_framebuffer = std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
+            if (!scale_framebuffer) {
+                scale_framebuffer =
+                    std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
+            }
            const auto color_view = scale_view->Handle(Shader::TextureType::Color2D);

            runtime->blit_image_helper.BlitColor(
@ -1186,7 +1195,10 @@ bool Image::ScaleUp() {
                Tegra::Engines::Fermi2D::Filter::Bilinear, BLIT_OPERATION);
        } else if (!runtime->device.IsBlitDepthStencilSupported() &&
                   aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
-            scale_framebuffer = std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent);
+            if (!scale_framebuffer) {
+                scale_framebuffer =
+                    std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
+            }
            runtime->blit_image_helper.BlitDepthStencil(
                scale_framebuffer.get(), scale_view->DepthView(), scale_view->StencilView(),
                dst_region, src_region, Tegra::Engines::Fermi2D::Filter::Point, BLIT_OPERATION);
@ -1209,6 +1221,67 @@ bool Image::ScaleDown() {
    if (!resolution.active) {
        return false;
    }
+    const auto& device = runtime->device;
+    const bool is_2d = info.type == ImageType::e2D;
+    const u32 scaled_width = resolution.ScaleUp(info.size.width);
+    const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
+    if (aspect_mask == 0) {
+        aspect_mask = ImageAspectMask(info.format);
+    }
+    static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
+    const PixelFormat format = StorageFormat(info.format);
+    const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
+    const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+    if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
+        BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false);
+    } else {
+        using namespace VideoCommon;
+        static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy;
+
+        if (!normal_view) {
+            const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format);
+            normal_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this);
+        }
+        auto* view_ptr = normal_view.get();
+
+        const Region2D src_region{
+            .start = {0, 0},
+            .end = {static_cast<s32>(scaled_width), static_cast<s32>(scaled_height)},
+        };
+        const Region2D dst_region{
+            .start = {0, 0},
+            .end = {static_cast<s32>(info.size.width), static_cast<s32>(info.size.height)},
+        };
+        const VkExtent2D extent{
+            .width = scaled_width,
+            .height = scaled_height,
+        };
+        if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
+            if (!normal_framebuffer) {
+                normal_framebuffer =
+                    std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
+            }
+            const auto color_view = normal_view->Handle(Shader::TextureType::Color2D);
+
+            runtime->blit_image_helper.BlitColor(
+                normal_framebuffer.get(), color_view, dst_region, src_region,
+                Tegra::Engines::Fermi2D::Filter::Bilinear, BLIT_OPERATION);
+        } else if (!runtime->device.IsBlitDepthStencilSupported() &&
+                   aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+            if (!normal_framebuffer) {
+                normal_framebuffer =
+                    std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent);
+            }
+            runtime->blit_image_helper.BlitDepthStencil(
+                normal_framebuffer.get(), normal_view->DepthView(), normal_view->StencilView(),
+                dst_region, src_region, Tegra::Engines::Fermi2D::Filter::Point, BLIT_OPERATION);
+        } else {
+            // TODO: Use helper blits where applicable
+            flags &= ~ImageFlagBits::Rescaled;
+            LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", format);
+            return false;
+        }
+    }
    ASSERT(info.type != ImageType::Linear);
    current_image = *original_image;
    return true;
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@ -148,6 +148,9 @@ private:

    std::unique_ptr<Framebuffer> scale_framebuffer;
    std::unique_ptr<ImageView> scale_view;
+
+    std::unique_ptr<Framebuffer> normal_framebuffer;
+    std::unique_ptr<ImageView> normal_view;
 };

 class ImageView : public VideoCommon::ImageViewBase {
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@ -60,8 +60,8 @@ namespace {
 ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_)
    : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)},
      unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)},
-      converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{},
-      scale_tick{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_},
+      converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, scale_tick{},
+      scale_count{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_},
      cpu_addr_end{cpu_addr + guest_size_bytes}, mip_level_offsets{CalculateMipLevelOffsets(info)} {
    if (info.type == ImageType::e3D) {
        slice_offsets = CalculateSliceOffsets(info);
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@ -77,6 +77,10 @@ struct ImageBase {
    void CheckBadOverlapState();
    void CheckAliasState();

+    bool HasScaled() {
+        return scale_count > 0;
+    }
+
    ImageInfo info;

    u32 guest_size_bytes = 0;
@ -84,6 +88,7 @@ struct ImageBase {
    u32 converted_size_bytes = 0;
    u32 scale_rating = 0;
    u64 scale_tick = 0;
+    u32 scale_count = 0;
    ImageFlagBits flags = ImageFlagBits::CpuModified;

    GPUVAddr gpu_addr = 0;
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@ -854,8 +854,8 @@ void TextureCache<P>::InvalidateScale(Image& image) {
 }

 template <class P>
-u64 TextureCache<P>::GetScaledImageSizeBytes(Image& image) {
-    const f32 add_to_size = Settings::values.resolution_info.up_factor - 1.0f;
+u64 TextureCache<P>::GetScaledImageSizeBytes(ImageBase& image) {
+    const f32 add_to_size = Settings::values.resolution_info.up_factor;
    const bool sign = std::signbit(add_to_size);
    const u32 image_size_bytes = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
    const u64 tentative_size = image_size_bytes * static_cast<u64>(std::abs(add_to_size));
@ -865,11 +865,14 @@ u64 TextureCache<P>::GetScaledImageSizeBytes(Image& image) {

 template <class P>
 bool TextureCache<P>::ScaleUp(Image& image) {
+    const bool has_copy = image.HasScaled();
    const bool rescaled = image.ScaleUp();
    if (!rescaled) {
        return false;
    }
-    total_used_memory += GetScaledImageSizeBytes(image);
+    if (!has_copy) {
+        total_used_memory += GetScaledImageSizeBytes(image);
+    }
    InvalidateScale(image);
    return true;
 }
@ -880,7 +883,10 @@ bool TextureCache<P>::ScaleDown(Image& image) {
    if (!rescaled) {
        return false;
    }
-    total_used_memory -= GetScaledImageSizeBytes(image);
+    const bool has_copy = image.HasScaled();
+    if (!has_copy) {
+        total_used_memory -= GetScaledImageSizeBytes(image);
+    }
    InvalidateScale(image);
    return true;
 }
@ -1391,13 +1397,6 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
               "Trying to unregister an already registered image");
    image.flags &= ~ImageFlagBits::Registered;
    image.flags &= ~ImageFlagBits::BadOverlap;
-    u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
-    if ((IsPixelFormatASTC(image.info.format) &&
-         True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
-        True(image.flags & ImageFlagBits::Converted)) {
-        tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
-    }
-    total_used_memory -= Common::AlignUp(tentative_size, 1024);
    lru_cache.Free(image.lru_index);
    const auto& clear_page_table =
        [this, image_id](
@ -1478,6 +1477,16 @@ template <class P>
 void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
    ASSERT(False(image.flags & ImageFlagBits::Tracked));
    image.flags |= ImageFlagBits::Tracked;
+    if (image.HasScaled()) {
+        total_used_memory -= GetScaledImageSizeBytes(image);
+    }
+    u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
+    if ((IsPixelFormatASTC(image.info.format) &&
+         True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
+        True(image.flags & ImageFlagBits::Converted)) {
+        tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
+    }
+    total_used_memory -= Common::AlignUp(tentative_size, 1024);
    if (False(image.flags & ImageFlagBits::Sparse)) {
        rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
        return;
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@ -331,7 +331,7 @@ private:
    void InvalidateScale(Image& image);
    bool ScaleUp(Image& image);
    bool ScaleDown(Image& image);
-    u64 GetScaledImageSizeBytes(Image& image);
+    u64 GetScaledImageSizeBytes(ImageBase& image);

    Runtime& runtime;
    VideoCore::RasterizerInterface& rasterizer;