From 935e88a5805dfbeefb599da72d2ff8c41664073e Mon Sep 17 00:00:00 2001
From: BreadFish64 <mohror64@gmail.com>
Date: Tue, 2 Feb 2021 20:43:41 -0600
Subject: [PATCH 1/5] gl_rasterizer_cache: Remove all fully invalid surfaces
 from the cache Some games (e.g. Pilotwings Resort) create many surfaces that
 are invalidated quickly but were never removed. This occasionally led to
 large lag spikes due to high lookup times and other data structure management
 overhead.

---
 src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 88310a0ac..c85496d29 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -1858,9 +1858,9 @@ void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface
             cached_surface->invalid_regions.insert(interval);
             cached_surface->InvalidateAllWatcher();
 
-            // Remove only "empty" fill surfaces to avoid destroying and recreating OGL textures
-            if (cached_surface->type == SurfaceType::Fill &&
-                cached_surface->IsSurfaceFullyInvalid()) {
+            // If the surface has no salvageable data it should be removed from the cache to avoid
+            // clogging the data structure
+            if (cached_surface->IsSurfaceFullyInvalid()) {
                 remove_surfaces.emplace(cached_surface);
             }
         }

From 85e9ba897d6228dd03f9cece1bcf04902d6d9e20 Mon Sep 17 00:00:00 2001
From: BreadFish64 <mohror64@gmail.com>
Date: Tue, 2 Feb 2021 20:46:25 -0600
Subject: [PATCH 2/5] gl_rasterizer_cache: Recycle host textures Allocating new
 textures has fairly high driver overhead. We can avoid some of this by
 reusing the textures from destroyed surfaces since the game will probably
 create more textures with the same dimensions and format.

---
 .../renderer_opengl/gl_rasterizer_cache.cpp   | 19 +++++--
 .../renderer_opengl/gl_rasterizer_cache.h     | 56 ++++++++++++++++---
 2 files changed, 61 insertions(+), 14 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index c85496d29..e2c6bd1f3 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -494,6 +494,10 @@ static bool FillSurface(const Surface& surface, const u8* fill_data,
     return true;
 }
 
+CachedSurface::~CachedSurface() {
+    owner.host_texture_recycler.emplace(*this, std::move(texture));
+}
+
 bool CachedSurface::CanFill(const SurfaceParams& dest_surface,
                             SurfaceInterval fill_interval) const {
     if (type == SurfaceType::Fill && IsRegionValid(fill_interval) &&
@@ -1893,12 +1897,17 @@ Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) {
     Surface surface = std::make_shared<CachedSurface>(*this);
     static_cast<SurfaceParams&>(*surface) = params;
 
-    surface->texture.Create();
-
-    surface->gl_buffer.resize(0);
     surface->invalid_regions.insert(surface->GetInterval());
-    AllocateSurfaceTexture(surface->texture.handle, GetFormatTuple(surface->pixel_format),
-                           surface->GetScaledWidth(), surface->GetScaledHeight());
+
+    auto recycled_texture = host_texture_recycler.find(params);
+    if (recycled_texture == host_texture_recycler.end()) {
+        surface->texture.Create();
+        AllocateSurfaceTexture(surface->texture.handle, GetFormatTuple(surface->pixel_format),
+                               surface->GetScaledWidth(), surface->GetScaledHeight());
+    } else {
+        surface->texture = std::move(recycled_texture->second);
+        host_texture_recycler.erase(recycled_texture);
+    }
 
     return surface;
 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 1a9b6a3b3..fc261316d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -36,6 +36,36 @@ class RasterizerCacheOpenGL;
 class TextureFilterer;
 class FormatReinterpreterOpenGL;
 
+struct FormatTuple {
+    GLint internal_format;
+    GLenum format;
+    GLenum type;
+};
+
+constexpr FormatTuple tex_tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE};
+
+const FormatTuple& GetFormatTuple(SurfaceParams::PixelFormat pixel_format);
+
+struct HostTextureTag {
+    GLint internal_format;
+    GLenum format;
+    u32 width;
+    u32 height;
+    HostTextureTag(const SurfaceParams& params) noexcept {
+        auto format_tuple = GetFormatTuple(params.pixel_format);
+        internal_format = format_tuple.internal_format;
+        format = format_tuple.format;
+        // The type in the format tuple is irrelevant for the tag since the type is only for
+        // interpreting data on upload/download
+        width = params.GetScaledWidth();
+        height = params.GetScaledHeight();
+    }
+    bool operator==(const HostTextureTag& rhs) const noexcept {
+        return std::tie(internal_format, format, width, height) ==
+               std::tie(rhs.internal_format, rhs.format, rhs.width, rhs.height);
+    };
+};
+
 struct TextureCubeConfig {
     PAddr px;
     PAddr nx;
@@ -59,6 +89,18 @@ struct TextureCubeConfig {
 } // namespace OpenGL
 
 namespace std {
+template <>
+struct hash<OpenGL::HostTextureTag> {
+    std::size_t operator()(const OpenGL::HostTextureTag& tag) const noexcept {
+        std::size_t hash = 0;
+        boost::hash_combine(hash, tag.format);
+        boost::hash_combine(hash, tag.internal_format);
+        boost::hash_combine(hash, tag.width);
+        boost::hash_combine(hash, tag.height);
+        return hash;
+    }
+};
+
 template <>
 struct hash<OpenGL::TextureCubeConfig> {
     std::size_t operator()(const OpenGL::TextureCubeConfig& config) const noexcept {
@@ -139,6 +181,7 @@ private:
 
 struct CachedSurface : SurfaceParams, std::enable_shared_from_this<CachedSurface> {
     CachedSurface(RasterizerCacheOpenGL& owner) : owner{owner} {}
+    ~CachedSurface();
 
     bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const;
     bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const;
@@ -326,17 +369,12 @@ private:
     std::unordered_map<TextureCubeConfig, CachedTextureCube> texture_cube_cache;
 
 public:
+    // Textures from destroyed surfaces are stored here to be recyled to reduce allocation overhead
+    // in the driver
+    std::unordered_multimap<HostTextureTag, OGLTexture> host_texture_recycler;
+
     std::unique_ptr<TextureFilterer> texture_filterer;
     std::unique_ptr<FormatReinterpreterOpenGL> format_reinterpreter;
 };
 
-struct FormatTuple {
-    GLint internal_format;
-    GLenum format;
-    GLenum type;
-};
-
-constexpr FormatTuple tex_tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE};
-
-const FormatTuple& GetFormatTuple(SurfaceParams::PixelFormat pixel_format);
 } // namespace OpenGL

From a131f9eb749d7734c9bc3291aa3582704b849e1d Mon Sep 17 00:00:00 2001
From: BreadFish64 <mohror64@gmail.com>
Date: Wed, 3 Feb 2021 13:29:03 -0600
Subject: [PATCH 3/5] gl_rasterizer_cache: don't place null textures in the
 recycler

---
 src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index e2c6bd1f3..3b894444f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -495,7 +495,9 @@ static bool FillSurface(const Surface& surface, const u8* fill_data,
 }
 
 CachedSurface::~CachedSurface() {
-    owner.host_texture_recycler.emplace(*this, std::move(texture));
+    if (texture.handle) {
+        owner.host_texture_recycler.emplace(*this, std::move(texture));
+    }
 }
 
 bool CachedSurface::CanFill(const SurfaceParams& dest_surface,

From 00c798991c8b5192227db1431ce38c6e85247975 Mon Sep 17 00:00:00 2001
From: Marshall Mohror <mohror64@gmail.com>
Date: Fri, 5 Feb 2021 08:25:02 -0600
Subject: [PATCH 4/5] remove semicolon

---
 src/video_core/renderer_opengl/gl_rasterizer_cache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index fc261316d..bac748d2e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -63,7 +63,7 @@ struct HostTextureTag {
     bool operator==(const HostTextureTag& rhs) const noexcept {
         return std::tie(internal_format, format, width, height) ==
                std::tie(rhs.internal_format, rhs.format, rhs.width, rhs.height);
-    };
+    }
 };
 
 struct TextureCubeConfig {

From ff56fdf37de6cb7136bc4a71721a66985da1be67 Mon Sep 17 00:00:00 2001
From: BreadFish64 <mohror64@gmail.com>
Date: Mon, 8 Feb 2021 15:38:30 -0600
Subject: [PATCH 5/5] fix recycling custom textures

---
 .../renderer_opengl/gl_rasterizer_cache.cpp   | 61 ++++++++++---------
 .../renderer_opengl/gl_rasterizer_cache.h     | 25 +++-----
 2 files changed, 41 insertions(+), 45 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 3b894444f..92e93735e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -311,13 +311,21 @@ static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> gl
 };
 
 // Allocate an uninitialized texture of appropriate size and format for the surface
-static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tuple, u32 width,
-                                   u32 height) {
-    OpenGLState cur_state = OpenGLState::GetCurState();
+OGLTexture RasterizerCacheOpenGL::AllocateSurfaceTexture(const FormatTuple& format_tuple, u32 width,
+                                                         u32 height) {
+    auto recycled_tex = host_texture_recycler.find({format_tuple, width, height});
+    if (recycled_tex != host_texture_recycler.end()) {
+        OGLTexture texture = std::move(recycled_tex->second);
+        host_texture_recycler.erase(recycled_tex);
+        return texture;
+    }
+    OGLTexture texture;
+    texture.Create();
 
+    OpenGLState cur_state = OpenGLState::GetCurState();
     // Keep track of previous texture bindings
     GLuint old_tex = cur_state.texture_units[0].texture_2d;
-    cur_state.texture_units[0].texture_2d = texture;
+    cur_state.texture_units[0].texture_2d = texture.handle;
     cur_state.Apply();
     glActiveTexture(GL_TEXTURE0);
 
@@ -332,6 +340,8 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup
     // Restore previous texture bindings
     cur_state.texture_units[0].texture_2d = old_tex;
     cur_state.Apply();
+
+    return texture;
 }
 
 static void AllocateTextureCube(GLuint texture, const FormatTuple& format_tuple, u32 width) {
@@ -496,7 +506,12 @@ static bool FillSurface(const Surface& surface, const u8* fill_data,
 
 CachedSurface::~CachedSurface() {
     if (texture.handle) {
-        owner.host_texture_recycler.emplace(*this, std::move(texture));
+        auto tag = is_custom ? HostTextureTag{GetFormatTuple(PixelFormat::RGBA8),
+                                              custom_tex_info.width, custom_tex_info.height}
+                             : HostTextureTag{GetFormatTuple(pixel_format), GetScaledWidth(),
+                                              GetScaledHeight()};
+
+        owner.host_texture_recycler.emplace(tag, std::move(texture));
     }
 }
 
@@ -819,12 +834,11 @@ void CachedSurface::UploadGLTexture(Common::Rectangle<u32> rect, GLuint read_fb_
         x0 = 0;
         y0 = 0;
 
-        unscaled_tex.Create();
         if (is_custom) {
-            AllocateSurfaceTexture(unscaled_tex.handle, GetFormatTuple(PixelFormat::RGBA8),
-                                   custom_tex_info.width, custom_tex_info.height);
+            unscaled_tex = owner.AllocateSurfaceTexture(
+                GetFormatTuple(PixelFormat::RGBA8), custom_tex_info.width, custom_tex_info.height);
         } else {
-            AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight());
+            unscaled_tex = owner.AllocateSurfaceTexture(tuple, rect.GetWidth(), rect.GetHeight());
         }
         target_tex = unscaled_tex.handle;
     }
@@ -839,8 +853,8 @@ void CachedSurface::UploadGLTexture(Common::Rectangle<u32> rect, GLuint read_fb_
     ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0);
     if (is_custom) {
         if (res_scale == 1) {
-            AllocateSurfaceTexture(texture.handle, GetFormatTuple(PixelFormat::RGBA8),
-                                   custom_tex_info.width, custom_tex_info.height);
+            texture = owner.AllocateSurfaceTexture(GetFormatTuple(PixelFormat::RGBA8),
+                                                   custom_tex_info.width, custom_tex_info.height);
             cur_state.texture_units[0].texture_2d = texture.handle;
             cur_state.Apply();
         }
@@ -917,11 +931,9 @@ void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect, GLuint
         scaled_rect.right *= res_scale;
         scaled_rect.bottom *= res_scale;
 
-        OGLTexture unscaled_tex;
-        unscaled_tex.Create();
-
         Common::Rectangle<u32> unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0};
-        AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight());
+        OGLTexture unscaled_tex =
+            owner.AllocateSurfaceTexture(tuple, rect.GetWidth(), rect.GetHeight());
         BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type,
                      read_fb_handle, draw_fb_handle);
 
@@ -1741,15 +1753,12 @@ bool RasterizerCacheOpenGL::ValidateByReinterpretation(const Surface& surface,
             if (!texture_filterer->IsNull() && reinterpret_surface->res_scale == 1 &&
                 surface->res_scale == resolution_scale_factor) {
                 // The destination surface is either a framebuffer, or a filtered texture.
-                OGLTexture tmp_tex;
-                tmp_tex.Create();
                 // Create an intermediate surface to convert to before blitting to the
                 // destination.
                 Common::Rectangle<u32> tmp_rect{0, dest_rect.GetHeight() / resolution_scale_factor,
                                                 dest_rect.GetWidth() / resolution_scale_factor, 0};
-                AllocateSurfaceTexture(tmp_tex.handle,
-                                       GetFormatTuple(reinterpreter->first.dst_format),
-                                       tmp_rect.right, tmp_rect.top);
+                OGLTexture tmp_tex = AllocateSurfaceTexture(
+                    GetFormatTuple(reinterpreter->first.dst_format), tmp_rect.right, tmp_rect.top);
                 reinterpreter->second->Reinterpret(reinterpret_surface->texture.handle, src_rect,
                                                    read_framebuffer.handle, tmp_tex.handle,
                                                    tmp_rect, draw_framebuffer.handle);
@@ -1901,15 +1910,9 @@ Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) {
 
     surface->invalid_regions.insert(surface->GetInterval());
 
-    auto recycled_texture = host_texture_recycler.find(params);
-    if (recycled_texture == host_texture_recycler.end()) {
-        surface->texture.Create();
-        AllocateSurfaceTexture(surface->texture.handle, GetFormatTuple(surface->pixel_format),
-                               surface->GetScaledWidth(), surface->GetScaledHeight());
-    } else {
-        surface->texture = std::move(recycled_texture->second);
-        host_texture_recycler.erase(recycled_texture);
-    }
+    surface->texture =
+        AllocateSurfaceTexture(GetFormatTuple(surface->pixel_format), surface->GetScaledWidth(),
+                               surface->GetScaledHeight());
 
     return surface;
 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index bac748d2e..1c4ab7174 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -47,23 +47,14 @@ constexpr FormatTuple tex_tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE};
 const FormatTuple& GetFormatTuple(SurfaceParams::PixelFormat pixel_format);
 
 struct HostTextureTag {
-    GLint internal_format;
-    GLenum format;
+    FormatTuple format_tuple;
     u32 width;
     u32 height;
-    HostTextureTag(const SurfaceParams& params) noexcept {
-        auto format_tuple = GetFormatTuple(params.pixel_format);
-        internal_format = format_tuple.internal_format;
-        format = format_tuple.format;
-        // The type in the format tuple is irrelevant for the tag since the type is only for
-        // interpreting data on upload/download
-        width = params.GetScaledWidth();
-        height = params.GetScaledHeight();
-    }
     bool operator==(const HostTextureTag& rhs) const noexcept {
-        return std::tie(internal_format, format, width, height) ==
-               std::tie(rhs.internal_format, rhs.format, rhs.width, rhs.height);
-    }
+        return std::tie(format_tuple.format, format_tuple.internal_format, width, height) ==
+               std::tie(rhs.format_tuple.format, rhs.format_tuple.internal_format, rhs.width,
+                        rhs.height);
+    }
 };
 
 struct TextureCubeConfig {
@@ -93,8 +84,8 @@ template <>
 struct hash<OpenGL::HostTextureTag> {
     std::size_t operator()(const OpenGL::HostTextureTag& tag) const noexcept {
         std::size_t hash = 0;
-        boost::hash_combine(hash, tag.format);
-        boost::hash_combine(hash, tag.internal_format);
+        boost::hash_combine(hash, tag.format_tuple.format);
+        boost::hash_combine(hash, tag.format_tuple.internal_format);
         boost::hash_combine(hash, tag.width);
         boost::hash_combine(hash, tag.height);
         return hash;
@@ -369,6 +360,8 @@ private:
     std::unordered_map<TextureCubeConfig, CachedTextureCube> texture_cube_cache;
 
 public:
+    OGLTexture AllocateSurfaceTexture(const FormatTuple& format_tuple, u32 width, u32 height);
+
     // Textures from destroyed surfaces are stored here to be recyled to reduce allocation overhead
     // in the driver
     std::unordered_multimap<HostTextureTag, OGLTexture> host_texture_recycler;