fs: implement PooledBuffer

2025-01-17 06:41:38 +00:00 · 2020-04-06 03:15:24 -07:00 · 2020-04-06 03:15:24 -07:00 · 496be5ecd4
commit 496be5ecd4
parent 50a91b1d6e
5 changed files with 521 additions and 6 deletions
--- a/libraries/libstratosphere/include/stratosphere/fssystem.hpp
+++ b/libraries/libstratosphere/include/stratosphere/fssystem.hpp
@ -26,3 +26,4 @@
 #include <stratosphere/fssystem/fssystem_romfs_file_system.hpp>
 #include <stratosphere/fssystem/buffers/fssystem_buffer_manager_utils.hpp>
 #include <stratosphere/fssystem/buffers/fssystem_file_system_buddy_heap.hpp>
+#include <stratosphere/fssystem/fssystem_pooled_buffer.hpp>
--- a/libraries/libstratosphere/include/stratosphere/fssystem/fssystem_pooled_buffer.hpp
+++ b/libraries/libstratosphere/include/stratosphere/fssystem/fssystem_pooled_buffer.hpp
@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018-2020 Atmosphère-NX
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+#include <vapours.hpp>
+#include <stratosphere/fs/impl/fs_newable.hpp>
+
+namespace ams::fssystem {
+
+    constexpr inline size_t BufferPoolAlignment = 4_KB;
+    constexpr inline size_t BufferPoolWorkSize  = 320;
+
+    class PooledBuffer {
+        NON_COPYABLE(PooledBuffer);
+        private:
+            char *buffer;
+            size_t size;
+        private:
+            static size_t GetAllocatableSizeMaxCore(bool large);
+        public:
+            static size_t GetAllocatableSizeMax() { return GetAllocatableSizeMaxCore(false); }
+            static size_t GetAllocatableParticularlyLargeSizeMax() { return GetAllocatableSizeMaxCore(true); }
+        private:
+            void Swap(PooledBuffer &rhs) {
+                std::swap(this->buffer, rhs.buffer);
+                std::swap(this->size, rhs.size);
+            }
+        public:
+            /* Constructor/Destructor. */
+            constexpr PooledBuffer() : buffer(), size() { /* ... */ }
+
+            PooledBuffer(size_t ideal_size, size_t required_size) : buffer(), size() {
+                this->Allocate(ideal_size, required_size);
+            }
+
+            ~PooledBuffer() {
+                this->Deallocate();
+            }
+
+            /* Move and assignment. */
+            explicit PooledBuffer(PooledBuffer &&rhs) : buffer(rhs.buffer), size(rhs.size) {
+                rhs.buffer = nullptr;
+                rhs.size   = 0;
+            }
+
+            PooledBuffer &operator=(PooledBuffer &&rhs) {
+                PooledBuffer(std::move(rhs)).Swap(*this);
+                return *this;
+            }
+
+            /* Allocation API. */
+            void Allocate(size_t ideal_size, size_t required_size) {
+                return this->AllocateCore(ideal_size, required_size, false);
+            }
+
+            void AllocateParticularlyLarge(size_t ideal_size, size_t required_size) {
+                return this->AllocateCore(ideal_size, required_size, true);
+            }
+
+            void Shrink(size_t ideal_size);
+
+            void Deallocate() {
+                /* Shrink the buffer to empty. */
+                this->Shrink(0);
+                AMS_ASSERT(this->buffer == nullptr);
+            }
+
+            char *GetBuffer() const {
+                AMS_ASSERT(this->buffer != nullptr);
+                return this->buffer;
+            }
+
+            size_t GetSize() const {
+                AMS_ASSERT(this->buffer != nullptr);
+                return this->size;
+            }
+        private:
+            void AllocateCore(size_t ideal_size, size_t required_size, bool large);
+    };
+
+    Result InitializeBufferPool(char *buffer, size_t size);
+    Result InitializeBufferPool(char *buffer, size_t size, char *work, size_t work_size);
+
+    bool IsPooledBuffer(const void *buffer);
+
+    size_t GetPooledBufferRetriedCount();
+    size_t GetPooledBufferReduceAllocationCount();
+    size_t GetPooledBufferFreeSizePeak();
+
+    void ClearPooledBufferPeak();
+
+    void RegisterAdditionalDeviceAddress(uintptr_t address, size_t size);
+    void UnregisterAdditionalDeviceAddress(uintptr_t address);
+    bool IsAdditionalDeviceAddress(const void *ptr);
+
+}
--- a/libraries/libstratosphere/source/fssystem/fssystem_pooled_buffer.cpp
+++ b/libraries/libstratosphere/source/fssystem/fssystem_pooled_buffer.cpp
@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2018-2020 Atmosphère-NX
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <stratosphere.hpp>
+
+namespace ams::fssystem {
+
+    namespace {
+
+        class AdditionalDeviceAddressEntry {
+            private:
+                /* TODO: SdkMutex */
+                os::Mutex mutex;
+                bool is_registered;
+                uintptr_t address;
+                size_t size;
+            public:
+                constexpr AdditionalDeviceAddressEntry() : mutex(), is_registered(), address(), size() { /* ... */ }
+
+                void Register(uintptr_t addr, size_t sz) {
+                    std::scoped_lock lk(this->mutex);
+
+                    AMS_ASSERT(!this->is_registered);
+                    if (!this->is_registered) {
+                        this->is_registered = true;
+                        this->address       = addr;
+                        this->size          = size;
+                    }
+                }
+
+                void Unregister(uintptr_t addr) {
+                    std::scoped_lock lk(this->mutex);
+
+                    if (this->is_registered && this->address == addr) {
+                        this->is_registered = false;
+                        this->address       = 0;
+                        this->size          = 0;
+                    }
+                }
+
+                bool Includes(const void *ptr) {
+                    std::scoped_lock lk(this->mutex);
+
+                    if (this->is_registered) {
+                        const uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
+                        return this->address <= addr && addr < this->address + this->size;
+                    } else {
+                        return false;
+                    }
+                }
+        };
+
+        constexpr auto RetryWait = TimeSpan::FromMilliSeconds(10);
+
+        constexpr size_t HeapBlockSize = BufferPoolAlignment;
+        static_assert(HeapBlockSize == 4_KB);
+
+        /* A heap block is 4KB. An order is a power of two. */
+        /* This gives blocks of the order 32KB, 512KB, 4MB. */
+        constexpr s32    HeapOrderTrim        = 3;
+        constexpr s32    HeapOrderMax         = 7;
+        constexpr s32    HeapOrderMaxForLarge = HeapOrderMax + 3;
+
+        constexpr size_t HeapAllocatableSizeTrim        = HeapBlockSize * (static_cast<size_t>(1) << HeapOrderTrim);
+        constexpr size_t HeapAllocatableSizeMax         = HeapBlockSize * (static_cast<size_t>(1) << HeapOrderMax);
+        constexpr size_t HeapAllocatableSizeMaxForLarge = HeapBlockSize * (static_cast<size_t>(1) << HeapOrderMaxForLarge);
+
+        /* TODO: SdkMutex */
+        os::Mutex g_heap_mutex;
+        FileSystemBuddyHeap g_heap;
+
+        std::atomic<size_t> g_retry_count;
+        std::atomic<size_t> g_reduce_allocation_count;
+
+        void *g_heap_buffer;
+        size_t g_heap_size;
+        size_t g_heap_free_size_peak;
+
+        AdditionalDeviceAddressEntry g_additional_device_address_entry;
+
+    }
+
+    size_t PooledBuffer::GetAllocatableSizeMaxCore(bool large) {
+        return large ? HeapAllocatableSizeMaxForLarge : HeapAllocatableSizeMax;
+    }
+
+    void PooledBuffer::AllocateCore(size_t ideal_size, size_t required_size, bool large) {
+        /* Ensure preconditions. */
+        AMS_ASSERT(g_heap_buffer != nullptr);
+        AMS_ASSERT(this->buffer == nullptr);
+        AMS_ASSERT(g_heap.GetBlockSize() == HeapBlockSize);
+
+        /* Check that we can allocate this size. */
+        AMS_ASSERT(required_size <= GetAllocatableSizeMaxCore(large));
+
+        const size_t target_size = std::min(std::max(ideal_size, required_size), GetAllocatableSizeMaxCore(large));
+
+        /* Loop until we allocate. */
+        while (true) {
+            /* Lock the heap and try to allocate. */
+            {
+                std::scoped_lock lk(g_heap_mutex);
+
+                /* Determine how much we can allocate, and don't allocate more than half the heap. */
+                size_t allocatable_size = g_heap.GetAllocatableSizeMax();
+                if (allocatable_size > HeapBlockSize) {
+                    allocatable_size >>= 1;
+                }
+
+                /* Check if this allocation is acceptable. */
+                if (allocatable_size >= required_size) {
+                    /* Get the order. */
+                    const auto order = g_heap.GetOrderFromBytes(std::min(target_size, allocatable_size));
+
+                    /* Allocate and get the size. */
+                    this->buffer = reinterpret_cast<char *>(g_heap.AllocateByOrder(order));
+                    this->size   = g_heap.GetBytesFromOrder(order);
+                }
+            }
+
+            /* Check if we allocated. */
+            if (this->buffer != nullptr) {
+                /* If we need to trim the end, do so. */
+                if (this->GetSize() >= target_size + HeapAllocatableSizeTrim) {
+                    this->Shrink(util::AlignUp(target_size, HeapAllocatableSizeTrim));
+                }
+                AMS_ASSERT(this->GetSize() >= required_size);
+
+                /* If we reduced, note so. */
+                if (this->GetSize() < std::min(target_size, HeapAllocatableSizeMax)) {
+                    g_reduce_allocation_count++;
+                }
+                break;
+            } else {
+                /* Sleep. */
+                /* TODO: os::SleepThread() */
+                svc::SleepThread(RetryWait.GetNanoSeconds());
+                g_retry_count++;
+            }
+        }
+
+        /* Update metrics. */
+        {
+            std::scoped_lock lk(g_heap_mutex);
+
+            const size_t free_size = g_heap.GetTotalFreeSize();
+            if (free_size < g_heap_free_size_peak) {
+                g_heap_free_size_peak = free_size;
+            }
+        }
+    }
+
+    void PooledBuffer::Shrink(size_t ideal_size) {
+        AMS_ASSERT(ideal_size <= GetAllocatableSizeMaxCore(true));
+
+        /* Check if we actually need to shrink. */
+        if (this->size > ideal_size) {
+            /* If we do, we need to have a buffer allocated from the heap. */
+            AMS_ASSERT(this->buffer != nullptr);
+            AMS_ASSERT(g_heap.GetBlockSize(), HeapBlockSize);
+
+            const size_t new_size = util::AlignUp(ideal_size, HeapBlockSize);
+
+            /* Repeatedly free the tail of our buffer until we're done. */
+            {
+                std::scoped_lock lk(g_heap_mutex);
+
+                while (new_size < this->size) {
+                    /* Determine the size and order to free. */
+                    const size_t tail_align = util::LeastSignificantOneBit(this->size);
+                    const size_t free_size  = std::min(util::FloorPowerOfTwo(this->size - new_size), tail_align);
+                    const s32 free_order    = g_heap.GetOrderFromBytes(free_size);
+
+                    /* Ensure we determined size correctly. */
+                    AMS_ASSERT(util::IsAligned(free_size, HeapBlockSize));
+                    AMS_ASSERT(free_size == g_heap.GetBytesFromOrder(free_order));
+
+                    /* Actually free the memory. */
+                    g_heap.Free(this->buffer + this->size - free_size, free_order);
+                    this->size -= free_size;
+                }
+            }
+
+            /* Shrinking to zero means that we have no buffer. */
+            if (this->size == 0) {
+                this->buffer = nullptr;
+            }
+        }
+    }
+
+    Result InitializeBufferPool(char *buffer, size_t size) {
+        AMS_ASSERT(g_heap_buffer == nullptr);
+        AMS_ASSERT(buffer != nullptr);
+        AMS_ASSERT(util::IsAligned(reinterpret_cast<uintptr_t>(buffer), BufferPoolAlignment));
+
+        /* Initialize the heap. */
+        R_TRY(g_heap.Initialize(reinterpret_cast<uintptr_t>(buffer), size, HeapBlockSize, HeapOrderMaxForLarge + 1));
+
+        /* Initialize metrics. */
+        g_heap_buffer         = buffer;
+        g_heap_size           = size;
+        g_heap_free_size_peak = size;
+
+        return ResultSuccess();
+    }
+
+    Result InitializeBufferPool(char *buffer, size_t size, char *work, size_t work_size) {
+        AMS_ASSERT(g_heap_buffer == nullptr);
+        AMS_ASSERT(buffer != nullptr);
+        AMS_ASSERT(util::IsAligned(reinterpret_cast<uintptr_t>(buffer), BufferPoolAlignment));
+        AMS_ASSERT(work_size >= BufferPoolWorkSize);
+
+        /* Initialize the heap. */
+        R_TRY(g_heap.Initialize(reinterpret_cast<uintptr_t>(buffer), size, HeapBlockSize, HeapOrderMaxForLarge + 1, work, work_size));
+
+        /* Initialize metrics. */
+        g_heap_buffer         = buffer;
+        g_heap_size           = size;
+        g_heap_free_size_peak = size;
+
+        return ResultSuccess();
+    }
+
+    bool IsPooledBuffer(const void *buffer) {
+        AMS_ASSERT(buffer != nullptr);
+        return g_heap_buffer <= buffer && buffer < reinterpret_cast<char *>(g_heap_buffer) + g_heap_size;
+    }
+
+    size_t GetPooledBufferRetriedCount() {
+        return g_retry_count;
+    }
+
+    size_t GetPooledBufferReduceAllocationCount() {
+        return g_reduce_allocation_count;
+    }
+
+    size_t GetPooledBufferFreeSizePeak() {
+        return g_heap_free_size_peak;
+    }
+
+    void ClearPooledBufferPeak() {
+        std::scoped_lock lk(g_heap_mutex);
+        g_heap_free_size_peak     = g_heap.GetTotalFreeSize();
+        g_retry_count             = 0;
+        g_reduce_allocation_count = 0;
+    }
+
+    void RegisterAdditionalDeviceAddress(uintptr_t address, size_t size) {
+        g_additional_device_address_entry.Register(address, size);
+    }
+
+    void UnregisterAdditionalDeviceAddress(uintptr_t address) {
+        g_additional_device_address_entry.Unregister(address);
+    }
+
+    bool IsAdditionalDeviceAddress(const void *ptr) {
+        return g_additional_device_address_entry.Includes(ptr);
+    }
+
+}
--- a/libraries/libvapours/include/vapours/util/util_alignment.hpp
+++ b/libraries/libvapours/include/vapours/util/util_alignment.hpp
@ -17,16 +17,11 @@
 #pragma once
 #include <vapours/common.hpp>
 #include <vapours/assert.hpp>
+#include <vapours/util/util_bitutil.hpp>

 namespace ams::util {

    /* Utilities for alignment to power of two. */
-    template<typename T>
-    constexpr ALWAYS_INLINE bool IsPowerOfTwo(T value) {
-        using U = typename std::make_unsigned<T>::type;
-        return (static_cast<U>(value) & static_cast<U>(value - 1)) == 0;
-    }
-
    template<typename T>
    constexpr ALWAYS_INLINE T AlignUp(T value, size_t alignment) {
        using U = typename std::make_unsigned<T>::type;
--- a/libraries/libvapours/include/vapours/util/util_bitutil.hpp
+++ b/libraries/libvapours/include/vapours/util/util_bitutil.hpp
@ -20,6 +20,16 @@

 namespace ams::util {

+    namespace impl {
+
+        template<size_t N>
+        constexpr inline size_t Log2 = Log2<N / 2> + 1;
+
+        template<>
+        constexpr inline size_t Log2<1> = 0;
+
+    }
+
    template <typename T>
    class BitsOf {
        private:
@ -73,4 +83,133 @@ namespace ams::util {
        return (... | (T(1u) << args));
    }

+    template<typename T>
+    constexpr ALWAYS_INLINE T ResetLeastSignificantOneBit(T x) {
+        return x & (x - 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T SetLeastSignificantZeroBit(T x) {
+        return x | (x + 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T LeastSignificantOneBit(T x) {
+        return x & ~(x - 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T LeastSignificantZeroBit(T x) {
+        return ~x & (x + 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T ResetTrailingOnes(T x) {
+        return x & (x + 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T SetTrailingZeros(T x) {
+        return x | (x - 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T MaskTrailingZeros(T x) {
+        return (~x) & (x - 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T MaskTrailingOnes(T x) {
+        return ~((~x) | (x + 1));
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T MaskTrailingZerosAndLeastSignificantOneBit(T x) {
+        return x ^ (x - 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T MaskTrailingOnesAndLeastSignificantZeroBit(T x) {
+        return x ^ (x + 1);
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE int PopCount(T x) {
+        /* TODO: C++20 std::bit_cast */
+        using U = typename std::make_unsigned<T>::type;
+        U u = static_cast<U>(x);
+
+        /* TODO: C++20 std::is_constant_evaluated */
+        if (false) {
+            /* https://en.wikipedia.org/wiki/Hamming_weight */
+            constexpr U m1 = U(-1) / 0x03;
+            constexpr U m2 = U(-1) / 0x05;
+            constexpr U m4 = U(-1) / 0x11;
+
+            u = static_cast<U>(u - ((u >> 1) & m1));
+            u = static_cast<U>((u & m2) + ((u >> 2) & m2));
+            u = static_cast<U>((u + (u >> 4)) & m4);
+
+            for (size_t i = 0; i < impl::Log2<sizeof(T)>; ++i) {
+                const size_t shift = (0x1 << i) * BITSIZEOF(u8);
+                u += u >> shift;
+            }
+
+            return static_cast<int>(u & 0x7Fu);
+        } else {
+            if constexpr (std::is_same<U, unsigned long long>::value) {
+                return __builtin_popcountll(u);
+            } else if constexpr (std::is_same<U, unsigned long>::value) {
+                return __builtin_popcountl(u);
+            } else {
+                static_assert(sizeof(U) <= sizeof(unsigned int));
+                return __builtin_popcount(static_cast<unsigned int>(u));
+            }
+        }
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE int CountLeadingZeros(T x) {
+        /* TODO: C++20 std::is_constant_evaluated */
+        if (false) {
+            for (size_t i = 0; i < impl::Log2<BITSIZEOF(T)>; ++i) {
+                const size_t shift = (0x1 << i);
+                x |= x >> shift;
+            }
+            return PopCount(static_cast<T>(~x));
+        } else {
+            /* TODO: C++20 std::bit_cast */
+            using U = typename std::make_unsigned<T>::type;
+            const U u = static_cast<U>(x);
+            if constexpr (std::is_same<U, unsigned long long>::value) {
+                return __builtin_clzll(u);
+            } else if constexpr (std::is_same<U, unsigned long>::value) {
+                return __builtin_clzl(u);
+            }  else if constexpr(std::is_same<U, unsigned int>::value) {
+                return __builtin_clz(u);
+            } else {
+                static_assert(sizeof(U) < sizeof(unsigned int));
+                constexpr size_t BitDiff = BITSIZEOF(unsigned int) - BITSIZEOF(U);
+                return __builtin_clz(static_cast<unsigned int>(u)) - BitDiff;
+            }
+        }
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE bool IsPowerOfTwo(T x) {
+        return x > 0 && ResetLeastSignificantOneBit(x) == 0;
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T CeilingPowerOfTwo(T x) {
+        AMS_ASSERT(x > 0);
+        return T(1) << (BITSIZEOF(T) - CountLeadingZeros(T(x - 1)));
+    }
+
+    template<typename T>
+    constexpr ALWAYS_INLINE T FloorPowerOfTwo(T x) {
+        AMS_ASSERT(x > 0);
+        return T(1) << (BITSIZEOF(T) - CountLeadingZeros(x) - 1);
+    }
+
 }