kern: optimize timespan -> tick codegen, improve .text layout

2025-02-22 00:15:41 +00:00 · 2021-10-24 01:04:19 -07:00 · 2021-10-24 01:04:19 -07:00 · aaa3770806
commit aaa3770806
parent 89926f44c6
5 changed files with 88 additions and 45 deletions
--- a/libraries/libvapours/include/vapours/defines.hpp
+++ b/libraries/libvapours/include/vapours/defines.hpp
@ -33,8 +33,6 @@
 #define ALWAYS_INLINE inline __attribute__((always_inline))
 #define NOINLINE      __attribute__((noinline))

-#define CONST_FOLD(x) (__builtin_constant_p(x) ? (x) : (x))
-
 #define CONCATENATE_IMPL(s1, s2) s1##s2
 #define CONCATENATE(s1, s2) CONCATENATE_IMPL(s1, s2)

--- a/libraries/libvapours/include/vapours/svc/svc_tick.hpp
+++ b/libraries/libvapours/include/vapours/svc/svc_tick.hpp
@ -23,50 +23,71 @@ namespace ams::svc {
    class Tick {
        public:
            static constexpr s64 TicksPerSecond = ::ams::svc::TicksPerSecond;
-            static constexpr s64 GetTicksPerSecond() { return TicksPerSecond; }
+            static consteval s64 GetTicksPerSecond() { return TicksPerSecond; }
        private:
            s64 m_tick;
        private:
            static constexpr s64 NanoSecondsPerSecond = TimeSpan::FromSeconds(1).GetNanoSeconds();

-            static constexpr void DivNs(s64 &out, const s64 value) {
-                out = value / NanoSecondsPerSecond;
-            }
+            static constexpr ALWAYS_INLINE s64 ConvertTimeSpanToTickImpl(TimeSpan ts) {
+                /* Get the length of the timespan in nanoseconds; this is the value scaled to ticks below. */
+                const s64 ns = ts.GetNanoSeconds();

-            static constexpr void DivModNs(s64 &out_div, s64 &out_mod, const s64 value) {
-                out_div = value / NanoSecondsPerSecond;
-                out_mod = value % NanoSecondsPerSecond;
-            }
+                /* Special-case optimize arm64/nintendo-nx value (19,200,000 ticks per second): at runtime only, hand-written asm splits ns into quotient/remainder by 1,000,000,000 and scales both by 19,200,000, rounding the remainder term up. NOTE(review): the quotient is formed with lsr/umulh (an unsigned divide) while the generic fallback uses signed division -- confirm negative timespans never reach this path. */
+                if (!std::is_constant_evaluated()) {
+                    if constexpr (TicksPerSecond == 19'200'000) {
+                        #if defined(ATMOSPHERE_IS_MESOSPHERE) && defined(ATMOSPHERE_ARCH_ARM64)
+                        s64 t0, t1, t2, t3;
+                        __asm__ __volatile__("mov   %[t1], #0x5A53\n"
+                                             "movk  %[t1], #0xA09B, lsl #16\n"
+                                             "lsr   %[t0], %[ns], #9\n"
+                                             "movk  %[t1], #0xB82F, lsl #32\n"
+                                             "movk  %[t1], #0x0044, lsl #48\n"
+                                             "umulh %[t0], %[t0], %[t1]\n"
+                                             "mov   %[t1], #0xFFFFFFFFFFFF3600\n"
+                                             "movk  %[t1], #0xC465, lsl #16\n"
+                                             "lsr   %[t0], %[t0], #0xB\n"
+                                             "madd  %[t1], %[t0], %[t1], %[ns]\n"
+                                             "mov   %w[t2], #0xF800\n"
+                                             "movk  %w[t2], #0x0124, lsl #16\n"
+                                             "mov   %w[t3], #0xCA00\n"
+                                             "movk  %w[t3], #0x3B9A, lsl #16\n"
+                                             "madd  %[t1], %[t1], %[t2], %[t3]\n"
+                                             "mov   %[t3], #0x94B3\n"
+                                             "movk  %[t3], #0x26D6, lsl #16\n"
+                                             "movk  %[t3], #0x0BE8, lsl #32\n"
+                                             "movk  %[t3], #0x112E, lsl #48\n"
+                                             "sub   %[t1], %[t1], #1\n"
+                                             "smulh %[t1], %[t1], %[t3]\n"
+                                             "asr   %[t3], %[t1], #26\n"
+                                             "add   %[t1], %[t3], %[t1], lsr #63\n"
+                                             "madd  %[t0], %[t0], %[t2], %[t1]\n"
+                                             : [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3)
+                                             : [ns]"r"(ns)
+                                             : "cc");
+                        return t0;
+                        #endif
+                    }
+                }

-            static constexpr s64 ConvertTimeSpanToTickImpl(TimeSpan ts) {
-                /* Split up timespan and ticks-per-second by ns. */
-                s64 ts_div = 0, ts_mod = 0;
-                s64 tick_div = 0, tick_mod = 0;
-                DivModNs(ts_div, ts_mod, ts.GetNanoSeconds());
-                DivModNs(tick_div, tick_mod, TicksPerSecond);
-
-                /* Convert the timespan into a tick count. */
-                s64 value = 0;
-                DivNs(value, ts_mod * tick_mod + NanoSecondsPerSecond - 1);
-
-                return (ts_div * tick_div) * NanoSecondsPerSecond + ts_div * tick_mod + ts_mod * tick_div + value;
+                return util::ScaleByConstantFactor<s64, TicksPerSecond, NanoSecondsPerSecond>(ns);
            }
        public:
-            constexpr explicit Tick(s64 t = 0) : m_tick(t) { /* ... */ }
-            constexpr Tick(TimeSpan ts) : m_tick(ConvertTimeSpanToTickImpl(ts)) { /* ... */ }
+            constexpr ALWAYS_INLINE explicit Tick(s64 t = 0) : m_tick(t) { /* ... */ }
+            constexpr ALWAYS_INLINE Tick(TimeSpan ts) : m_tick(ConvertTimeSpanToTickImpl(ts)) { /* ... */ }

-            constexpr operator s64() const { return m_tick; }
+            constexpr ALWAYS_INLINE operator s64() const { return m_tick; }

            /* Tick arithmetic. */
-            constexpr Tick &operator+=(Tick rhs) { m_tick += rhs.m_tick; return *this; }
-            constexpr Tick &operator-=(Tick rhs) { m_tick -= rhs.m_tick; return *this; }
-            constexpr Tick operator+(Tick rhs) const { Tick r(*this); return r += rhs; }
-            constexpr Tick operator-(Tick rhs) const { Tick r(*this); return r -= rhs; }
+            constexpr ALWAYS_INLINE Tick &operator+=(Tick rhs) { m_tick += rhs.m_tick; return *this; }
+            constexpr ALWAYS_INLINE Tick &operator-=(Tick rhs) { m_tick -= rhs.m_tick; return *this; }
+            constexpr ALWAYS_INLINE Tick operator+(Tick rhs) const { Tick r(*this); return r += rhs; }
+            constexpr ALWAYS_INLINE Tick operator-(Tick rhs) const { Tick r(*this); return r -= rhs; }

-            constexpr Tick &operator+=(TimeSpan rhs) { m_tick += Tick(rhs).m_tick; return *this; }
-            constexpr Tick &operator-=(TimeSpan rhs) { m_tick -= Tick(rhs).m_tick; return *this; }
-            constexpr Tick operator+(TimeSpan rhs) const { Tick r(*this); return r += rhs; }
-            constexpr Tick operator-(TimeSpan rhs) const { Tick r(*this); return r -= rhs; }
+            constexpr ALWAYS_INLINE Tick &operator+=(TimeSpan rhs) { m_tick += Tick(rhs).m_tick; return *this; }
+            constexpr ALWAYS_INLINE Tick &operator-=(TimeSpan rhs) { m_tick -= Tick(rhs).m_tick; return *this; }
+            constexpr ALWAYS_INLINE Tick operator+(TimeSpan rhs) const { Tick r(*this); return r += rhs; }
+            constexpr ALWAYS_INLINE Tick operator-(TimeSpan rhs) const { Tick r(*this); return r -= rhs; }
    };

 }
--- a/libraries/libvapours/include/vapours/util/arch/arm64/util_atomic.hpp
+++ b/libraries/libvapours/include/vapours/util/arch/arm64/util_atomic.hpp
@ -270,7 +270,7 @@ namespace ams::util {

            template<std::memory_order Order = std::memory_order_seq_cst>
            ALWAYS_INLINE T Exchange(T arg) {
-                return ConvertToType(impl::AtomicExchangeImpl(this->GetStoragePointer(), ConvertToStorage(arg)));
+                return ConvertToType(impl::AtomicExchangeImpl<Order>(this->GetStoragePointer(), ConvertToStorage(arg)));
            }

            template<std::memory_order Order = std::memory_order_seq_cst>
@ -374,7 +374,7 @@ namespace ams::util {

            template<std::memory_order Order = std::memory_order_seq_cst>
            ALWAYS_INLINE T Exchange(T arg) const {
-                return ConvertToType(impl::AtomicExchangeImpl(this->GetStoragePointer(), ConvertToStorage(arg)));
+                return ConvertToType(impl::AtomicExchangeImpl<Order>(this->GetStoragePointer(), ConvertToStorage(arg)));
            }

            template<std::memory_order Order = std::memory_order_seq_cst>
--- a/libraries/libvapours/include/vapours/util/util_bitutil.hpp
+++ b/libraries/libvapours/include/vapours/util/util_bitutil.hpp
@ -255,4 +255,28 @@ namespace ams::util {
        return static_cast<T>((v + add) / d);
    }

+    template<typename T, T N, T D>
+    constexpr ALWAYS_INLINE T ScaleByConstantFactor(const T V) {
+        /* Scaling V by N / D with a single multiply-then-divide can cause error to be introduced for large numerator/denominator. */
+        /* This algorithm multiplies/divides in stages, so as to mitigate this (particularly with large denominator). */

+        /* Justification for the algorithm (the code below rounds the final term up instead of truncating).         */
+        /* Calculate: (V * N) / D                                                                                   */
+        /*          = (Quot_V * D + Rem_V) * (Quot_N * D + Rem_N) / D                                               */
+        /*          = (D^2 * (Quot_V * Quot_N) + D * (Quot_V * Rem_N + Rem_V * Quot_N) + Rem_V * Rem_N) / D         */
+        /*          = (D * Quot_V * Quot_N) + (Quot_V * Rem_N) + (Rem_V * Quot_N) + ((Rem_V * Rem_N) / D)           */

+        /* Calculate quotients/remainders; those of the template constant N are computed at compile time. */
+        const     T Quot_V = V / D;
+        const     T Rem_V  = V % D;
+        constexpr T Quot_N = N / D;
+        constexpr T Rem_N  = N % D;

+        /* Calculate (Rem_V * Rem_N) / D, rounding up by adding (D - 1) before the truncating division. */
+        const T rem_mult = ((Rem_V * Rem_N) + (D - 1)) / D;

+        /* Sum the four staged terms to form the result. */
+        return (D * Quot_N * Quot_V) + (Quot_V * Rem_N) + (Rem_V * Quot_N) + rem_mult;
+    }
+
 }
--- a/mesosphere/kernel/kernel.ld
+++ b/mesosphere/kernel/kernel.ld
@ -51,24 +51,24 @@ SECTIONS
 		. = ALIGN(8);
 	} :code

-	/* .vectors. */
-	. = ALIGN(2K);
-	__vectors_start__ = . ;
-  .vectors :
-	{
-		KEEP( *(.vectors) )
-		. = ALIGN(8);
-	} :code
-
 	/* .sleep. */
 	. = ALIGN(4K);
 	__sleep_start__ = . ;
-  .sleep :
+    .sleep :
 	{
 		KEEP( *(.sleep .sleep.*) )
 		. = ALIGN(8);
 	} :code

+	/* .vectors. */
+	. = ALIGN(2K);
+	__vectors_start__ = . ;
+    .vectors :
+	{
+		KEEP( *(.vectors) )
+		. = ALIGN(8);
+	} :code
+
 	/* =========== RODATA section =========== */
 	. = ALIGN(0x1000);
 	__rodata_start = . ;