diff --git a/libraries/libvapours/include/vapours/defines.hpp b/libraries/libvapours/include/vapours/defines.hpp index 4ac265ee1..bf9fc2123 100644 --- a/libraries/libvapours/include/vapours/defines.hpp +++ b/libraries/libvapours/include/vapours/defines.hpp @@ -33,8 +33,6 @@ #define ALWAYS_INLINE inline __attribute__((always_inline)) #define NOINLINE __attribute__((noinline)) -#define CONST_FOLD(x) (__builtin_constant_p(x) ? (x) : (x)) - #define CONCATENATE_IMPL(s1, s2) s1##s2 #define CONCATENATE(s1, s2) CONCATENATE_IMPL(s1, s2) diff --git a/libraries/libvapours/include/vapours/svc/svc_tick.hpp b/libraries/libvapours/include/vapours/svc/svc_tick.hpp index 7693c65ed..3eba22e65 100644 --- a/libraries/libvapours/include/vapours/svc/svc_tick.hpp +++ b/libraries/libvapours/include/vapours/svc/svc_tick.hpp @@ -23,50 +23,71 @@ namespace ams::svc { class Tick { public: static constexpr s64 TicksPerSecond = ::ams::svc::TicksPerSecond; - static constexpr s64 GetTicksPerSecond() { return TicksPerSecond; } + static consteval s64 GetTicksPerSecond() { return TicksPerSecond; } private: s64 m_tick; private: static constexpr s64 NanoSecondsPerSecond = TimeSpan::FromSeconds(1).GetNanoSeconds(); - static constexpr void DivNs(s64 &out, const s64 value) { - out = value / NanoSecondsPerSecond; - } + static constexpr ALWAYS_INLINE s64 ConvertTimeSpanToTickImpl(TimeSpan ts) { + /* Get nano-seconds. */ + const s64 ns = ts.GetNanoSeconds(); - static constexpr void DivModNs(s64 &out_div, s64 &out_mod, const s64 value) { - out_div = value / NanoSecondsPerSecond; - out_mod = value % NanoSecondsPerSecond; - } + /* Special-case optimize arm64/nintendo-nx value; this path is skipped during constant evaluation. 
*/ + if (!std::is_constant_evaluated()) { + if constexpr (TicksPerSecond == 19'200'000) { + #if defined(ATMOSPHERE_IS_MESOSPHERE) && defined(ATMOSPHERE_ARCH_ARM64) + s64 t0, t1, t2, t3; + __asm__ __volatile__("mov %[t1], #0x5A53\n" + "movk %[t1], #0xA09B, lsl #16\n" + "lsr %[t0], %[ns], #9\n" + "movk %[t1], #0xB82F, lsl #32\n" + "movk %[t1], #0x0044, lsl #48\n" + "umulh %[t0], %[t0], %[t1]\n" + "mov %[t1], #0xFFFFFFFFFFFF3600\n" + "movk %[t1], #0xC465, lsl #16\n" + "lsr %[t0], %[t0], #0xB\n" + "madd %[t1], %[t0], %[t1], %[ns]\n" + "mov %w[t2], #0xF800\n" + "movk %w[t2], #0x0124, lsl #16\n" + "mov %w[t3], #0xCA00\n" + "movk %w[t3], #0x3B9A, lsl #16\n" + "madd %[t1], %[t1], %[t2], %[t3]\n" + "mov %[t3], #0x94B3\n" + "movk %[t3], #0x26D6, lsl #16\n" + "movk %[t3], #0x0BE8, lsl #32\n" + "movk %[t3], #0x112E, lsl #48\n" + "sub %[t1], %[t1], #1\n" + "smulh %[t1], %[t1], %[t3]\n" + "asr %[t3], %[t1], #26\n" + "add %[t1], %[t3], %[t1], lsr #63\n" + "madd %[t0], %[t0], %[t2], %[t1]\n" + : [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3) + : [ns]"r"(ns) + : "cc"); + return t0; + #endif + } + } - static constexpr s64 ConvertTimeSpanToTickImpl(TimeSpan ts) { - /* Split up timespan and ticks-per-second by ns. */ - s64 ts_div = 0, ts_mod = 0; - s64 tick_div = 0, tick_mod = 0; - DivModNs(ts_div, ts_mod, ts.GetNanoSeconds()); - DivModNs(tick_div, tick_mod, TicksPerSecond); - - /* Convert the timespan into a tick count. */ - s64 value = 0; - DivNs(value, ts_mod * tick_mod + NanoSecondsPerSecond - 1); - - return (ts_div * tick_div) * NanoSecondsPerSecond + ts_div * tick_mod + ts_mod * tick_div + value; + return util::ScaleByConstantFactor(ns); } public: - constexpr explicit Tick(s64 t = 0) : m_tick(t) { /* ... */ } - constexpr Tick(TimeSpan ts) : m_tick(ConvertTimeSpanToTickImpl(ts)) { /* ... */ } + constexpr ALWAYS_INLINE explicit Tick(s64 t = 0) : m_tick(t) { /* ... */ } + constexpr ALWAYS_INLINE Tick(TimeSpan ts) : m_tick(ConvertTimeSpanToTickImpl(ts)) { /* ... 
*/ } - constexpr operator s64() const { return m_tick; } + constexpr ALWAYS_INLINE operator s64() const { return m_tick; } /* Tick arithmetic. */ - constexpr Tick &operator+=(Tick rhs) { m_tick += rhs.m_tick; return *this; } - constexpr Tick &operator-=(Tick rhs) { m_tick -= rhs.m_tick; return *this; } - constexpr Tick operator+(Tick rhs) const { Tick r(*this); return r += rhs; } - constexpr Tick operator-(Tick rhs) const { Tick r(*this); return r -= rhs; } + constexpr ALWAYS_INLINE Tick &operator+=(Tick rhs) { m_tick += rhs.m_tick; return *this; } + constexpr ALWAYS_INLINE Tick &operator-=(Tick rhs) { m_tick -= rhs.m_tick; return *this; } + constexpr ALWAYS_INLINE Tick operator+(Tick rhs) const { Tick r(*this); return r += rhs; } + constexpr ALWAYS_INLINE Tick operator-(Tick rhs) const { Tick r(*this); return r -= rhs; } - constexpr Tick &operator+=(TimeSpan rhs) { m_tick += Tick(rhs).m_tick; return *this; } - constexpr Tick &operator-=(TimeSpan rhs) { m_tick -= Tick(rhs).m_tick; return *this; } - constexpr Tick operator+(TimeSpan rhs) const { Tick r(*this); return r += rhs; } - constexpr Tick operator-(TimeSpan rhs) const { Tick r(*this); return r -= rhs; } + constexpr ALWAYS_INLINE Tick &operator+=(TimeSpan rhs) { m_tick += Tick(rhs).m_tick; return *this; } + constexpr ALWAYS_INLINE Tick &operator-=(TimeSpan rhs) { m_tick -= Tick(rhs).m_tick; return *this; } + constexpr ALWAYS_INLINE Tick operator+(TimeSpan rhs) const { Tick r(*this); return r += rhs; } + constexpr ALWAYS_INLINE Tick operator-(TimeSpan rhs) const { Tick r(*this); return r -= rhs; } }; } diff --git a/libraries/libvapours/include/vapours/util/arch/arm64/util_atomic.hpp b/libraries/libvapours/include/vapours/util/arch/arm64/util_atomic.hpp index 588f59e1a..27fc7eb9c 100644 --- a/libraries/libvapours/include/vapours/util/arch/arm64/util_atomic.hpp +++ b/libraries/libvapours/include/vapours/util/arch/arm64/util_atomic.hpp @@ -270,7 +270,7 @@ namespace ams::util { template ALWAYS_INLINE T Exchange(T 
arg) { - return ConvertToType(impl::AtomicExchangeImpl(this->GetStoragePointer(), ConvertToStorage(arg))); + return ConvertToType(impl::AtomicExchangeImpl(this->GetStoragePointer(), ConvertToStorage(arg))); } template @@ -374,7 +374,7 @@ namespace ams::util { template ALWAYS_INLINE T Exchange(T arg) const { - return ConvertToType(impl::AtomicExchangeImpl(this->GetStoragePointer(), ConvertToStorage(arg))); + return ConvertToType(impl::AtomicExchangeImpl(this->GetStoragePointer(), ConvertToStorage(arg))); } template diff --git a/libraries/libvapours/include/vapours/util/util_bitutil.hpp b/libraries/libvapours/include/vapours/util/util_bitutil.hpp index f8be7402f..113890ad9 100644 --- a/libraries/libvapours/include/vapours/util/util_bitutil.hpp +++ b/libraries/libvapours/include/vapours/util/util_bitutil.hpp @@ -255,4 +255,28 @@ namespace ams::util { return static_cast((v + add) / d); } + template + constexpr ALWAYS_INLINE T ScaleByConstantFactor(const T V) { + /* Multiplying and dividing by large numerator/denominator can cause error to be introduced. */ + /* This algorithm multiplies/divides in stages, so as to mitigate this (particularly with large denominator). */ + + /* Justification for the algorithm. */ + /* Calculate: (V * N) / D */ + /* = (Quot_V * D + Rem_V) * (Quot_N * D + Rem_N) / D */ + /* = (D^2 * (Quot_V * Quot_N) + D * (Quot_V * Rem_N + Rem_V * Quot_N) + Rem_V * Rem_N) / D */ + /* = (D * Quot_V * Quot_N) + (Quot_V * Rem_N) + (Rem_V * Quot_N) + ((Rem_V * Rem_N) / D) */ + + /* Calculate quotients/remainders. */ + const T Quot_V = V / D; + const T Rem_V = V % D; + constexpr T Quot_N = N / D; + constexpr T Rem_N = N % D; + + /* Calculate the remainder multiplication, rounding up by adding (D - 1) before dividing. */ + const T rem_mult = ((Rem_V * Rem_N) + (D - 1)) / D; + + /* Calculate results. 
*/ + return (D * Quot_N * Quot_V) + (Quot_V * Rem_N) + (Rem_V * Quot_N) + rem_mult; + } + } diff --git a/mesosphere/kernel/kernel.ld b/mesosphere/kernel/kernel.ld index efbc1c1e9..2235dae10 100644 --- a/mesosphere/kernel/kernel.ld +++ b/mesosphere/kernel/kernel.ld @@ -51,24 +51,24 @@ SECTIONS . = ALIGN(8); } :code - /* .vectors. */ - . = ALIGN(2K); - __vectors_start__ = . ; - .vectors : - { - KEEP( *(.vectors) ) - . = ALIGN(8); - } :code - /* .sleep. */ . = ALIGN(4K); __sleep_start__ = . ; - .sleep : + .sleep : { KEEP( *(.sleep .sleep.*) ) . = ALIGN(8); } :code + /* .vectors. */ + . = ALIGN(2K); + __vectors_start__ = . ; + .vectors : + { + KEEP( *(.vectors) ) + . = ALIGN(8); + } :code + /* =========== RODATA section =========== */ . = ALIGN(0x1000); __rodata_start = . ;