1
0
Fork 0
mirror of https://github.com/Atmosphere-NX/Atmosphere.git synced 2025-01-21 00:27:07 +00:00

kern: optimize timespan -> tick codegen, improve .text layout

This commit is contained in:
Michael Scire 2021-10-24 01:04:19 -07:00
parent 89926f44c6
commit aaa3770806
5 changed files with 88 additions and 45 deletions

View file

@ -33,8 +33,6 @@
/* Expands to `inline` together with the always_inline attribute. */
#define ALWAYS_INLINE inline __attribute__((always_inline))
/* Expands to the noinline attribute. */
#define NOINLINE __attribute__((noinline))
/* Yields x on both branches of the __builtin_constant_p(x) test. */
/* NOTE(review): presumably a hint to get x constant-folded by the compiler -- confirm intent. */
#define CONST_FOLD(x) (__builtin_constant_p(x) ? (x) : (x))
/* Pastes s1 and s2 together exactly as written (operands of ## are not macro-expanded first). */
#define CONCATENATE_IMPL(s1, s2) s1##s2
/* Pastes s1 and s2 via CONCATENATE_IMPL, so macro arguments are expanded before pasting. */
#define CONCATENATE(s1, s2) CONCATENATE_IMPL(s1, s2)

View file

@ -23,50 +23,71 @@ namespace ams::svc {
/* Wraps a signed 64-bit tick count. */
/* A Tick can be built explicitly from a raw count or implicitly from a TimeSpan, */
/* and converts implicitly back to s64. */
class Tick {
    public:
        static constexpr s64 TicksPerSecond = ::ams::svc::TicksPerSecond;
        static consteval s64 GetTicksPerSecond() { return TicksPerSecond; }
    private:
        s64 m_tick;
    private:
        static constexpr s64 NanoSecondsPerSecond = TimeSpan::FromSeconds(1).GetNanoSeconds();

        /* Converts a TimeSpan to ticks by scaling its nanosecond count by TicksPerSecond / NanoSecondsPerSecond. */
        static constexpr ALWAYS_INLINE s64 ConvertTimeSpanToTickImpl(TimeSpan ts) {
            /* Get nano-seconds. */
            const s64 ns = ts.GetNanoSeconds();

            /* Special-case optimize arm64/nintendo-nx value. */
            /* Inline assembly cannot be used during constant evaluation, so only take this path at runtime. */
            if (!std::is_constant_evaluated()) {
                if constexpr (TicksPerSecond == 19'200'000) {
                    #if defined(ATMOSPHERE_IS_MESOSPHERE) && defined(ATMOSPHERE_ARCH_ARM64)
                    /* The immediates below assemble 19'200'000 (0x0124F800), 1'000'000'000 (0x3B9ACA00) */
                    /* and -1'000'000'000 (0xFFFFFFFFC4653600), alongside two multiply-high constants. */
                    /* NOTE(review): this appears to perform the same quotient/remainder scaling as the */
                    /* generic path below, using umulh/smulh in place of division -- confirm. */
                    s64 t0, t1, t2, t3;
                    __asm__ __volatile__("mov %[t1], #0x5A53\n"
                                         "movk %[t1], #0xA09B, lsl #16\n"
                                         "lsr %[t0], %[ns], #9\n"
                                         "movk %[t1], #0xB82F, lsl #32\n"
                                         "movk %[t1], #0x0044, lsl #48\n"
                                         "umulh %[t0], %[t0], %[t1]\n"
                                         "mov %[t1], #0xFFFFFFFFFFFF3600\n"
                                         "movk %[t1], #0xC465, lsl #16\n"
                                         "lsr %[t0], %[t0], #0xB\n"
                                         "madd %[t1], %[t0], %[t1], %[ns]\n"
                                         "mov %w[t2], #0xF800\n"
                                         "movk %w[t2], #0x0124, lsl #16\n"
                                         "mov %w[t3], #0xCA00\n"
                                         "movk %w[t3], #0x3B9A, lsl #16\n"
                                         "madd %[t1], %[t1], %[t2], %[t3]\n"
                                         "mov %[t3], #0x94B3\n"
                                         "movk %[t3], #0x26D6, lsl #16\n"
                                         "movk %[t3], #0x0BE8, lsl #32\n"
                                         "movk %[t3], #0x112E, lsl #48\n"
                                         "sub %[t1], %[t1], #1\n"
                                         "smulh %[t1], %[t1], %[t3]\n"
                                         "asr %[t3], %[t1], #26\n"
                                         "add %[t1], %[t3], %[t1], lsr #63\n"
                                         "madd %[t0], %[t0], %[t2], %[t1]\n"
                                         : [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3)
                                         : [ns]"r"(ns)
                                         : "cc");
                    return t0;
                    #endif
                }
            }

            /* Generic path: used for constant evaluation, other tick rates, and other targets. */
            return util::ScaleByConstantFactor<s64, TicksPerSecond, NanoSecondsPerSecond>(ns);
        }
    public:
        constexpr ALWAYS_INLINE explicit Tick(s64 t = 0) : m_tick(t) { /* ... */ }
        constexpr ALWAYS_INLINE Tick(TimeSpan ts) : m_tick(ConvertTimeSpanToTickImpl(ts)) { /* ... */ }

        constexpr ALWAYS_INLINE operator s64() const { return m_tick; }

        /* Tick arithmetic. */
        constexpr ALWAYS_INLINE Tick &operator+=(Tick rhs) { m_tick += rhs.m_tick; return *this; }
        constexpr ALWAYS_INLINE Tick &operator-=(Tick rhs) { m_tick -= rhs.m_tick; return *this; }
        constexpr ALWAYS_INLINE Tick operator+(Tick rhs) const { Tick r(*this); return r += rhs; }
        constexpr ALWAYS_INLINE Tick operator-(Tick rhs) const { Tick r(*this); return r -= rhs; }

        /* TimeSpan operands are converted to ticks before being applied. */
        constexpr ALWAYS_INLINE Tick &operator+=(TimeSpan rhs) { m_tick += Tick(rhs).m_tick; return *this; }
        constexpr ALWAYS_INLINE Tick &operator-=(TimeSpan rhs) { m_tick -= Tick(rhs).m_tick; return *this; }
        constexpr ALWAYS_INLINE Tick operator+(TimeSpan rhs) const { Tick r(*this); return r += rhs; }
        constexpr ALWAYS_INLINE Tick operator-(TimeSpan rhs) const { Tick r(*this); return r -= rhs; }
};
}

View file

@ -270,7 +270,7 @@ namespace ams::util {
/* Exchanges the stored value with arg through impl::AtomicExchangeImpl and returns its result converted back to T. */
/* Order is forwarded to the implementation so the requested memory order is the one actually used. */
template<std::memory_order Order = std::memory_order_seq_cst>
ALWAYS_INLINE T Exchange(T arg) {
    return ConvertToType(impl::AtomicExchangeImpl<Order>(this->GetStoragePointer(), ConvertToStorage(arg)));
}
template<std::memory_order Order = std::memory_order_seq_cst>
@ -374,7 +374,7 @@ namespace ams::util {
/* Exchanges the stored value with arg through impl::AtomicExchangeImpl and returns its result converted back to T. */
/* Order is forwarded to the implementation so the requested memory order is the one actually used. */
template<std::memory_order Order = std::memory_order_seq_cst>
ALWAYS_INLINE T Exchange(T arg) const {
    return ConvertToType(impl::AtomicExchangeImpl<Order>(this->GetStoragePointer(), ConvertToStorage(arg)));
}
template<std::memory_order Order = std::memory_order_seq_cst>

View file

@ -255,4 +255,28 @@ namespace ams::util {
return static_cast<T>((v + add) / d);
}
template<typename T, T N, T D>
constexpr ALWAYS_INLINE T ScaleByConstantFactor(const T V) {
    /* Computes (V * N) / D for compile-time constants N and D. */
    /* Forming V * N directly and then dividing can introduce error when the operands are large, */
    /* so the work is done in stages on values that have first been split by D. */

    /* Derivation, writing V = D * v_quot + v_rem and N = D * n_quot + n_rem: */
    /*     (V * N) / D */
    /*   = (D * v_quot + v_rem) * (D * n_quot + n_rem) / D */
    /*   = (D^2 * v_quot * n_quot + D * (v_quot * n_rem + v_rem * n_quot) + v_rem * n_rem) / D */
    /*   = (D * v_quot * n_quot) + (v_quot * n_rem) + (v_rem * n_quot) + ((v_rem * n_rem) / D) */

    /* Split the constant factor; this is known at compile time. */
    constexpr T n_quot = N / D;
    constexpr T n_rem  = N % D;

    /* Split the input value. */
    const T v_quot = V / D;
    const T v_rem  = V % D;

    /* The only term that still needs a division is the product of remainders; round it up. */
    const T rem_term = ((v_rem * n_rem) + (D - 1)) / D;

    /* Sum the four terms from the derivation. */
    return (D * n_quot * v_quot) + (v_quot * n_rem) + (v_rem * n_quot) + rem_term;
}
}

View file

@ -51,24 +51,24 @@ SECTIONS
. = ALIGN(8);
} :code
/* .vectors. */
. = ALIGN(2K);
__vectors_start__ = . ;
.vectors :
{
KEEP( *(.vectors) )
. = ALIGN(8);
} :code
/* .sleep. */
. = ALIGN(4K);
__sleep_start__ = . ;
.sleep :
.sleep :
{
KEEP( *(.sleep .sleep.*) )
. = ALIGN(8);
} :code
/* .vectors. */
. = ALIGN(2K);
__vectors_start__ = . ;
.vectors :
{
KEEP( *(.vectors) )
. = ALIGN(8);
} :code
/* =========== RODATA section =========== */
. = ALIGN(0x1000);
__rodata_start = . ;