diff --git a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp
index 5a4b27651..136f06f39 100644
--- a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp
+++ b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp
@@ -22,16 +22,20 @@ namespace ams::kern::arm64::cpu {
     #if defined(ATMOSPHERE_CPU_ARM_CORTEX_A57) || defined(ATMOSPHERE_CPU_ARM_CORTEX_A53)
     constexpr inline size_t InstructionCacheLineSize = 0x40;
     constexpr inline size_t DataCacheLineSize        = 0x40;
+    constexpr inline size_t NumPerformanceCounters   = 6;
     #else
         #error "Unknown CPU for cache line sizes"
     #endif
 
     #if defined(ATMOSPHERE_BOARD_NINTENDO_SWITCH)
-    static constexpr size_t NumCores = 4;
+    constexpr inline size_t NumCores = 4;
     #else
         #error "Unknown Board for cpu::NumCores"
     #endif
 
+    /* Initialization. */
+    NOINLINE void InitializeInterruptThreads(s32 core_id);
+
     /* Helpers for managing memory state. */
     ALWAYS_INLINE void DataSynchronizationBarrier() {
         __asm__ __volatile__("dsb sy" ::: "memory");
@@ -65,6 +69,40 @@ namespace ams::kern::arm64::cpu {
         InstructionMemoryBarrier();
     }
 
+    /* Performance counter helpers. */
+    ALWAYS_INLINE u64 GetCycleCounter() {
+        return cpu::GetPmcCntrEl0();
+    }
+
+    ALWAYS_INLINE u32 GetPerformanceCounter(s32 n) {
+        u64 counter = 0;
+        if (n < static_cast<s32>(NumPerformanceCounters)) {
+            switch (n) {
+                case 0:
+                    counter = cpu::GetPmevCntr0El0();
+                    break;
+                case 1:
+                    counter = cpu::GetPmevCntr1El0();
+                    break;
+                case 2:
+                    counter = cpu::GetPmevCntr2El0();
+                    break;
+                case 3:
+                    counter = cpu::GetPmevCntr3El0();
+                    break;
+                case 4:
+                    counter = cpu::GetPmevCntr4El0();
+                    break;
+                case 5:
+                    counter = cpu::GetPmevCntr5El0();
+                    break;
+                default:
+                    break;
+            }
+        }
+        return static_cast<u32>(counter);
+    }
+
     /* Helper for address access. */
     ALWAYS_INLINE bool GetPhysicalAddressWritable(KPhysicalAddress *out, KVirtualAddress addr, bool privileged = false) {
         const uintptr_t va = GetInteger(addr);
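Note: GetPerformanceCounter has to dispatch through a switch because each pmevcntrN_el0 is a distinct system register, and mrs/msr encode the register name statically (short of indirecting through PMSELR_EL0/PMXEVCNTR_EL0, there is no runtime-indexed access). As a minimal sketch, each accessor pair generated by MESOSPHERE_CPU_DEFINE_SYSREG_ACCESSORS presumably expands to something like the following; the real macro lives in kern_cpu_system_registers.hpp and may differ in detail:

    /* Hypothetical expansion for (PmevCntr0El0, pmevcntr0_el0); illustration only. */
    ALWAYS_INLINE u64 GetPmevCntr0El0() {
        u64 value;
        __asm__ __volatile__("mrs %0, pmevcntr0_el0" : "=r"(value));
        return value;
    }

    ALWAYS_INLINE void SetPmevCntr0El0(u64 value) {
        __asm__ __volatile__("msr pmevcntr0_el0, %0" :: "r"(value) : "memory");
    }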
@@ -115,8 +153,8 @@ namespace ams::kern::arm64::cpu {
 
     /* Cache management helpers. */
     void ClearPageToZeroImpl(void *);
-    void FlushEntireDataCacheShared();
-    void FlushEntireDataCacheLocal();
+    void FlushEntireDataCacheSharedForInit();
+    void FlushEntireDataCacheLocalForInit();
 
     ALWAYS_INLINE void ClearPageToZero(void *page) {
         MESOSPHERE_ASSERT(util::IsAligned(reinterpret_cast<uintptr_t>(page), PageSize));
diff --git a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu_system_registers.hpp b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu_system_registers.hpp
index 1fe333783..cf7ca4b9e 100644
--- a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu_system_registers.hpp
+++ b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu_system_registers.hpp
@@ -52,6 +52,7 @@ namespace ams::kern::arm64::cpu {
     MESOSPHERE_CPU_DEFINE_SYSREG_ACCESSORS(CpuEctlrEl1, s3_1_c15_c2_1)
 
     MESOSPHERE_CPU_DEFINE_SYSREG_ACCESSORS(CsselrEl1, csselr_el1)
+    MESOSPHERE_CPU_DEFINE_SYSREG_ACCESSORS(CcsidrEl1, ccsidr_el1)
 
     MESOSPHERE_CPU_DEFINE_SYSREG_ACCESSORS(OslarEl1, oslar_el1)
 
@@ -61,6 +62,15 @@ namespace ams::kern::arm64::cpu {
     MESOSPHERE_CPU_DEFINE_SYSREG_ACCESSORS(Afsr0El1, afsr0_el1)
     MESOSPHERE_CPU_DEFINE_SYSREG_ACCESSORS(Afsr1El1, afsr1_el1)
 
+    MESOSPHERE_CPU_DEFINE_SYSREG_ACCESSORS(PmUserEnrEl0, pmuserenr_el0)
+    MESOSPHERE_CPU_DEFINE_SYSREG_ACCESSORS(PmcCntrEl0,   pmccntr_el0)
+    MESOSPHERE_CPU_DEFINE_SYSREG_ACCESSORS(PmevCntr0El0, pmevcntr0_el0)
+    MESOSPHERE_CPU_DEFINE_SYSREG_ACCESSORS(PmevCntr1El0, pmevcntr1_el0)
+    MESOSPHERE_CPU_DEFINE_SYSREG_ACCESSORS(PmevCntr2El0, pmevcntr2_el0)
+    MESOSPHERE_CPU_DEFINE_SYSREG_ACCESSORS(PmevCntr3El0, pmevcntr3_el0)
+    MESOSPHERE_CPU_DEFINE_SYSREG_ACCESSORS(PmevCntr4El0, pmevcntr4_el0)
+    MESOSPHERE_CPU_DEFINE_SYSREG_ACCESSORS(PmevCntr5El0, pmevcntr5_el0)
+
     #define FOR_I_IN_0_TO_15(HANDLER, ...)                                                                               \
         HANDLER(0,  ## __VA_ARGS__) HANDLER(1,  ## __VA_ARGS__) HANDLER(2,  ## __VA_ARGS__) HANDLER(3,  ## __VA_ARGS__)  \
         HANDLER(4,  ## __VA_ARGS__) HANDLER(5,  ## __VA_ARGS__) HANDLER(6,  ## __VA_ARGS__) HANDLER(7,  ## __VA_ARGS__)  \
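Note: the new CcsidrEl1 accessor is what lets the set/way code later in this patch read the cache geometry as a raw value. For reference, CacheSizeIdRegisterAccessor presumably decodes the standard ARMv8.0 (non-CCIDX) CCSIDR_EL1 layout used by the A57/A53; a sketch of the field extraction, with hypothetical helper names:

    /* CCSIDR_EL1, v8.0 layout: LineSize[2:0], Associativity[12:3], NumSets[27:13]. */
    constexpr int CcsidrLineSize(u64 v)      { return static_cast<int>(v & 0x7);            } /* log2(words per line) - 2 */
    constexpr int CcsidrAssociativity(u64 v) { return static_cast<int>((v >> 3) & 0x3FF);   } /* ways - 1 */
    constexpr int CcsidrNumSets(u64 v)       { return static_cast<int>((v >> 13) & 0x7FFF); } /* sets - 1 */

Because Associativity and NumSets encode count-minus-one, the set/way loops below iterate with <= rather than <.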
diff --git a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_name.hpp b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_name.hpp
index 33ad11e78..abadf78f4 100644
--- a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_name.hpp
+++ b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_name.hpp
@@ -18,14 +18,31 @@ namespace ams::kern::arm64 {
 
     namespace interrupt_name {
 
         enum KInterruptName : s32 {
+            /* SGIs */
+            KInterruptName_ThreadTerminate = 4,
+            KInterruptName_CacheOperation  = 5,
             KInterruptName_Scheduler      = 6,
 
-            KInterruptName_HardwareTimerEl1 = 30,
+            KInterruptName_PerformanceCounter = 8,
+
+            /* PPIs */
+            #if defined(ATMOSPHERE_BOARD_NINTENDO_SWITCH)
+            KInterruptName_VirtualMaintenance     = 25,
+            KInterruptName_HypervisorTimer        = 26,
+            KInterruptName_VirtualTimer           = 27,
+            KInterruptName_LegacyNFiq             = 28,
+            KInterruptName_SecurePhysicalTimer    = 29,
+            KInterruptName_NonSecurePhysicalTimer = 30,
+            KInterruptName_LegacyNIrq             = 31,
+            #endif
 
             #if defined(ATMOSPHERE_BOARD_NINTENDO_SWITCH)
             KInterruptName_MemoryController = 109,
             #endif
         };
 
     }
 
 }
diff --git a/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp b/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp
index ec4097944..c2ddc5ed3 100644
--- a/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp
+++ b/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp
@@ -23,19 +23,196 @@ namespace ams::kern::arm64::cpu {
 
     namespace {
 
+        /* Nintendo registers a handler for an SGI on thread termination, but the handler does nothing. */
+        /* This is sufficient, because post-interrupt scheduling is all they really intend to happen.    */
+        class KThreadTerminationInterruptHandler : public KInterruptHandler {
+            public:
+                constexpr KThreadTerminationInterruptHandler() : KInterruptHandler() { /* ... */ }
+
+                virtual KInterruptTask *OnInterrupt(s32 interrupt_id) override {
+                    return nullptr;
+                }
+        };
+
+        class KPerformanceCounterInterruptHandler : public KInterruptHandler {
+            private:
+                static inline KLightLock s_lock;
+            private:
+                u64 counter;
+                s32 which;
+                bool done;
+            public:
+                constexpr KPerformanceCounterInterruptHandler() : KInterruptHandler(), counter(), which(), done() { /* ... */ }
+
+                static KLightLock &GetLock() { return s_lock; }
+
+                void Setup(s32 w) {
+                    this->done  = false;
+                    this->which = w;
+                }
+
+                void Wait() {
+                    while (!this->done) {
+                        __asm__ __volatile__("yield");
+                    }
+                }
+
+                u64 GetCounter() const { return this->counter; }
+
+                /* Nintendo misuses this per their own API, but it's functional. */
+                virtual KInterruptTask *OnInterrupt(s32 interrupt_id) override {
+                    if (this->which < 0) {
+                        this->counter = cpu::GetCycleCounter();
+                    } else {
+                        this->counter = cpu::GetPerformanceCounter(this->which);
+                    }
+                    DataMemoryBarrier();
+                    this->done = true;
+                    return nullptr;
+                }
+        };
+
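Note: a sampling request is a short handshake: the requester serializes on the shared lock, arms the target core's handler with Setup, raises the KInterruptName_PerformanceCounter SGI at that core, and spins in Wait until OnInterrupt publishes the value. The DataMemoryBarrier orders the counter store before the done flag, so the spinning reader cannot observe done without the sampled value. A hypothetical caller (not part of this patch; it assumes the SendInterProcessorInterrupt API used for KInterruptName_CacheOperation below):

    u64 SampleRemoteCycleCounter(s32 core_id) {
        KScopedLightLock lk(KPerformanceCounterInterruptHandler::GetLock());
        auto &handler = g_performance_counter_handler[core_id]; /* instance array, declared below */
        handler.Setup(-1); /* which < 0 selects the cycle counter in OnInterrupt */
        Kernel::GetInterruptManager().SendInterProcessorInterrupt(KInterruptName_PerformanceCounter, 1ul << core_id);
        handler.Wait();
        return handler.GetCounter();
    }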
+        class KCacheHelperInterruptHandler : public KInterruptHandler {
+            private:
+                static constexpr s32 ThreadPriority = 8;
+            public:
+                enum class Operation {
+                    Idle,
+                    InvalidateInstructionCache,
+                    StoreDataCache,
+                    FlushDataCache,
+                };
+            private:
+                KLightLock lock;
+                KLightLock cv_lock;
+                KLightConditionVariable cv;
+                std::atomic<u64> target_cores;
+                volatile Operation operation;
+            private:
+                static void ThreadFunction(uintptr_t _this) {
+                    reinterpret_cast<KCacheHelperInterruptHandler *>(_this)->ThreadFunctionImpl();
+                }
+
+                void ThreadFunctionImpl() {
+                    const s32 core_id = GetCurrentCoreId();
+                    while (true) {
+                        /* Wait for a request to come in. */
+                        {
+                            KScopedLightLock lk(this->cv_lock);
+                            while ((this->target_cores & (1ul << core_id)) == 0) {
+                                this->cv.Wait(std::addressof(this->cv_lock));
+                            }
+                        }
+
+                        /* Process the request. */
+                        this->ProcessOperation();
+
+                        /* Broadcast, if there's nothing pending. */
+                        {
+                            KScopedLightLock lk(this->cv_lock);
+                            if (this->target_cores == 0) {
+                                this->cv.Broadcast();
+                            }
+                        }
+                    }
+                }
+
+                void ProcessOperation();
+            public:
+                constexpr KCacheHelperInterruptHandler() : KInterruptHandler(), lock(), cv_lock(), cv(), target_cores(), operation(Operation::Idle) { /* ... */ }
+
+                void Initialize(s32 core_id) {
+                    /* Reserve a thread from the system limit. */
+                    MESOSPHERE_ABORT_UNLESS(Kernel::GetSystemResourceLimit().Reserve(ams::svc::LimitableResource_ThreadCountMax, 1));
+
+                    /* Create a new thread. */
+                    KThread *new_thread = KThread::Create();
+                    MESOSPHERE_ABORT_UNLESS(new_thread != nullptr);
+                    MESOSPHERE_R_ABORT_UNLESS(KThread::InitializeKernelThread(new_thread, ThreadFunction, reinterpret_cast<uintptr_t>(this), ThreadPriority, core_id));
+
+                    /* Register the new thread. */
+                    KThread::Register(new_thread);
+
+                    /* Run the thread. */
+                    new_thread->Run();
+                }
+
+                virtual KInterruptTask *OnInterrupt(s32 interrupt_id) override {
+                    this->ProcessOperation();
+                    return nullptr;
+                }
+
+                void RequestOperation(Operation op) {
+                    KScopedLightLock lk(this->lock);
+                    MESOSPHERE_ABORT_UNLESS(this->operation == Operation::Idle);
+                    /* Send and wait for acknowledgement of request. */
+                    {
+                        KScopedLightLock cv_lk(this->cv_lock);
+                        MESOSPHERE_ABORT_UNLESS(this->target_cores == 0);
+
+                        /* Set operation. */
+                        this->operation = op;
+
+                        /* Create core masks for us to use. */
+                        constexpr u64 AllCoresMask = (1ul << cpu::NumCores) - 1ul;
+                        const u64 other_cores_mask = AllCoresMask & ~(1ul << GetCurrentCoreId());
+
+                        if ((op == Operation::InvalidateInstructionCache) || (Kernel::GetState() == Kernel::State::Initializing)) {
+                            /* For certain operations, we want to send an interrupt. */
+                            this->target_cores = other_cores_mask;
+                            DataSynchronizationBarrier();
+                            const u64 target_mask = this->target_cores;
+                            DataSynchronizationBarrier();
+                            Kernel::GetInterruptManager().SendInterProcessorInterrupt(KInterruptName_CacheOperation, target_mask);
+                            this->ProcessOperation();
+                            while (this->target_cores != 0) {
+                                __asm__ __volatile__("yield");
+                            }
+                        } else {
+                            /* Request all cores. */
+                            this->target_cores = AllCoresMask;
+
+                            /* Use the condvar. */
+                            this->cv.Broadcast();
+                            while (this->target_cores != 0) {
+                                this->cv.Wait(std::addressof(this->cv_lock));
+                            }
+                        }
+                    }
+                    /* Go idle again. */
+                    this->operation = Operation::Idle;
+                }
+        };
+
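Note: RequestOperation completes through one of two paths. Instruction cache invalidates, and any request made while the kernel is still initializing (before the helper threads exist), interrupt the other cores via SGI and yield-spin; everything else sets all core bits and wakes the per-core helper threads through the condition variable. Either way, the requester blocks until every targeted core clears its bit in target_cores. A hypothetical call site:

    /* Illustrative only: ask every core to write back its local data cache. */
    g_cache_operation_handler.RequestOperation(KCacheHelperInterruptHandler::Operation::StoreDataCache);

Note also the asymmetry: the SGI path targets only other_cores_mask and runs ProcessOperation directly on the requesting core, while the condvar path sets AllCoresMask and lets the local helper thread participate like any other.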
+        /* Instances of the interrupt handlers. */
+        KThreadTerminationInterruptHandler  g_thread_termination_handler;
+        KCacheHelperInterruptHandler        g_cache_operation_handler;
+        KPerformanceCounterInterruptHandler g_performance_counter_handler[cpu::NumCores];
+
         /* Expose this as a global, for asm to use. */
         s32 g_all_core_sync_count;
 
-        void FlushEntireDataCacheImpl(int level) {
+        template<bool Init, typename F>
+        ALWAYS_INLINE void PerformCacheOperationBySetWayImpl(int level, F f) {
             /* Used in multiple locations. */
             const u64 level_sel_value = static_cast<u64>(level << 1);
 
-            /* Set selection register. */
-            cpu::SetCsselrEl1(level_sel_value);
-            cpu::InstructionMemoryBarrier();
+            u64 ccsidr_value;
+            if constexpr (Init) {
+                /* During init, we can just set the selection register directly. */
+                cpu::SetCsselrEl1(level_sel_value);
+                cpu::InstructionMemoryBarrier();
+                ccsidr_value = cpu::GetCcsidrEl1();
+            } else {
+                /* After init, we need to care about interrupts. */
+                KScopedInterruptDisable di;
+                cpu::SetCsselrEl1(level_sel_value);
+                cpu::InstructionMemoryBarrier();
+                ccsidr_value = cpu::GetCcsidrEl1();
+            }
 
             /* Get cache size id info. */
-            CacheSizeIdRegisterAccessor ccsidr_el1;
+            CacheSizeIdRegisterAccessor ccsidr_el1(ccsidr_value);
             const int num_sets  = ccsidr_el1.GetNumberOfSets();
             const int num_ways  = ccsidr_el1.GetAssociativity();
             const int line_size = ccsidr_el1.GetLineSize();
@@ -47,12 +224,58 @@ namespace ams::kern::arm64::cpu {
                 for (int set = 0; set <= num_sets; set++) {
                     const u64 way_value  = static_cast<u64>(way) << way_shift;
                     const u64 set_value  = static_cast<u64>(set) << set_shift;
-                    const u64 cisw_value = way_value | set_value | level_sel_value;
-                    __asm__ __volatile__("dc cisw, %0" ::"r"(cisw_value) : "memory");
+                    f(way_value | set_value | level_sel_value);
                 }
             }
         }
 
+        ALWAYS_INLINE void FlushDataCacheLineBySetWayImpl(const u64 sw_value) {
+            __asm__ __volatile__("dc cisw, %[v]" :: [v]"r"(sw_value) : "memory");
+        }
+
+        ALWAYS_INLINE void StoreDataCacheLineBySetWayImpl(const u64 sw_value) {
+            __asm__ __volatile__("dc csw, %[v]" :: [v]"r"(sw_value) : "memory");
+        }
+
+        template<bool Init, typename F>
+        ALWAYS_INLINE void PerformCacheOperationBySetWayShared(F f) {
+            CacheLineIdRegisterAccessor clidr_el1;
+            const int levels_of_coherency   = clidr_el1.GetLevelsOfCoherency();
+            const int levels_of_unification = clidr_el1.GetLevelsOfUnification();
+
+            for (int level = levels_of_coherency; level >= levels_of_unification; level--) {
+                PerformCacheOperationBySetWayImpl<Init>(level, f);
+            }
+        }
+
+        template<bool Init, typename F>
+        ALWAYS_INLINE void PerformCacheOperationBySetWayLocal(F f) {
+            CacheLineIdRegisterAccessor clidr_el1;
+            const int levels_of_unification = clidr_el1.GetLevelsOfUnification();
+
+            for (int level = levels_of_unification - 1; level >= 0; level--) {
+                PerformCacheOperationBySetWayImpl<Init>(level, f);
+            }
+        }
+
+        void KCacheHelperInterruptHandler::ProcessOperation() {
+            switch (this->operation) {
+                case Operation::Idle:
+                    break;
+                case Operation::InvalidateInstructionCache:
+                    InstructionMemoryBarrier();
+                    break;
+                case Operation::StoreDataCache:
+                    PerformCacheOperationBySetWayLocal<false>(StoreDataCacheLineBySetWayImpl);
+                    DataSynchronizationBarrier();
+                    break;
+                case Operation::FlushDataCache:
+                    PerformCacheOperationBySetWayLocal<false>(FlushDataCacheLineBySetWayImpl);
+                    DataSynchronizationBarrier();
+                    break;
+            }
+
+            /* Mark this core as done, so that waiting requesters can observe completion. */
+            this->target_cores &= ~(1ul << GetCurrentCoreId());
+        }
+
         ALWAYS_INLINE void SetEventLocally() {
             __asm__ __volatile__("sevl" ::: "memory");
         }
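Note: the value handed to f() is the standard DC CSW/CISW set/way operand: the way in the top bits, the set at line-size granularity, and the level in bits [3:1]. The shifts come from surrounding, unchanged lines (way_shift is presumably derived by counting leading zeros of the associativity, set_shift from line_size + 4). A worked example, assuming a 16-way cache with 64-byte lines at level 2 (level index 1), as on a Cortex-A57 L2:

    /* way_shift = clz32(15) = 28; set_shift = line_size + 4 = 2 + 4 = 6. Illustration only. */
    constexpr u64 MakeSetWayValue(u64 way, u64 set, u64 level_index) {
        return (way << 28) | (set << 6) | (level_index << 1);
    }
    static_assert(MakeSetWayValue(15, 0, 1) == 0xF0000002);

dc cisw cleans and invalidates a line (the flush operation), while dc csw only cleans it (the store, i.e. write-back-without-invalidate, operation).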
@@ -63,26 +286,27 @@ namespace ams::kern::arm64::cpu {
 
     }
 
-    void FlushEntireDataCacheShared() {
-        CacheLineIdRegisterAccessor clidr_el1;
-        const int levels_of_coherency   = clidr_el1.GetLevelsOfCoherency();
-        const int levels_of_unification = clidr_el1.GetLevelsOfUnification();
-
-        for (int level = levels_of_coherency; level >= levels_of_unification; level--) {
-            FlushEntireDataCacheImpl(level);
-        }
+    void FlushEntireDataCacheSharedForInit() {
+        return PerformCacheOperationBySetWayShared<true>(FlushDataCacheLineBySetWayImpl);
     }
 
-    void FlushEntireDataCacheLocal() {
-        CacheLineIdRegisterAccessor clidr_el1;
-        const int levels_of_unification = clidr_el1.GetLevelsOfUnification();
-
-        for (int level = levels_of_unification - 1; level >= 0; level--) {
-            FlushEntireDataCacheImpl(level);
-        }
+    void FlushEntireDataCacheLocalForInit() {
+        return PerformCacheOperationBySetWayLocal<true>(FlushDataCacheLineBySetWayImpl);
     }
 
-    NOINLINE void SynchronizeAllCores() {
+    void InitializeInterruptThreads(s32 core_id) {
+        /* Initialize the cache operation handler. */
+        g_cache_operation_handler.Initialize(core_id);
+
+        /* Bind all handlers to the relevant interrupts. */
+        Kernel::GetInterruptManager().BindHandler(std::addressof(g_cache_operation_handler),              KInterruptName_CacheOperation,     core_id, KInterruptController::PriorityLevel_High,      false, false);
+        Kernel::GetInterruptManager().BindHandler(std::addressof(g_thread_termination_handler),           KInterruptName_ThreadTerminate,    core_id, KInterruptController::PriorityLevel_Scheduler, false, false);
+
+        if (KTargetSystem::IsUserPmuAccessEnabled()) { SetPmUserEnrEl0(1ul); }
+        Kernel::GetInterruptManager().BindHandler(std::addressof(g_performance_counter_handler[core_id]), KInterruptName_PerformanceCounter, core_id, KInterruptController::PriorityLevel_Timer,     false, false);
+    }
+
+    void SynchronizeAllCores() {
         SynchronizeAllCoresImpl(&g_all_core_sync_count, static_cast<s32>(cpu::NumCores));
     }
diff --git a/libraries/libmesosphere/source/arch/arm64/kern_k_hardware_timer.cpp b/libraries/libmesosphere/source/arch/arm64/kern_k_hardware_timer.cpp
index d56d5b8eb..5b50195d2 100644
--- a/libraries/libmesosphere/source/arch/arm64/kern_k_hardware_timer.cpp
+++ b/libraries/libmesosphere/source/arch/arm64/kern_k_hardware_timer.cpp
@@ -50,7 +50,7 @@ namespace ams::kern::arm64 {
         InitializeGlobalTimer();
 
         /* Bind the interrupt task for this core. */
-        Kernel::GetInterruptManager().BindHandler(GetHardwareTimerInterruptTask(core_id), KInterruptName_HardwareTimerEl1, core_id, KInterruptController::PriorityLevel_Timer, true, true);
+        Kernel::GetInterruptManager().BindHandler(GetHardwareTimerInterruptTask(core_id), KInterruptName_NonSecurePhysicalTimer, core_id, KInterruptController::PriorityLevel_Timer, true, true);
     }
 
     void KHardwareTimer::Finalize() {
@@ -74,7 +74,7 @@ namespace ams::kern::arm64 {
         }
 
         /* Clear the timer interrupt. */
-        Kernel::GetInterruptManager().ClearInterrupt(KInterruptName_HardwareTimerEl1, GetCurrentCoreId());
+        Kernel::GetInterruptManager().ClearInterrupt(KInterruptName_NonSecurePhysicalTimer, GetCurrentCoreId());
     }
 
 }
diff --git a/libraries/libmesosphere/source/kern_main.cpp b/libraries/libmesosphere/source/kern_main.cpp
index 238226fb4..81ab0d680 100644
--- a/libraries/libmesosphere/source/kern_main.cpp
+++ b/libraries/libmesosphere/source/kern_main.cpp
@@ -99,7 +99,7 @@ namespace ams::kern {
         });
 
         /* Initialize cpu interrupt threads. */
-        MESOSPHERE_TODO("cpu::InitializeInterruptThreads(core_id);");
+        cpu::InitializeInterruptThreads(core_id);
 
         /* Initialize the DPC manager. */
         KDpcManager::Initialize();
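Note: writing 1ul to PMUSERENR_EL0 sets its EN bit, which opens the PMU's EL0 interface on that core; userland can then read the counters directly, though they only tick once enabled via the EL1-controlled PMCR_EL0/PMCNTENSET_EL0. A sketch of the EL0 side (hypothetical, not part of this patch):

    /* Valid from EL0 once PMUSERENR_EL0.EN is set and the cycle counter is enabled. */
    static inline u64 ReadCycleCounterFromEl0() {
        u64 value;
        __asm__ __volatile__("mrs %0, pmccntr_el0" : "=r"(value));
        return value;
    }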
diff --git a/mesosphere/kernel_ldr/source/kern_init_loader.cpp b/mesosphere/kernel_ldr/source/kern_init_loader.cpp
index 226e20de2..627ffea4f 100644
--- a/mesosphere/kernel_ldr/source/kern_init_loader.cpp
+++ b/mesosphere/kernel_ldr/source/kern_init_loader.cpp
@@ -53,15 +53,15 @@ namespace ams::kern::init::loader {
 
     void EnsureEntireDataCacheFlushed() {
         /* Flush shared cache. */
-        cpu::FlushEntireDataCacheShared();
+        cpu::FlushEntireDataCacheSharedForInit();
         cpu::DataSynchronizationBarrier();
 
         /* Flush local cache. */
-        cpu::FlushEntireDataCacheLocal();
+        cpu::FlushEntireDataCacheLocalForInit();
         cpu::DataSynchronizationBarrier();
 
         /* Flush shared cache. */
-        cpu::FlushEntireDataCacheShared();
+        cpu::FlushEntireDataCacheSharedForInit();
         cpu::DataSynchronizationBarrier();
 
         /* Invalidate entire instruction cache. */
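Note: the shared-then-local-then-shared ordering above is deliberate. The first pass writes back dirty lines held at the shared (outer) cache levels; the local pass then cleans and invalidates L1, which can push further dirty lines down into L2; the final shared pass writes those back as well, leaving every data cache level clean. Condensed, the routine amounts to the following (the hunk is truncated above; per its last comment, it presumably finishes by invalidating the entire instruction cache):

    /* Sketch only; mirrors the hunk above. */
    cpu::FlushEntireDataCacheSharedForInit(); /* Clean+invalidate L2 and outward. */
    cpu::DataSynchronizationBarrier();
    cpu::FlushEntireDataCacheLocalForInit();  /* Clean+invalidate L1; may dirty L2. */
    cpu::DataSynchronizationBarrier();
    cpu::FlushEntireDataCacheSharedForInit(); /* Catch lines the local pass evicted. */
    cpu::DataSynchronizationBarrier();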