From bb0be4de8e98e154ff72b1ed9a93f077bde1e03d Mon Sep 17 00:00:00 2001
From: Michael Scire
Date: Tue, 22 Mar 2022 23:02:10 -0700
Subject: [PATCH] kern: implement revised IPI/SGI semantics

---
 .../mesosphere/arch/arm64/kern_cpu.hpp        |  1 +
 .../arch/arm64/kern_k_interrupt_name.hpp      |  9 ++-
 .../include/mesosphere/kern_build_config.hpp  |  1 +
 .../mesosphere/kern_k_capabilities.hpp        |  4 +-
 .../include/mesosphere/kern_k_process.hpp     |  1 +
 .../include/mesosphere/kern_select_cpu.hpp    | 10 +++
 .../source/arch/arm64/kern_cpu.cpp            | 76 +++++++++++++++++--
 .../source/kern_k_capabilities.cpp            | 11 ++-
 .../libmesosphere/source/kern_k_thread.cpp    | 10 +--
 9 files changed, 103 insertions(+), 20 deletions(-)

diff --git a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp
index e3f1de2e9..2876f7497 100644
--- a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp
+++ b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp
@@ -188,6 +188,7 @@ namespace ams::kern::arch::arm64::cpu {
 
     /* Synchronization helpers. */
     NOINLINE void SynchronizeAllCores();
+    void SynchronizeCores(u64 core_mask);
 
     /* Cache management helpers. */
    void StoreCacheForInit(void *addr, size_t size);
diff --git a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_name.hpp b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_name.hpp
index 6503b4e1e..7bcac7a12 100644
--- a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_name.hpp
+++ b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_name.hpp
@@ -21,11 +21,12 @@ namespace ams::kern::arch::arm64 {
 
     enum KInterruptName : s32 {
         /* SGIs */
-        KInterruptName_ThreadTerminate    = 4,
-        KInterruptName_CacheOperation     = 5,
-        KInterruptName_Scheduler          = 6,
+        KInterruptName_ThreadTerminate    = 0,
+        KInterruptName_CacheOperation     = 1,
+        KInterruptName_Scheduler         = 2,
+        KInterruptName_CoreBarrier        = 3,
 
-        KInterruptName_PerformanceCounter = 8,
+        KInterruptName_PerformanceCounter = 4,
 
         /* PPIs */
 #if defined(ATMOSPHERE_BOARD_NINTENDO_NX)
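A note on the renumbering above: on the GIC, software-generated interrupts (SGIs) occupy interrupt IDs 0-15, so both the old and the new values are legal; the revision simply packs the kernel's SGIs densely from 0, making room for the new CoreBarrier ID. A compile-time sanity check in the spirit of the change (the enum values restate the patch, but the surrounding snippet and the static_assert are illustrative, not part of the commit):

    #include <cstdint>
    using s32 = std::int32_t;

    enum KInterruptName : s32 {
        KInterruptName_ThreadTerminate    = 0,
        KInterruptName_CacheOperation     = 1,
        KInterruptName_Scheduler          = 2,
        KInterruptName_CoreBarrier        = 3,
        KInterruptName_PerformanceCounter = 4,
    };

    /* SGIs are GIC interrupt IDs 0-15; IDs 16-31 are PPIs. */
    static_assert(KInterruptName_PerformanceCounter <= 15, "kernel SGIs must stay in the GIC SGI range");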
diff --git a/libraries/libmesosphere/include/mesosphere/kern_build_config.hpp b/libraries/libmesosphere/include/mesosphere/kern_build_config.hpp
index 4cb6fed4b..f3ae3cf0c 100644
--- a/libraries/libmesosphere/include/mesosphere/kern_build_config.hpp
+++ b/libraries/libmesosphere/include/mesosphere/kern_build_config.hpp
@@ -30,6 +30,7 @@
 #endif
 
 //#define MESOSPHERE_BUILD_FOR_TRACING
+//#define MESOSPHERE_ENABLE_PERFORMANCE_COUNTER
 #define MESOSPHERE_ENABLE_PANIC_REGISTER_DUMP
 #define MESOSPHERE_ENABLE_HARDWARE_SINGLE_STEP
diff --git a/libraries/libmesosphere/include/mesosphere/kern_k_capabilities.hpp b/libraries/libmesosphere/include/mesosphere/kern_k_capabilities.hpp
index dbc973de5..7b5a93dcc 100644
--- a/libraries/libmesosphere/include/mesosphere/kern_k_capabilities.hpp
+++ b/libraries/libmesosphere/include/mesosphere/kern_k_capabilities.hpp
@@ -184,6 +184,7 @@ namespace ams::kern {
             svc::SvcAccessFlagSet m_svc_access_flags;
             InterruptFlagSet m_irq_access_flags;
             u64 m_core_mask;
+            u64 m_phys_core_mask;
             u64 m_priority_mask;
             util::BitPack32 m_debug_capabilities;
             s32 m_handle_table_size;
@@ -227,7 +228,7 @@ namespace ams::kern {
             Result SetCapabilities(const u32 *caps, s32 num_caps, KProcessPageTable *page_table);
             Result SetCapabilities(svc::KUserPointer<const u32 *> user_caps, s32 num_caps, KProcessPageTable *page_table);
         public:
-            constexpr explicit KCapabilities(util::ConstantInitializeTag) : m_svc_access_flags{}, m_irq_access_flags{}, m_core_mask{}, m_priority_mask{}, m_debug_capabilities{0}, m_handle_table_size{}, m_intended_kernel_version{}, m_program_type{} { /* ... */ }
+            constexpr explicit KCapabilities(util::ConstantInitializeTag) : m_svc_access_flags{}, m_irq_access_flags{}, m_core_mask{}, m_phys_core_mask{}, m_priority_mask{}, m_debug_capabilities{0}, m_handle_table_size{}, m_intended_kernel_version{}, m_program_type{} { /* ... */ }
 
             KCapabilities() { /* ... */ }
 
             Result Initialize(const u32 *caps, s32 num_caps, KProcessPageTable *page_table);
@@ -236,6 +237,7 @@ namespace ams::kern {
             static Result CheckCapabilities(svc::KUserPointer<const u32 *> user_caps, s32 num_caps);
 
             constexpr u64 GetCoreMask() const { return m_core_mask; }
+            constexpr u64 GetPhysicalCoreMask() const { return m_phys_core_mask; }
             constexpr u64 GetPriorityMask() const { return m_priority_mask; }
 
             constexpr s32 GetHandleTableSize() const { return m_handle_table_size; }
diff --git a/libraries/libmesosphere/include/mesosphere/kern_k_process.hpp b/libraries/libmesosphere/include/mesosphere/kern_k_process.hpp
index 8e3fb85ac..53b184b68 100644
--- a/libraries/libmesosphere/include/mesosphere/kern_k_process.hpp
+++ b/libraries/libmesosphere/include/mesosphere/kern_k_process.hpp
@@ -160,6 +160,7 @@ namespace ams::kern {
             constexpr State GetState() const { return m_state; }
 
             constexpr u64 GetCoreMask() const { return m_capabilities.GetCoreMask(); }
+            constexpr u64 GetPhysicalCoreMask() const { return m_capabilities.GetPhysicalCoreMask(); }
             constexpr u64 GetPriorityMask() const { return m_capabilities.GetPriorityMask(); }
 
             constexpr s32 GetIdealCoreId() const { return m_ideal_core_id; }
diff --git a/libraries/libmesosphere/include/mesosphere/kern_select_cpu.hpp b/libraries/libmesosphere/include/mesosphere/kern_select_cpu.hpp
index 799a2fb3f..55e030bdb 100644
--- a/libraries/libmesosphere/include/mesosphere/kern_select_cpu.hpp
+++ b/libraries/libmesosphere/include/mesosphere/kern_select_cpu.hpp
@@ -67,6 +67,16 @@ namespace ams::kern {
         return mask;
     }();
 
+    static constexpr inline u64 ConvertVirtualCoreMaskToPhysical(u64 v_core_mask) {
+        u64 p_core_mask = 0;
+        while (v_core_mask != 0) {
+            const u64 next = __builtin_ctzll(v_core_mask);
+            v_core_mask &= ~(static_cast<u64>(1) << next);
+            p_core_mask |= (static_cast<u64>(1) << cpu::VirtualToPhysicalCoreMap[next]);
+        }
+        return p_core_mask;
+    }
+
 }
 
 static_assert(cpu::NumCores <= cpu::NumVirtualCores);
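ConvertVirtualCoreMaskToPhysical peels the lowest set bit off the virtual mask with a count-trailing-zeros and re-sets that bit at the core's physical index, so callers (see kern_k_thread.cpp below) no longer open-code the loop, and KCapabilities can cache the result once at capability-parse time instead of reconverting on every use. A self-contained sketch of the technique, using a made-up virtual-to-physical map rather than the real cpu::VirtualToPhysicalCoreMap:

    #include <cstdint>
    #include <cstdio>

    /* Illustrative map only (virtual core i -> physical core 3 - i). */
    constexpr uint8_t VirtualToPhysicalCoreMap[4] = { 3, 2, 1, 0 };

    constexpr uint64_t ConvertVirtualCoreMaskToPhysical(uint64_t v_core_mask) {
        uint64_t p_core_mask = 0;
        while (v_core_mask != 0) {
            /* Find and clear the lowest set virtual-core bit... */
            const int next = __builtin_ctzll(v_core_mask);
            v_core_mask &= ~(static_cast<uint64_t>(1) << next);
            /* ...and set the bit at the corresponding physical index. */
            p_core_mask |= (static_cast<uint64_t>(1) << VirtualToPhysicalCoreMap[next]);
        }
        return p_core_mask;
    }

    /* Virtual cores {0, 1} map to physical cores {3, 2} under this map. */
    static_assert(ConvertVirtualCoreMaskToPhysical(0b0011) == 0b1100);

    int main() {
        std::printf("virtual 0b0101 -> physical 0x%llx\n",
                    static_cast<unsigned long long>(ConvertVirtualCoreMaskToPhysical(0b0101)));
        return 0;
    }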
diff --git a/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp b/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp
index 759a628f2..83142f7de 100644
--- a/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp
+++ b/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp
@@ -23,6 +23,14 @@ namespace ams::kern::arch::arm64::cpu {
 
     namespace {
 
+        ALWAYS_INLINE void SetEventLocally() {
+            __asm__ __volatile__("sevl" ::: "memory");
+        }
+
+        ALWAYS_INLINE void WaitForEvent() {
+            __asm__ __volatile__("wfe" ::: "memory");
+        }
+
         class KScopedCoreMigrationDisable {
             public:
                 ALWAYS_INLINE KScopedCoreMigrationDisable() { GetCurrentThread().DisableCoreMigration(); }
@@ -82,6 +90,51 @@ namespace ams::kern::arch::arm64::cpu {
             }
         };
 
+        class KCoreBarrierInterruptHandler : public KInterruptHandler {
+            private:
+                util::Atomic<u64> m_target_cores;
+                KSpinLock m_lock;
+            public:
+                constexpr KCoreBarrierInterruptHandler() : KInterruptHandler(), m_target_cores(0), m_lock() { /* ... */ }
+
+                virtual KInterruptTask *OnInterrupt(s32 interrupt_id) override {
+                    MESOSPHERE_UNUSED(interrupt_id);
+                    m_target_cores &= ~(1ul << GetCurrentCoreId());
+                    return nullptr;
+                }
+
+                void SynchronizeCores(u64 core_mask) {
+                    /* Disable dispatch while we synchronize. */
+                    KScopedDisableDispatch dd;
+
+                    /* Acquire exclusive access to ourselves. */
+                    KScopedSpinLock lk(m_lock);
+
+                    /* If necessary, force synchronization with other cores. */
+                    if (const u64 other_cores_mask = core_mask & ~(1ul << GetCurrentCoreId()); other_cores_mask != 0) {
+                        /* Send an interrupt to the other cores. */
+                        m_target_cores = other_cores_mask;
+                        cpu::DataSynchronizationBarrierInnerShareable();
+                        Kernel::GetInterruptManager().SendInterProcessorInterrupt(KInterruptName_CoreBarrier, other_cores_mask);
+
+                        /* Wait for all cores to acknowledge. */
+                        {
+                            u64 v;
+                            __asm__ __volatile__("ldaxr %[v], %[p]\n"
+                                                 "cbz %[v], 1f\n"
+                                                 "0:\n"
+                                                 "wfe\n"
+                                                 "ldaxr %[v], %[p]\n"
+                                                 "cbnz %[v], 0b\n"
+                                                 "1:\n"
+                                                 : [v]"=&r"(v)
+                                                 : [p]"Q"(*reinterpret_cast<u64 *>(std::addressof(m_target_cores)))
+                                                 : "memory");
+                        }
+                    }
+                }
+        };
+
         class KCacheHelperInterruptHandler : public KInterruptHandler {
             private:
                 static constexpr s32 ThreadPriority = 8;
@@ -215,7 +268,11 @@ namespace ams::kern::arch::arm64::cpu {
         /* Instances of the interrupt handlers. */
         constinit KThreadTerminationInterruptHandler g_thread_termination_handler;
         constinit KCacheHelperInterruptHandler g_cache_operation_handler;
+        constinit KCoreBarrierInterruptHandler g_core_barrier_handler;
+
+        #if defined(MESOSPHERE_ENABLE_PERFORMANCE_COUNTER)
         constinit KPerformanceCounterInterruptHandler g_performance_counter_handler[cpu::NumCores];
+        #endif
 
         /* Expose this as a global, for asm to use. */
         constinit s32 g_all_core_sync_count;
@@ -296,14 +353,6 @@ namespace ams::kern::arch::arm64::cpu {
             }
         }
 
-        ALWAYS_INLINE void SetEventLocally() {
-            __asm__ __volatile__("sevl" ::: "memory");
-        }
-
-        ALWAYS_INLINE void WaitForEvent() {
-            __asm__ __volatile__("wfe" ::: "memory");
-        }
-
         ALWAYS_INLINE Result InvalidateDataCacheRange(uintptr_t start, uintptr_t end) {
             MESOSPHERE_ASSERT(util::IsAligned(start, DataCacheLineSize));
             MESOSPHERE_ASSERT(util::IsAligned(end, DataCacheLineSize));
@@ -338,6 +387,11 @@ namespace ams::kern::arch::arm64::cpu {
 
     }
 
+    void SynchronizeCores(u64 core_mask) {
+        /* Request a core barrier interrupt. */
+        g_core_barrier_handler.SynchronizeCores(core_mask);
+    }
+
     void StoreCacheForInit(void *addr, size_t size) {
         /* Store the data cache for the specified range. */
         const uintptr_t start = util::AlignDown(reinterpret_cast<uintptr_t>(addr), DataCacheLineSize);
@@ -446,9 +500,15 @@ namespace ams::kern::arch::arm64::cpu {
         /* Bind all handlers to the relevant interrupts. */
         Kernel::GetInterruptManager().BindHandler(std::addressof(g_cache_operation_handler), KInterruptName_CacheOperation, core_id, KInterruptController::PriorityLevel_High, false, false);
         Kernel::GetInterruptManager().BindHandler(std::addressof(g_thread_termination_handler), KInterruptName_ThreadTerminate, core_id, KInterruptController::PriorityLevel_Scheduler, false, false);
+        Kernel::GetInterruptManager().BindHandler(std::addressof(g_core_barrier_handler), KInterruptName_CoreBarrier, core_id, KInterruptController::PriorityLevel_Scheduler, false, false);
+
+        /* If we should, enable user access to the performance counter registers. */
         if (KTargetSystem::IsUserPmuAccessEnabled()) { SetPmUserEnrEl0(1ul); }
 
+        /* If we should, enable the kernel performance counter interrupt handler. */
+        #if defined(MESOSPHERE_ENABLE_PERFORMANCE_COUNTER)
         Kernel::GetInterruptManager().BindHandler(std::addressof(g_performance_counter_handler[core_id]), KInterruptName_PerformanceCounter, core_id, KInterruptController::PriorityLevel_Timer, false, false);
+        #endif
     }
 
     void SynchronizeAllCores() {
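The acknowledge wait above relies on an arm64 idiom: LDAXR (load-acquire exclusive) arms the exclusive monitor on m_target_cores, and WFE wakes when another observer's store clears that monitor (or on any other event), so the initiating core sleeps between acknowledgements instead of busy-polling. A portable model of the same protocol, for illustration only; std::atomic and std::thread stand in for the IPI and the monitor, and a yield loop replaces WFE:

    #include <atomic>
    #include <cstdint>
    #include <thread>

    std::atomic<uint64_t> g_target_cores{0};

    /* What each "IPI handler" does: clear this core's bit, with release
       ordering so the work it completed is visible to the waiter. */
    void OnCoreBarrierInterrupt(int core_id) {
        g_target_cores.fetch_and(~(UINT64_C(1) << core_id), std::memory_order_release);
    }

    /* What the initiating core does after sending the IPI: wait until every
       targeted core has acknowledged. On arm64 this loop body is WFE. */
    void WaitForAcknowledgement() {
        while (g_target_cores.load(std::memory_order_acquire) != 0) {
            std::this_thread::yield();
        }
    }

    int main() {
        g_target_cores.store(0b0110, std::memory_order_relaxed); /* "cores" 1 and 2 */
        std::thread t1(OnCoreBarrierInterrupt, 1), t2(OnCoreBarrierInterrupt, 2);
        WaitForAcknowledgement();
        t1.join();
        t2.join();
        return 0;
    }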
diff --git a/libraries/libmesosphere/source/kern_k_capabilities.cpp b/libraries/libmesosphere/source/kern_k_capabilities.cpp
index c86809bec..6b2c6f4b4 100644
--- a/libraries/libmesosphere/source/kern_k_capabilities.cpp
+++ b/libraries/libmesosphere/source/kern_k_capabilities.cpp
@@ -27,7 +27,11 @@ namespace ams::kern {
         m_program_type = 0;
 
         /* Initial processes may run on all cores. */
-        m_core_mask = cpu::VirtualCoreMask;
+        constexpr u64 VirtMask = cpu::VirtualCoreMask;
+        constexpr u64 PhysMask = cpu::ConvertVirtualCoreMaskToPhysical(VirtMask);
+
+        m_core_mask      = VirtMask;
+        m_phys_core_mask = PhysMask;
 
         /* Initial processes may use any user priority they like. */
         m_priority_mask = ~0xFul;
@@ -60,7 +64,7 @@ namespace ams::kern {
 
     Result KCapabilities::SetCorePriorityCapability(const util::BitPack32 cap) {
         /* We can't set core/priority if we've already set them. */
-        R_UNLESS(m_core_mask == 0, svc::ResultInvalidArgument());
+        R_UNLESS(m_core_mask     == 0, svc::ResultInvalidArgument());
         R_UNLESS(m_priority_mask == 0, svc::ResultInvalidArgument());
 
         /* Validate the core/priority. */
@@ -81,6 +85,9 @@ namespace ams::kern {
         }
         MESOSPHERE_ASSERT((m_core_mask & cpu::VirtualCoreMask) == m_core_mask);
 
+        /* Set physical core mask. */
+        m_phys_core_mask = cpu::ConvertVirtualCoreMaskToPhysical(m_core_mask);
+
         /* Set priority mask. */
         for (auto prio = min_prio; prio <= max_prio; prio++) {
             m_priority_mask |= (1ul << prio);
diff --git a/libraries/libmesosphere/source/kern_k_thread.cpp b/libraries/libmesosphere/source/kern_k_thread.cpp
index c4d692492..b4315d97f 100644
--- a/libraries/libmesosphere/source/kern_k_thread.cpp
+++ b/libraries/libmesosphere/source/kern_k_thread.cpp
@@ -481,12 +481,16 @@ namespace ams::kern {
 
         /* Ensure that the thread is not executing on any core. */
         if (m_parent != nullptr) {
+            /* Wait for the thread to not be current on any core. */
             for (size_t i = 0; i < cpu::NumCores; ++i) {
                 KThread *core_thread;
                 do {
                     core_thread = Kernel::GetScheduler(i).GetSchedulerCurrentThread();
                 } while (core_thread == this);
             }
+
+            /* Ensure that all cores are synchronized at this point. */
+            cpu::SynchronizeCores(m_parent->GetPhysicalCoreMask());
         }
 
         /* Close the thread. */
@@ -724,11 +728,7 @@ namespace ams::kern {
         }
 
         /* Translate the virtual affinity mask to a physical one. */
-        while (v_affinity_mask != 0) {
-            const u64 next = __builtin_ctzll(v_affinity_mask);
-            v_affinity_mask &= ~(1ul << next);
-            p_affinity_mask |= (1ul << cpu::VirtualToPhysicalCoreMap[next]);
-        }
+        p_affinity_mask = cpu::ConvertVirtualCoreMaskToPhysical(v_affinity_mask);
 
         /* If we haven't disabled migration, perform an affinity change. */
         if (m_num_core_migration_disables == 0) {
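Why the thread-exit barrier matters: observing GetSchedulerCurrentThread() != this only proves the scheduler's bookkeeping has moved on; the departing core may still be running its context-switch epilogue with the dying thread's stack live. Synchronizing on the process's physical core mask forces every core the thread could have run on to take, and return from, the CoreBarrier SGI before the thread is freed. A hedged sketch of the resulting pattern; cpu::SynchronizeCores and GetPhysicalCoreMask are the APIs this patch adds, while the stub types, RetireThread, and its surrounding structure are illustrative only:

    #include <cstdint>

    /* Minimal hypothetical stubs so the sketch compiles. */
    struct KProcess {
        uint64_t phys_core_mask;
        uint64_t GetPhysicalCoreMask() const { return phys_core_mask; }
    };
    struct KThread {
        KProcess *parent;
        void Close() { /* release the final reference */ }
    };
    namespace cpu {
        void SynchronizeCores(uint64_t) { /* IPI + wait, as in kern_cpu.cpp above */ }
    }

    /* The finalize-time pattern the patch introduces in KThread::Finalize. */
    void RetireThread(KThread *thread) {
        /* 1. (Elided) spin until no scheduler reports the thread as current. */

        /* 2. Interrupt every core the thread may have run on; a core that
              takes the CoreBarrier SGI has necessarily finished switching
              away from the old context. */
        cpu::SynchronizeCores(thread->parent->GetPhysicalCoreMask());

        /* 3. Only now is it safe to drop the final reference. */
        thread->Close();
    }

    int main() {
        KProcess p{0b1111};
        KThread  t{&p};
        RetireThread(&t);
        return 0;
    }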