From 745450746723903be0faac836ec920101a841d08 Mon Sep 17 00:00:00 2001 From: Michael Scire Date: Tue, 22 Mar 2022 21:33:43 -0700 Subject: [PATCH] kern: update for new hw maintenance semantics --- .../arm64/init/kern_k_init_page_table.hpp | 22 ++- .../mesosphere/arch/arm64/kern_cpu.hpp | 34 +++-- .../arch/arm64/kern_k_interrupt_manager.hpp | 2 - .../arch/arm64/kern_k_page_table.hpp | 24 +--- .../arm64/kern_userspace_memory_access.hpp | 1 - .../mesosphere/kern_k_scheduler_impls.hpp | 2 +- .../source/arch/arm64/kern_cpu.cpp | 136 ++++++------------ .../source/arch/arm64/kern_cpu_asm.s | 135 +++++++++++++++++ .../source/arch/arm64/kern_k_debug.cpp | 12 +- .../source/arch/arm64/kern_k_page_table.cpp | 51 +++++-- .../arm64/kern_userspace_memory_access_asm.s | 20 --- .../nintendo/nx/kern_k_sleep_manager_asm.s | 3 + .../source/kern_k_initial_process_reader.cpp | 6 - .../libmesosphere/source/kern_k_thread.cpp | 2 +- .../os/impl/os_cache_impl.os.horizon.hpp | 7 + .../dd/impl/dd_cache_impl.os.horizon.hpp | 18 +++ .../source/arch/arm64/init/kern_init_core.cpp | 14 +- .../kernel/source/arch/arm64/init/start.s | 133 +++++++++++------ .../kernel_ldr/source/kern_init_loader.cpp | 8 +- 19 files changed, 386 insertions(+), 244 deletions(-) diff --git a/libraries/libmesosphere/include/mesosphere/arch/arm64/init/kern_k_init_page_table.hpp b/libraries/libmesosphere/include/mesosphere/arch/arm64/init/kern_k_init_page_table.hpp index e1618dc2a..e0613fbd5 100644 --- a/libraries/libmesosphere/include/mesosphere/arch/arm64/init/kern_k_init_page_table.hpp +++ b/libraries/libmesosphere/include/mesosphere/arch/arm64/init/kern_k_init_page_table.hpp @@ -279,20 +279,21 @@ namespace ams::kern::arch::arm64::init { /* Invalidate the entire tlb. */ cpu::DataSynchronizationBarrierInnerShareable(); - cpu::InvalidateEntireTlbInnerShareable(); + cpu::InvalidateEntireTlb(); /* Copy data, if we should. */ const u64 negative_block_size_for_mask = static_cast(-static_cast(block_size)); const u64 offset_mask = negative_block_size_for_mask & ((1ul << 48) - 1); const KVirtualAddress copy_src_addr = KVirtualAddress(src_saved.GetRawAttributesUnsafeForSwap() & offset_mask); const KVirtualAddress copy_dst_addr = KVirtualAddress(dst_saved.GetRawAttributesUnsafeForSwap() & offset_mask); - if (block_size && do_copy) { + if (do_copy) { u8 tmp[0x100]; for (size_t ofs = 0; ofs < block_size; ofs += sizeof(tmp)) { std::memcpy(tmp, GetVoidPointer(copy_src_addr + ofs), sizeof(tmp)); std::memcpy(GetVoidPointer(copy_src_addr + ofs), GetVoidPointer(copy_dst_addr + ofs), sizeof(tmp)); std::memcpy(GetVoidPointer(copy_dst_addr + ofs), tmp, sizeof(tmp)); } + cpu::DataSynchronizationBarrierInnerShareable(); } /* Swap the mappings. */ @@ -339,7 +340,6 @@ namespace ams::kern::arch::arm64::init { /* Can we make an L1 block? */ if (util::IsAligned(GetInteger(virt_addr), L1BlockSize) && util::IsAligned(GetInteger(phys_addr), L1BlockSize) && size >= L1BlockSize) { *l1_entry = L1PageTableEntry(PageTableEntry::BlockTag{}, phys_addr, attr, PageTableEntry::SoftwareReservedBit_None, false); - cpu::DataSynchronizationBarrierInnerShareable(); virt_addr += L1BlockSize; phys_addr += L1BlockSize; @@ -350,8 +350,8 @@ namespace ams::kern::arch::arm64::init { /* If we don't already have an L2 table, we need to make a new one. 
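(Note: under the new maintenance semantics the inner-shareable dsb is now issued after allocating the new table and before the L1 entry write that links it in, rather than after that write.)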
*/ if (!l1_entry->IsTable()) { KPhysicalAddress new_table = AllocateNewPageTable(allocator); - *l1_entry = L1PageTableEntry(PageTableEntry::TableTag{}, new_table, attr.IsPrivilegedExecuteNever()); cpu::DataSynchronizationBarrierInnerShareable(); + *l1_entry = L1PageTableEntry(PageTableEntry::TableTag{}, new_table, attr.IsPrivilegedExecuteNever()); } L2PageTableEntry *l2_entry = GetL2Entry(l1_entry, virt_addr); @@ -365,14 +365,12 @@ namespace ams::kern::arch::arm64::init { phys_addr += L2BlockSize; size -= L2BlockSize; } - cpu::DataSynchronizationBarrierInnerShareable(); continue; } /* Can we make an L2 block? */ if (util::IsAligned(GetInteger(virt_addr), L2BlockSize) && util::IsAligned(GetInteger(phys_addr), L2BlockSize) && size >= L2BlockSize) { *l2_entry = L2PageTableEntry(PageTableEntry::BlockTag{}, phys_addr, attr, PageTableEntry::SoftwareReservedBit_None, false); - cpu::DataSynchronizationBarrierInnerShareable(); virt_addr += L2BlockSize; phys_addr += L2BlockSize; @@ -383,8 +381,8 @@ namespace ams::kern::arch::arm64::init { /* If we don't already have an L3 table, we need to make a new one. */ if (!l2_entry->IsTable()) { KPhysicalAddress new_table = AllocateNewPageTable(allocator); - *l2_entry = L2PageTableEntry(PageTableEntry::TableTag{}, new_table, attr.IsPrivilegedExecuteNever()); cpu::DataSynchronizationBarrierInnerShareable(); + *l2_entry = L2PageTableEntry(PageTableEntry::TableTag{}, new_table, attr.IsPrivilegedExecuteNever()); } L3PageTableEntry *l3_entry = GetL3Entry(l2_entry, virt_addr); @@ -398,17 +396,18 @@ namespace ams::kern::arch::arm64::init { phys_addr += L3BlockSize; size -= L3BlockSize; } - cpu::DataSynchronizationBarrierInnerShareable(); continue; } /* Make an L3 block. */ *l3_entry = L3PageTableEntry(PageTableEntry::BlockTag{}, phys_addr, attr, PageTableEntry::SoftwareReservedBit_None, false); - cpu::DataSynchronizationBarrierInnerShareable(); virt_addr += L3BlockSize; phys_addr += L3BlockSize; size -= L3BlockSize; } + + /* Ensure data consistency after our mapping is added. */ + cpu::DataSynchronizationBarrierInnerShareable(); } KPhysicalAddress GetPhysicalAddress(KVirtualAddress virt_addr) const { @@ -556,9 +555,6 @@ namespace ams::kern::arch::arm64::init { } void Reprotect(KVirtualAddress virt_addr, size_t size, const PageTableEntry &attr_before, const PageTableEntry &attr_after) { - /* Ensure data consistency before we begin reprotection. */ - cpu::DataSynchronizationBarrierInnerShareable(); - /* Ensure that addresses and sizes are page aligned. 
*/ MESOSPHERE_INIT_ABORT_UNLESS(util::IsAligned(GetInteger(virt_addr), PageSize)); MESOSPHERE_INIT_ABORT_UNLESS(util::IsAligned(size, PageSize)); @@ -699,7 +695,7 @@ namespace ams::kern::arch::arm64::init { this->PhysicallyRandomize(virt_addr, size, L2BlockSize, do_copy); this->PhysicallyRandomize(virt_addr, size, L3ContiguousBlockSize, do_copy); this->PhysicallyRandomize(virt_addr, size, L3BlockSize, do_copy); - cpu::StoreEntireCacheForInit(); + cpu::StoreCacheForInit(GetVoidPointer(virt_addr), size); } }; diff --git a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp index 93769e362..e3f1de2e9 100644 --- a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp +++ b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_cpu.hpp @@ -48,6 +48,10 @@ namespace ams::kern::arch::arm64::cpu { __asm__ __volatile__("dsb ish" ::: "memory"); } + ALWAYS_INLINE void DataSynchronizationBarrierInnerShareableStore() { + __asm__ __volatile__("dsb ishst" ::: "memory"); + } + ALWAYS_INLINE void DataMemoryBarrier() { __asm__ __volatile__("dmb sy" ::: "memory"); } @@ -56,16 +60,20 @@ namespace ams::kern::arch::arm64::cpu { __asm__ __volatile__("dmb ish" ::: "memory"); } + ALWAYS_INLINE void DataMemoryBarrierInnerShareableStore() { + __asm__ __volatile__("dmb ishst" ::: "memory"); + } + ALWAYS_INLINE void InstructionMemoryBarrier() { __asm__ __volatile__("isb" ::: "memory"); } - ALWAYS_INLINE void EnsureInstructionConsistencyInnerShareable() { + ALWAYS_INLINE void EnsureInstructionConsistency() { DataSynchronizationBarrierInnerShareable(); InstructionMemoryBarrier(); } - ALWAYS_INLINE void EnsureInstructionConsistency() { + ALWAYS_INLINE void EnsureInstructionConsistencyFullSystem() { DataSynchronizationBarrier(); InstructionMemoryBarrier(); } @@ -182,28 +190,23 @@ namespace ams::kern::arch::arm64::cpu { NOINLINE void SynchronizeAllCores(); /* Cache management helpers. */ - void StoreEntireCacheForInit(); - void FlushEntireCacheForInit(); + void StoreCacheForInit(void *addr, size_t size); void FlushEntireDataCache(); Result InvalidateDataCache(void *addr, size_t size); Result StoreDataCache(const void *addr, size_t size); Result FlushDataCache(const void *addr, size_t size); - Result InvalidateInstructionCache(void *addr, size_t size); void InvalidateEntireInstructionCache(); + void ClearPageToZeroImpl(void *); + ALWAYS_INLINE void ClearPageToZero(void * const page) { MESOSPHERE_ASSERT(util::IsAligned(reinterpret_cast(page), PageSize)); MESOSPHERE_ASSERT(page != nullptr); - uintptr_t cur = reinterpret_cast(__builtin_assume_aligned(page, PageSize)); - const uintptr_t last = cur + PageSize - DataCacheLineSize; - - for (/* ... 
*/; cur <= last; cur += DataCacheLineSize) { - __asm__ __volatile__("dc zva, %[cur]" :: [cur]"r"(cur) : "memory"); - } + ClearPageToZeroImpl(page); } ALWAYS_INLINE void InvalidateTlbByAsid(u32 asid) { @@ -223,20 +226,15 @@ namespace ams::kern::arch::arm64::cpu { EnsureInstructionConsistency(); } - ALWAYS_INLINE void InvalidateEntireTlbInnerShareable() { - __asm__ __volatile__("tlbi vmalle1is" ::: "memory"); - EnsureInstructionConsistencyInnerShareable(); - } - ALWAYS_INLINE void InvalidateEntireTlbDataOnly() { __asm__ __volatile__("tlbi vmalle1is" ::: "memory"); - DataSynchronizationBarrier(); + DataSynchronizationBarrierInnerShareable(); } ALWAYS_INLINE void InvalidateTlbByVaDataOnly(KProcessAddress virt_addr) { const u64 value = ((GetInteger(virt_addr) >> 12) & 0xFFFFFFFFFFFul); __asm__ __volatile__("tlbi vaae1is, %[value]" :: [value]"r"(value) : "memory"); - DataSynchronizationBarrier(); + DataSynchronizationBarrierInnerShareable(); } ALWAYS_INLINE uintptr_t GetCurrentThreadPointerValue() { diff --git a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_manager.hpp b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_manager.hpp index ae5b60517..2a0c15604 100644 --- a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_manager.hpp +++ b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_interrupt_manager.hpp @@ -96,8 +96,6 @@ namespace ams::kern::arch::arm64 { } static void HandleInterrupt(bool user_mode); - - /* Implement more KInterruptManager functionality. */ private: Result BindGlobal(KInterruptHandler *handler, s32 irq, s32 core_id, s32 priority, bool manual_clear, bool level); Result BindLocal(KInterruptHandler *handler, s32 irq, s32 priority, bool manual_clear); diff --git a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_page_table.hpp b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_page_table.hpp index efaf8a7ac..334ec014f 100644 --- a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_page_table.hpp +++ b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_k_page_table.hpp @@ -174,7 +174,6 @@ namespace ams::kern::arch::arm64 { static NOINLINE void Initialize(s32 core_id); ALWAYS_INLINE void Activate(u32 proc_id) { - cpu::DataSynchronizationBarrier(); cpu::SwitchProcess(m_ttbr, proc_id); } @@ -219,12 +218,13 @@ namespace ams::kern::arch::arm64 { Result ChangePermissions(KProcessAddress virt_addr, size_t num_pages, PageTableEntry entry_template, DisableMergeAttribute disable_merge_attr, bool refresh_mapping, PageLinkedList *page_list, bool reuse_ll); - static ALWAYS_INLINE void PteDataSynchronizationBarrier() { - cpu::DataSynchronizationBarrierInnerShareable(); + static ALWAYS_INLINE void PteDataMemoryBarrier() { + cpu::DataMemoryBarrierInnerShareableStore(); } static ALWAYS_INLINE void ClearPageTable(KVirtualAddress table) { cpu::ClearPageToZero(GetVoidPointer(table)); + cpu::DataSynchronizationBarrierInnerShareable(); } ALWAYS_INLINE void OnTableUpdated() const { @@ -239,22 +239,8 @@ namespace ams::kern::arch::arm64 { cpu::InvalidateTlbByVaDataOnly(virt_addr); } - ALWAYS_INLINE void NoteUpdated() const { - cpu::DataSynchronizationBarrier(); - - if (this->IsKernel()) { - this->OnKernelTableUpdated(); - } else { - this->OnTableUpdated(); - } - } - - ALWAYS_INLINE void NoteSingleKernelPageUpdated(KProcessAddress virt_addr) const { - MESOSPHERE_ASSERT(this->IsKernel()); - - cpu::DataSynchronizationBarrier(); - this->OnKernelTableSinglePageUpdated(virt_addr); - } + 
void NoteUpdated() const; + void NoteSingleKernelPageUpdated(KProcessAddress virt_addr) const; KVirtualAddress AllocatePageTable(PageLinkedList *page_list, bool reuse_ll) const { KVirtualAddress table = this->GetPageTableManager().Allocate(); diff --git a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_userspace_memory_access.hpp b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_userspace_memory_access.hpp index 7245831c2..202be5f51 100644 --- a/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_userspace_memory_access.hpp +++ b/libraries/libmesosphere/include/mesosphere/arch/arm64/kern_userspace_memory_access.hpp @@ -46,7 +46,6 @@ namespace ams::kern::arch::arm64 { static bool StoreDataCache(uintptr_t start, uintptr_t end); static bool FlushDataCache(uintptr_t start, uintptr_t end); static bool InvalidateDataCache(uintptr_t start, uintptr_t end); - static bool InvalidateInstructionCache(uintptr_t start, uintptr_t end); static bool ReadIoMemory32Bit(void *dst, const void *src, size_t size); static bool ReadIoMemory16Bit(void *dst, const void *src, size_t size); diff --git a/libraries/libmesosphere/include/mesosphere/kern_k_scheduler_impls.hpp b/libraries/libmesosphere/include/mesosphere/kern_k_scheduler_impls.hpp index effaf048f..e7071ab0b 100644 --- a/libraries/libmesosphere/include/mesosphere/kern_k_scheduler_impls.hpp +++ b/libraries/libmesosphere/include/mesosphere/kern_k_scheduler_impls.hpp @@ -35,7 +35,7 @@ namespace ams::kern { ALWAYS_INLINE void KScheduler::RescheduleOtherCores(u64 cores_needing_scheduling) { if (const u64 core_mask = cores_needing_scheduling & ~(1ul << m_core_id); core_mask != 0) { - cpu::DataSynchronizationBarrier(); + cpu::DataSynchronizationBarrierInnerShareable(); Kernel::GetInterruptManager().SendInterProcessorInterrupt(KInterruptName_Scheduler, core_mask); } } diff --git a/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp b/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp index 15fecd44c..759a628f2 100644 --- a/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp +++ b/libraries/libmesosphere/source/arch/arm64/kern_cpu.cpp @@ -176,7 +176,7 @@ namespace ams::kern::arch::arm64::cpu { const u64 target_mask = m_target_cores.Load(); - DataSynchronizationBarrier(); + DataSynchronizationBarrierInnerShareable(); Kernel::GetInterruptManager().SendInterProcessorInterrupt(KInterruptName_CacheOperation, target_mask); this->ProcessOperation(); @@ -213,32 +213,37 @@ namespace ams::kern::arch::arm64::cpu { }; /* Instances of the interrupt handlers. */ - KThreadTerminationInterruptHandler g_thread_termination_handler; - KCacheHelperInterruptHandler g_cache_operation_handler; - KPerformanceCounterInterruptHandler g_performance_counter_handler[cpu::NumCores]; + constinit KThreadTerminationInterruptHandler g_thread_termination_handler; + constinit KCacheHelperInterruptHandler g_cache_operation_handler; + constinit KPerformanceCounterInterruptHandler g_performance_counter_handler[cpu::NumCores]; /* Expose this as a global, for asm to use. */ - s32 g_all_core_sync_count; + constinit s32 g_all_core_sync_count; - template + template ALWAYS_INLINE void PerformCacheOperationBySetWayImpl(int level, F f) { /* Used in multiple locations. */ const u64 level_sel_value = static_cast(level << 1); + /* Get the cache size id register value with interrupts disabled. */ u64 ccsidr_value; - if constexpr (Init) { - /* During init, we can just set the selection register directly. 
*/ - cpu::SetCsselrEl1(level_sel_value); - cpu::InstructionMemoryBarrier(); - ccsidr_value = cpu::GetCcsidrEl1(); - } else { - /* After init, we need to care about interrupts. */ + { + /* Disable interrupts. */ KScopedInterruptDisable di; + + /* Configure the cache select register for our level. */ cpu::SetCsselrEl1(level_sel_value); + + /* Ensure our configuration takes before reading the cache size id register. */ cpu::InstructionMemoryBarrier(); + + /* Get the cache size id register. */ ccsidr_value = cpu::GetCcsidrEl1(); } + /* Ensure that no memory inconsistencies occur between cache management invocations. */ + cpu::DataSynchronizationBarrier(); + /* Get cache size id info. */ CacheSizeIdRegisterAccessor ccsidr_el1(ccsidr_value); const int num_sets = ccsidr_el1.GetNumberOfSets(); @@ -266,13 +271,11 @@ namespace ams::kern::arch::arm64::cpu { } void StoreDataCacheBySetWay(int level) { - PerformCacheOperationBySetWayImpl(level, StoreDataCacheLineBySetWayImpl); - cpu::DataSynchronizationBarrier(); + PerformCacheOperationBySetWayImpl(level, StoreDataCacheLineBySetWayImpl); } void FlushDataCacheBySetWay(int level) { - PerformCacheOperationBySetWayImpl(level, FlushDataCacheLineBySetWayImpl); - cpu::DataSynchronizationBarrier(); + PerformCacheOperationBySetWayImpl(level, FlushDataCacheLineBySetWayImpl); } void KCacheHelperInterruptHandler::ProcessOperation() { @@ -284,9 +287,11 @@ namespace ams::kern::arch::arm64::cpu { break; case Operation::StoreDataCache: StoreDataCacheBySetWay(0); + cpu::DataSynchronizationBarrier(); break; case Operation::FlushDataCache: FlushDataCacheBySetWay(0); + cpu::DataSynchronizationBarrier(); break; } } @@ -323,14 +328,6 @@ namespace ams::kern::arch::arm64::cpu { R_SUCCEED(); } - ALWAYS_INLINE Result InvalidateInstructionCacheRange(uintptr_t start, uintptr_t end) { - MESOSPHERE_ASSERT(util::IsAligned(start, InstructionCacheLineSize)); - MESOSPHERE_ASSERT(util::IsAligned(end, InstructionCacheLineSize)); - R_UNLESS(UserspaceAccess::InvalidateInstructionCache(start, end), svc::ResultInvalidCurrentMemory()); - EnsureInstructionConsistency(); - R_SUCCEED(); - } - ALWAYS_INLINE void InvalidateEntireInstructionCacheLocalImpl() { __asm__ __volatile__("ic iallu" ::: "memory"); } @@ -341,26 +338,12 @@ namespace ams::kern::arch::arm64::cpu { } - void StoreEntireCacheForInit() { - /* Store local. */ - { - CacheLineIdRegisterAccessor clidr_el1; - const int levels_of_unification = clidr_el1.GetLevelsOfUnification(); - - for (int level = 0; level != levels_of_unification; ++level) { - PerformCacheOperationBySetWayImpl(level, StoreDataCacheLineBySetWayImpl); - } - } - - /* Store shared. */ - { - CacheLineIdRegisterAccessor clidr_el1; - const int levels_of_coherency = clidr_el1.GetLevelsOfCoherency(); - const int levels_of_unification = clidr_el1.GetLevelsOfUnification(); - - for (int level = levels_of_unification; level <= levels_of_coherency; ++level) { - PerformCacheOperationBySetWayImpl(level, StoreDataCacheLineBySetWayImpl); - } + void StoreCacheForInit(void *addr, size_t size) { + /* Store the data cache for the specified range. */ + const uintptr_t start = util::AlignDown(reinterpret_cast(addr), DataCacheLineSize); + const uintptr_t end = start + size; + for (uintptr_t cur = start; cur < end; cur += DataCacheLineSize) { + __asm__ __volatile__("dc cvac, %[cur]" :: [cur]"r"(cur) : "memory"); } /* Data synchronization barrier. */ @@ -370,36 +353,7 @@ namespace ams::kern::arch::arm64::cpu { InvalidateEntireInstructionCacheLocalImpl(); /* Ensure local instruction consistency. 
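(EnsureInstructionConsistency() now names the inner-shareable dsb+isb pair; the previous full-system behavior has been renamed EnsureInstructionConsistencyFullSystem().)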
*/ - DataSynchronizationBarrierInnerShareable(); - InstructionMemoryBarrier(); - } - - void FlushEntireCacheForInit() { - /* Flush data cache. */ - { - /* Get levels of coherence/unificaiton. */ - CacheLineIdRegisterAccessor clidr_el1; - const int levels_of_coherency = clidr_el1.GetLevelsOfCoherency(); - - /* Store cache from L1 up to (level of coherence - 1). */ - for (int level = 0; level < levels_of_coherency - 1; ++level) { - PerformCacheOperationBySetWayImpl(level, StoreDataCacheLineBySetWayImpl); - DataSynchronizationBarrier(); - } - - /* Flush cache from (level of coherence - 1) down to L0. */ - for (int level = levels_of_coherency; level > 0; --level) { - PerformCacheOperationBySetWayImpl(level - 1, FlushDataCacheLineBySetWayImpl); - DataSynchronizationBarrier(); - } - } - - /* Invalidate instruction cache. */ - InvalidateEntireInstructionCacheLocalImpl(); EnsureInstructionConsistency(); - - /* Invalidate entire TLB. */ - InvalidateEntireTlb(); } void FlushEntireDataCache() { @@ -417,10 +371,17 @@ namespace ams::kern::arch::arm64::cpu { for (int level = levels_of_coherency; level > 1; --level) { FlushDataCacheBySetWay(level - 1); } + + /* Data synchronization barrier for full system. */ + DataSynchronizationBarrier(); } Result InvalidateDataCache(void *addr, size_t size) { - KScopedCoreMigrationDisable dm; + /* Mark ourselves as in a cache maintenance operation, and prevent re-ordering. */ + __asm__ __volatile__("" ::: "memory"); + GetCurrentThread().SetInCacheMaintenanceOperation(); + ON_SCOPE_EXIT { GetCurrentThread().ClearInCacheMaintenanceOperation(); __asm__ __volatile__("" ::: "memory"); }; + const uintptr_t start = reinterpret_cast(addr); const uintptr_t end = start + size; uintptr_t aligned_start = util::AlignDown(start, DataCacheLineSize); @@ -444,7 +405,11 @@ namespace ams::kern::arch::arm64::cpu { } Result StoreDataCache(const void *addr, size_t size) { - KScopedCoreMigrationDisable dm; + /* Mark ourselves as in a cache maintenance operation, and prevent re-ordering. */ + __asm__ __volatile__("" ::: "memory"); + GetCurrentThread().SetInCacheMaintenanceOperation(); + ON_SCOPE_EXIT { GetCurrentThread().ClearInCacheMaintenanceOperation(); __asm__ __volatile__("" ::: "memory"); }; + const uintptr_t start = util::AlignDown(reinterpret_cast(addr), DataCacheLineSize); const uintptr_t end = util::AlignUp( reinterpret_cast(addr) + size, DataCacheLineSize); @@ -452,26 +417,17 @@ namespace ams::kern::arch::arm64::cpu { } Result FlushDataCache(const void *addr, size_t size) { - KScopedCoreMigrationDisable dm; + /* Mark ourselves as in a cache maintenance operation, and prevent re-ordering. */ + __asm__ __volatile__("" ::: "memory"); + GetCurrentThread().SetInCacheMaintenanceOperation(); + ON_SCOPE_EXIT { GetCurrentThread().ClearInCacheMaintenanceOperation(); __asm__ __volatile__("" ::: "memory"); }; + const uintptr_t start = util::AlignDown(reinterpret_cast(addr), DataCacheLineSize); const uintptr_t end = util::AlignUp( reinterpret_cast(addr) + size, DataCacheLineSize); R_RETURN(FlushDataCacheRange(start, end)); } - Result InvalidateInstructionCache(void *addr, size_t size) { - KScopedCoreMigrationDisable dm; - const uintptr_t start = util::AlignDown(reinterpret_cast(addr), InstructionCacheLineSize); - const uintptr_t end = util::AlignUp( reinterpret_cast(addr) + size, InstructionCacheLineSize); - - R_TRY(InvalidateInstructionCacheRange(start, end)); - - /* Request the interrupt helper to perform an instruction memory barrier. 
*/ - g_cache_operation_handler.RequestOperation(KCacheHelperInterruptHandler::Operation::InstructionMemoryBarrier); - - R_SUCCEED(); - } - void InvalidateEntireInstructionCache() { KScopedCoreMigrationDisable dm; diff --git a/libraries/libmesosphere/source/arch/arm64/kern_cpu_asm.s b/libraries/libmesosphere/source/arch/arm64/kern_cpu_asm.s index 1de732d6f..3c6ec40cb 100644 --- a/libraries/libmesosphere/source/arch/arm64/kern_cpu_asm.s +++ b/libraries/libmesosphere/source/arch/arm64/kern_cpu_asm.s @@ -61,3 +61,138 @@ _ZN3ams4kern4arch5arm643cpu23SynchronizeAllCoresImplEPii: 5: stlr wzr, [x0] ret + +/* ams::kern::arch::arm64::cpu::ClearPageToZeroImpl(void *) */ +.section .text._ZN3ams4kern4arch5arm643cpu19ClearPageToZeroImplEPv, "ax", %progbits +.global _ZN3ams4kern4arch5arm643cpu19ClearPageToZeroImplEPv +.type _ZN3ams4kern4arch5arm643cpu19ClearPageToZeroImplEPv, %function +_ZN3ams4kern4arch5arm643cpu19ClearPageToZeroImplEPv: + /* Efficiently clear the page using dc zva. */ + dc zva, x0 + add x8, x0, #0x040 + dc zva, x8 + add x8, x0, #0x080 + dc zva, x8 + add x8, x0, #0x0c0 + dc zva, x8 + add x8, x0, #0x100 + dc zva, x8 + add x8, x0, #0x140 + dc zva, x8 + add x8, x0, #0x180 + dc zva, x8 + add x8, x0, #0x1c0 + dc zva, x8 + add x8, x0, #0x200 + dc zva, x8 + add x8, x0, #0x240 + dc zva, x8 + add x8, x0, #0x280 + dc zva, x8 + add x8, x0, #0x2c0 + dc zva, x8 + add x8, x0, #0x300 + dc zva, x8 + add x8, x0, #0x340 + dc zva, x8 + add x8, x0, #0x380 + dc zva, x8 + add x8, x0, #0x3c0 + dc zva, x8 + add x8, x0, #0x400 + dc zva, x8 + add x8, x0, #0x440 + dc zva, x8 + add x8, x0, #0x480 + dc zva, x8 + add x8, x0, #0x4c0 + dc zva, x8 + add x8, x0, #0x500 + dc zva, x8 + add x8, x0, #0x540 + dc zva, x8 + add x8, x0, #0x580 + dc zva, x8 + add x8, x0, #0x5c0 + dc zva, x8 + add x8, x0, #0x600 + dc zva, x8 + add x8, x0, #0x640 + dc zva, x8 + add x8, x0, #0x680 + dc zva, x8 + add x8, x0, #0x6c0 + dc zva, x8 + add x8, x0, #0x700 + dc zva, x8 + add x8, x0, #0x740 + dc zva, x8 + add x8, x0, #0x780 + dc zva, x8 + add x8, x0, #0x7c0 + dc zva, x8 + add x8, x0, #0x800 + dc zva, x8 + add x8, x0, #0x840 + dc zva, x8 + add x8, x0, #0x880 + dc zva, x8 + add x8, x0, #0x8c0 + dc zva, x8 + add x8, x0, #0x900 + dc zva, x8 + add x8, x0, #0x940 + dc zva, x8 + add x8, x0, #0x980 + dc zva, x8 + add x8, x0, #0x9c0 + dc zva, x8 + add x8, x0, #0xa00 + dc zva, x8 + add x8, x0, #0xa40 + dc zva, x8 + add x8, x0, #0xa80 + dc zva, x8 + add x8, x0, #0xac0 + dc zva, x8 + add x8, x0, #0xb00 + dc zva, x8 + add x8, x0, #0xb40 + dc zva, x8 + add x8, x0, #0xb80 + dc zva, x8 + add x8, x0, #0xbc0 + dc zva, x8 + add x8, x0, #0xc00 + dc zva, x8 + add x8, x0, #0xc40 + dc zva, x8 + add x8, x0, #0xc80 + dc zva, x8 + add x8, x0, #0xcc0 + dc zva, x8 + add x8, x0, #0xd00 + dc zva, x8 + add x8, x0, #0xd40 + dc zva, x8 + add x8, x0, #0xd80 + dc zva, x8 + add x8, x0, #0xdc0 + dc zva, x8 + add x8, x0, #0xe00 + dc zva, x8 + add x8, x0, #0xe40 + dc zva, x8 + add x8, x0, #0xe80 + dc zva, x8 + add x8, x0, #0xec0 + dc zva, x8 + add x8, x0, #0xf00 + dc zva, x8 + add x8, x0, #0xf40 + dc zva, x8 + add x8, x0, #0xf80 + dc zva, x8 + add x8, x0, #0xfc0 + dc zva, x8 + ret diff --git a/libraries/libmesosphere/source/arch/arm64/kern_k_debug.cpp b/libraries/libmesosphere/source/arch/arm64/kern_k_debug.cpp index 690e3134e..032cdea31 100644 --- a/libraries/libmesosphere/source/arch/arm64/kern_k_debug.cpp +++ b/libraries/libmesosphere/source/arch/arm64/kern_k_debug.cpp @@ -257,21 +257,21 @@ namespace ams::kern::arch::arm64 { #define MESOSPHERE_SET_HW_BREAK_POINT(ID, FLAGS, 
VALUE) \ ({ \ cpu::SetDbgBcr##ID##El1(0); \ - cpu::EnsureInstructionConsistency(); \ + cpu::EnsureInstructionConsistencyFullSystem(); \ cpu::SetDbgBvr##ID##El1(VALUE); \ - cpu::EnsureInstructionConsistency(); \ + cpu::EnsureInstructionConsistencyFullSystem(); \ cpu::SetDbgBcr##ID##El1(FLAGS); \ - cpu::EnsureInstructionConsistency(); \ + cpu::EnsureInstructionConsistencyFullSystem(); \ }) #define MESOSPHERE_SET_HW_WATCH_POINT(ID, FLAGS, VALUE) \ ({ \ cpu::SetDbgWcr##ID##El1(0); \ - cpu::EnsureInstructionConsistency(); \ + cpu::EnsureInstructionConsistencyFullSystem(); \ cpu::SetDbgWvr##ID##El1(VALUE); \ - cpu::EnsureInstructionConsistency(); \ + cpu::EnsureInstructionConsistencyFullSystem(); \ cpu::SetDbgWcr##ID##El1(FLAGS); \ - cpu::EnsureInstructionConsistency(); \ + cpu::EnsureInstructionConsistencyFullSystem(); \ }) Result KDebug::SetHardwareBreakPoint(ams::svc::HardwareBreakPointRegisterName name, u64 flags, u64 value) { diff --git a/libraries/libmesosphere/source/arch/arm64/kern_k_page_table.cpp b/libraries/libmesosphere/source/arch/arm64/kern_k_page_table.cpp index 66fec8b80..eaf88fdcf 100644 --- a/libraries/libmesosphere/source/arch/arm64/kern_k_page_table.cpp +++ b/libraries/libmesosphere/source/arch/arm64/kern_k_page_table.cpp @@ -158,6 +158,32 @@ namespace ams::kern::arch::arm64 { } + ALWAYS_INLINE void KPageTable::NoteUpdated() const { + cpu::DataSynchronizationBarrierInnerShareableStore(); + + /* Mark ourselves as in a tlb maintenance operation. */ + GetCurrentThread().SetInTlbMaintenanceOperation(); + ON_SCOPE_EXIT { GetCurrentThread().ClearInTlbMaintenanceOperation(); __asm__ __volatile__("" ::: "memory"); }; + + if (this->IsKernel()) { + this->OnKernelTableUpdated(); + } else { + this->OnTableUpdated(); + } + } + + ALWAYS_INLINE void KPageTable::NoteSingleKernelPageUpdated(KProcessAddress virt_addr) const { + MESOSPHERE_ASSERT(this->IsKernel()); + + cpu::DataSynchronizationBarrierInnerShareableStore(); + + /* Mark ourselves as in a tlb maintenance operation. */ + GetCurrentThread().SetInTlbMaintenanceOperation(); + ON_SCOPE_EXIT { GetCurrentThread().ClearInTlbMaintenanceOperation(); __asm__ __volatile__("" ::: "memory"); }; + + this->OnKernelTableSinglePageUpdated(virt_addr); + } + void KPageTable::Initialize(s32 core_id) { /* Nothing actually needed here. */ MESOSPHERE_UNUSED(core_id); @@ -412,9 +438,8 @@ namespace ams::kern::arch::arm64 { /* Set the entry. */ l2_phys = GetPageTablePhysicalAddress(l2_virt); - PteDataSynchronizationBarrier(); + PteDataMemoryBarrier(); *l1_entry = L1PageTableEntry(PageTableEntry::TableTag{}, l2_phys, this->IsKernel(), true); - PteDataSynchronizationBarrier(); } else { l2_virt = GetPageTableVirtualAddress(l2_phys); } @@ -477,9 +502,8 @@ namespace ams::kern::arch::arm64 { /* Set the entry. */ l2_phys = GetPageTablePhysicalAddress(l2_virt); - PteDataSynchronizationBarrier(); + PteDataMemoryBarrier(); *l1_entry = L1PageTableEntry(PageTableEntry::TableTag{}, l2_phys, this->IsKernel(), true); - PteDataSynchronizationBarrier(); l2_allocated = true; } else { l2_virt = GetPageTableVirtualAddress(l2_phys); @@ -505,9 +529,8 @@ namespace ams::kern::arch::arm64 { /* Set the entry. 
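(Publishing the new table now uses PteDataMemoryBarrier(), a dmb ishst, before the entry write; the dsb that previously followed the write is removed.)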
*/ l3_phys = GetPageTablePhysicalAddress(l3_virt); - PteDataSynchronizationBarrier(); + PteDataMemoryBarrier(); *l2_entry = L2PageTableEntry(PageTableEntry::TableTag{}, l3_phys, this->IsKernel(), true); - PteDataSynchronizationBarrier(); l2_open_count++; } else { l3_virt = GetPageTableVirtualAddress(l3_phys); @@ -631,7 +654,7 @@ namespace ams::kern::arch::arm64 { for (size_t i = 0; i < num_l2_blocks; i++) { *impl.GetL2EntryFromTable(l2_virt, virt_addr + L2BlockSize * i) = InvalidL2PageTableEntry; } - PteDataSynchronizationBarrier(); + PteDataMemoryBarrier(); /* Close references to the L2 table. */ if (this->GetPageTableManager().IsInPageTableHeap(l2_virt)) { @@ -665,7 +688,7 @@ namespace ams::kern::arch::arm64 { for (size_t i = 0; i < num_l3_blocks; i++) { *impl.GetL3EntryFromTable(l3_virt, virt_addr + L3BlockSize * i) = InvalidL3PageTableEntry; } - PteDataSynchronizationBarrier(); + PteDataMemoryBarrier(); /* Close references to the L3 table. */ if (this->GetPageTableManager().IsInPageTableHeap(l3_virt)) { @@ -783,6 +806,9 @@ namespace ams::kern::arch::arm64 { this->MergePages(orig_virt_addr + (num_pages - 1) * PageSize, page_list); } + /* Wait for pending stores to complete. */ + cpu::DataSynchronizationBarrierInnerShareableStore(); + /* Open references to the pages, if we should. */ if (IsHeapPhysicalAddress(orig_phys_addr)) { Kernel::GetMemoryManager().Open(orig_phys_addr, num_pages); @@ -878,6 +904,9 @@ namespace ams::kern::arch::arm64 { this->MergePages(orig_virt_addr + (num_pages - 1) * PageSize, page_list); } + /* Wait for pending stores to complete. */ + cpu::DataSynchronizationBarrierInnerShareableStore(); + /* We succeeded! We want to persist the reference to the pages. */ spg.CancelClose(); R_SUCCEED(); @@ -967,7 +996,6 @@ namespace ams::kern::arch::arm64 { auto sw_reserved_bits = PageTableEntry::EncodeSoftwareReservedBits(head_entry->IsHeadMergeDisabled(), head_entry->IsHeadAndBodyMergeDisabled(), tail_entry->IsTailMergeDisabled()); /* Merge! */ - PteDataSynchronizationBarrier(); *l2_entry = L2PageTableEntry(PageTableEntry::BlockTag{}, phys_addr, PageTableEntry(entry_template), sw_reserved_bits, false); /* Note that we updated. */ @@ -1049,7 +1077,6 @@ namespace ams::kern::arch::arm64 { auto sw_reserved_bits = PageTableEntry::EncodeSoftwareReservedBits(head_entry->IsHeadMergeDisabled(), head_entry->IsHeadAndBodyMergeDisabled(), tail_entry->IsTailMergeDisabled()); /* Merge! */ - /* NOTE: As of 13.1.0, Nintendo does not do: PteDataSynchronizationBarrier(); */ *l1_entry = L1PageTableEntry(PageTableEntry::BlockTag{}, phys_addr, PageTableEntry(entry_template), sw_reserved_bits, false); /* Note that we updated. */ @@ -1097,7 +1124,7 @@ namespace ams::kern::arch::arm64 { this->GetPageTableManager().Open(l2_table, L1BlockSize / L2BlockSize); /* Replace the L1 entry with one to the new table. */ - PteDataSynchronizationBarrier(); + PteDataMemoryBarrier(); *l1_entry = L1PageTableEntry(PageTableEntry::TableTag{}, l2_phys, this->IsKernel(), true); this->NoteUpdated(); } @@ -1147,7 +1174,7 @@ namespace ams::kern::arch::arm64 { this->GetPageTableManager().Open(l3_table, L2BlockSize / L3BlockSize); /* Replace the L2 entry with one to the new table. 
*/ - PteDataSynchronizationBarrier(); + PteDataMemoryBarrier(); *l2_entry = L2PageTableEntry(PageTableEntry::TableTag{}, l3_phys, this->IsKernel(), true); this->NoteUpdated(); } diff --git a/libraries/libmesosphere/source/arch/arm64/kern_userspace_memory_access_asm.s b/libraries/libmesosphere/source/arch/arm64/kern_userspace_memory_access_asm.s index 8a64750ac..660d5a4eb 100644 --- a/libraries/libmesosphere/source/arch/arm64/kern_userspace_memory_access_asm.s +++ b/libraries/libmesosphere/source/arch/arm64/kern_userspace_memory_access_asm.s @@ -577,26 +577,6 @@ _ZN3ams4kern4arch5arm6415UserspaceAccess19InvalidateDataCacheEmm: mov x0, #1 ret -/* ams::kern::arch::arm64::UserspaceAccess::InvalidateInstructionCache(uintptr_t start, uintptr_t end) */ -.section .text._ZN3ams4kern4arch5arm6415UserspaceAccess26InvalidateInstructionCacheEmm, "ax", %progbits -.global _ZN3ams4kern4arch5arm6415UserspaceAccess26InvalidateInstructionCacheEmm -.type _ZN3ams4kern4arch5arm6415UserspaceAccess26InvalidateInstructionCacheEmm, %function -.balign 0x10 -_ZN3ams4kern4arch5arm6415UserspaceAccess26InvalidateInstructionCacheEmm: - /* Check if we have any work to do. */ - cmp x1, x0 - b.eq 2f - -1: /* Loop, invalidating each cache line. */ - ic ivau, x0 - add x0, x0, #0x40 - cmp x1, x0 - b.ne 1b - -2: /* We're done! */ - mov x0, #1 - ret - /* ams::kern::arch::arm64::UserspaceAccess::ReadIoMemory32Bit(void *dst, const void *src, size_t size) */ .section .text._ZN3ams4kern4arch5arm6415UserspaceAccess17ReadIoMemory32BitEPvPKvm, "ax", %progbits .global _ZN3ams4kern4arch5arm6415UserspaceAccess17ReadIoMemory32BitEPvPKvm diff --git a/libraries/libmesosphere/source/board/nintendo/nx/kern_k_sleep_manager_asm.s b/libraries/libmesosphere/source/board/nintendo/nx/kern_k_sleep_manager_asm.s index 9864c1c15..fca5455c7 100644 --- a/libraries/libmesosphere/source/board/nintendo/nx/kern_k_sleep_manager_asm.s +++ b/libraries/libmesosphere/source/board/nintendo/nx/kern_k_sleep_manager_asm.s @@ -278,6 +278,9 @@ _ZN3ams4kern5board8nintendo2nx13KSleepManager11ResumeEntryEm: .global _ZN3ams4kern5board8nintendo2nx13KSleepManager33InvalidateDataCacheForResumeEntryEm .type _ZN3ams4kern5board8nintendo2nx13KSleepManager33InvalidateDataCacheForResumeEntryEm, %function _ZN3ams4kern5board8nintendo2nx13KSleepManager33InvalidateDataCacheForResumeEntryEm: + /* cpu::DataSynchronizationBarrier(); */ + dsb sy + /* const u64 level_sel_value = level << 1; */ lsl x8, x0, #1 diff --git a/libraries/libmesosphere/source/kern_k_initial_process_reader.cpp b/libraries/libmesosphere/source/kern_k_initial_process_reader.cpp index a7c272dc5..ff5f21a61 100644 --- a/libraries/libmesosphere/source/kern_k_initial_process_reader.cpp +++ b/libraries/libmesosphere/source/kern_k_initial_process_reader.cpp @@ -179,13 +179,7 @@ namespace ams::kern { } } - /* Flush caches. */ - /* NOTE: This seems incorrect according to arm spec, which says not to flush via set/way after boot. */ - /* However, Nintendo flushes the entire cache here and not doing so has caused reports of abort with ESR_EL1 */ - /* as 0x02000000 (unknown abort) to occur. 
*/ MESOSPHERE_UNUSED(params); - cpu::FlushEntireDataCache(); - cpu::InvalidateEntireInstructionCache(); R_SUCCEED(); } diff --git a/libraries/libmesosphere/source/kern_k_thread.cpp b/libraries/libmesosphere/source/kern_k_thread.cpp index 38f7f8a8b..c4d692492 100644 --- a/libraries/libmesosphere/source/kern_k_thread.cpp +++ b/libraries/libmesosphere/source/kern_k_thread.cpp @@ -1315,7 +1315,7 @@ namespace ams::kern { /* If the thread is runnable, send a termination interrupt to other cores. */ if (this->GetState() == ThreadState_Runnable) { if (const u64 core_mask = m_physical_affinity_mask.GetAffinityMask() & ~(1ul << GetCurrentCoreId()); core_mask != 0) { - cpu::DataSynchronizationBarrier(); + cpu::DataSynchronizationBarrierInnerShareable(); Kernel::GetInterruptManager().SendInterProcessorInterrupt(KInterruptName_ThreadTerminate, core_mask); } } diff --git a/libraries/libstratosphere/source/os/impl/os_cache_impl.os.horizon.hpp b/libraries/libstratosphere/source/os/impl/os_cache_impl.os.horizon.hpp index 46562b203..a6fbb197d 100644 --- a/libraries/libstratosphere/source/os/impl/os_cache_impl.os.horizon.hpp +++ b/libraries/libstratosphere/source/os/impl/os_cache_impl.os.horizon.hpp @@ -32,6 +32,13 @@ namespace ams::os::impl { /* Calculate cache line size. */ cache_line_size = 4 << ((cache_type_register >> 16) & 0xF); + /* Get the thread local region. */ + auto * const tlr = svc::GetThreadLocalRegion(); + + /* Note to the kernel that we're performing cache maintenance, in case we get interrupted while touching cache lines. */ + tlr->cache_maintenance_flag = 1; + ON_SCOPE_EXIT { tlr->cache_maintenance_flag = 0; } + /* Iterate, flushing cache lines. */ for (uintptr_t cur = reinterpret_cast(addr) & ~(cache_line_size - 1); cur < end_addr; cur += cache_line_size) { __asm__ __volatile__ ("dc civac, %[cur]" :: [cur]"r"(cur)); diff --git a/libraries/libvapours/source/dd/impl/dd_cache_impl.os.horizon.hpp b/libraries/libvapours/source/dd/impl/dd_cache_impl.os.horizon.hpp index 447cd91ec..5c94c2a8a 100644 --- a/libraries/libvapours/source/dd/impl/dd_cache_impl.os.horizon.hpp +++ b/libraries/libvapours/source/dd/impl/dd_cache_impl.os.horizon.hpp @@ -31,6 +31,15 @@ namespace ams::dd::impl { __asm__ __volatile__("mrs %[ctr_el0], ctr_el0" : [ctr_el0]"=r"(ctr_el0)); const uintptr_t cache_line_size = 4 << ((ctr_el0 >> 16) & 0xF); + #if defined(ATMOSPHERE_IS_STRATOSPHERE) + /* Get the thread local region. */ + auto * const tlr = svc::GetThreadLocalRegion(); + + /* Note to the kernel that we're performing cache maintenance, in case we get interrupted while touching cache lines. */ + tlr->cache_maintenance_flag = 1; + ON_SCOPE_EXIT { tlr->cache_maintenance_flag = 0; } + #endif + /* Invalidate the cache. */ const uintptr_t start_addr = reinterpret_cast(addr) & ~(cache_line_size - 1); const uintptr_t end_addr = reinterpret_cast(addr) + size; @@ -62,6 +71,15 @@ namespace ams::dd::impl { __asm__ __volatile__("mrs %[ctr_el0], ctr_el0" : [ctr_el0]"=r"(ctr_el0)); const uintptr_t cache_line_size = 4 << ((ctr_el0 >> 16) & 0xF); + #if defined(ATMOSPHERE_IS_STRATOSPHERE) + /* Get the thread local region. */ + auto * const tlr = svc::GetThreadLocalRegion(); + + /* Note to the kernel that we're performing cache maintenance, in case we get interrupted while touching cache lines. */ + tlr->cache_maintenance_flag = 1; + ON_SCOPE_EXIT { tlr->cache_maintenance_flag = 0; } + #endif + /* Invalidate the cache. 
*/ const uintptr_t start_addr = reinterpret_cast(addr) & ~(cache_line_size - 1); const uintptr_t end_addr = reinterpret_cast(addr) + size; diff --git a/mesosphere/kernel/source/arch/arm64/init/kern_init_core.cpp b/mesosphere/kernel/source/arch/arm64/init/kern_init_core.cpp index f983f7ce1..4c7d9b11b 100644 --- a/mesosphere/kernel/source/arch/arm64/init/kern_init_core.cpp +++ b/mesosphere/kernel/source/arch/arm64/init/kern_init_core.cpp @@ -570,13 +570,13 @@ namespace ams::kern::init { cpu::DebugFeatureRegisterAccessor aa64dfr0; const auto num_watchpoints = aa64dfr0.GetNumWatchpoints(); const auto num_breakpoints = aa64dfr0.GetNumBreakpoints(); - cpu::EnsureInstructionConsistency(); + cpu::EnsureInstructionConsistencyFullSystem(); /* Clear the debug monitor register and the os lock access register. */ cpu::MonitorDebugSystemControlRegisterAccessor(0).Store(); - cpu::EnsureInstructionConsistency(); + cpu::EnsureInstructionConsistencyFullSystem(); cpu::OsLockAccessRegisterAccessor(0).Store(); - cpu::EnsureInstructionConsistency(); + cpu::EnsureInstructionConsistencyFullSystem(); /* Clear all debug watchpoints/breakpoints. */ #define FOR_I_IN_15_TO_1(HANDLER, ...) \ @@ -620,22 +620,22 @@ namespace ams::kern::init { #undef MESOSPHERE_INITIALIZE_BREAKPOINT_CASE #undef FOR_I_IN_15_TO_1 - cpu::EnsureInstructionConsistency(); + cpu::EnsureInstructionConsistencyFullSystem(); /* Initialize the context id register to all 1s. */ cpu::ContextIdRegisterAccessor(0).SetProcId(std::numeric_limits::max()).Store(); - cpu::EnsureInstructionConsistency(); + cpu::EnsureInstructionConsistencyFullSystem(); /* Configure the debug monitor register. */ cpu::MonitorDebugSystemControlRegisterAccessor(0).SetMde(true).SetTdcc(true).Store(); - cpu::EnsureInstructionConsistency(); + cpu::EnsureInstructionConsistencyFullSystem(); } void InitializeExceptionVectors() { cpu::SetVbarEl1(reinterpret_cast(::ams::kern::ExceptionVectors)); cpu::SetTpidrEl1(0); cpu::SetExceptionThreadStackTop(0); - cpu::EnsureInstructionConsistency(); + cpu::EnsureInstructionConsistencyFullSystem(); } size_t GetMiscUnknownDebugRegionSize() { diff --git a/mesosphere/kernel/source/arch/arm64/init/start.s b/mesosphere/kernel/source/arch/arm64/init/start.s index ea32ac355..3b68f5e25 100644 --- a/mesosphere/kernel/source/arch/arm64/init/start.s +++ b/mesosphere/kernel/source/arch/arm64/init/start.s @@ -84,17 +84,31 @@ _ZN3ams4kern4init10StartCore0Emm: mov x20, x1 /* Check our current EL. We want to be executing out of EL1. */ - /* If we're in EL2, we'll need to deprivilege ourselves. */ mrs x1, currentel + + /* Check if we're EL1. */ cmp x1, #0x4 - b.eq core0_el1 + b.eq 2f + + /* Check if we're EL2. */ cmp x1, #0x8 - b.eq core0_el2 -core0_el3: - b core0_el3 -core0_el2: + b.eq 1f + +0: /* We're EL3. This is a panic condition. */ + b 0b + +1: /* We're EL2. */ + #ifdef ATMOSPHERE_BOARD_NINTENDO_NX + /* On NX board, this is a panic condition. */ + b 1b + #else + /* Otherwise, deprivilege to EL2. */ + /* TODO: Does N still have this? We need it for qemu emulation/unit testing, we should come up with a better solution maybe. */ bl _ZN3ams4kern4init16JumpFromEL2ToEL1Ev -core0_el1: + #endif + +2: /* We're EL1. */ + /* Disable the MMU/Caches. */ bl _ZN3ams4kern4init19DisableMmuAndCachesEv #ifdef ATMOSPHERE_BOARD_NINTENDO_NX @@ -103,14 +117,18 @@ core0_el1: mov w1, #65000 smc #1 cmp x0, #0 -0: - b.ne 0b +3: + b.ne 3b /* Store the target firmware. */ adr x0, __metadata_target_firmware str w1, [x0] #endif + /* Get the unknown debug region. 
*/ + /* TODO: This is always zero in release kernels -- what is this? Is it the device tree buffer? */ + mov x21, #0 + /* We want to invoke kernel loader. */ adr x0, _start adr x1, __metadata_kernel_layout @@ -126,7 +144,7 @@ core0_el1: /* Next thing to do is to set up our memory management and slabheaps -- all the other core initialization. */ /* Call ams::kern::init::InitializeCore(uintptr_t, void **) */ mov x1, x0 /* Kernelldr returns a state object for the kernel to re-use. */ - mov x0, xzr /* Official kernel always passes zero, when this is non-zero the address is mapped. */ + mov x0, x21 /* Use the address we determined earlier. */ bl _ZN3ams4kern4init14InitializeCoreEmPPv /* Get the init arguments for core 0. */ @@ -144,17 +162,31 @@ _ZN3ams4kern4init14StartOtherCoreEPKNS1_14KInitArgumentsE: mov x20, x0 /* Check our current EL. We want to be executing out of EL1. */ - /* If we're in EL2, we'll need to deprivilege ourselves. */ mrs x1, currentel + + /* Check if we're EL1. */ cmp x1, #0x4 - b.eq othercore_el1 + b.eq 2f + + /* Check if we're EL2. */ cmp x1, #0x8 - b.eq othercore_el2 -othercore_el3: - b othercore_el3 -othercore_el2: + b.eq 1f + +0: /* We're EL3. This is a panic condition. */ + b 0b + +1: /* We're EL2. */ + #ifdef ATMOSPHERE_BOARD_NINTENDO_NX + /* On NX board, this is a panic condition. */ + b 1b + #else + /* Otherwise, deprivilege to EL2. */ + /* TODO: Does N still have this? We need it for qemu emulation/unit testing, we should come up with a better solution maybe. */ bl _ZN3ams4kern4init16JumpFromEL2ToEL1Ev -othercore_el1: + #endif + +2: /* We're EL1. */ + /* Disable the MMU/Caches. */ bl _ZN3ams4kern4init19DisableMmuAndCachesEv /* Setup system registers using values from our KInitArguments. */ @@ -171,21 +203,20 @@ othercore_el1: mrs x1, midr_el1 ubfx x2, x1, #0x18, #0x8 /* Extract implementer bits. */ cmp x2, #0x41 /* Implementer::ArmLimited */ - b.ne othercore_cpu_specific_setup_end + b.ne 4f ubfx x2, x1, #0x4, #0xC /* Extract primary part number. */ cmp x2, #0xD07 /* PrimaryPartNumber::CortexA57 */ - b.eq othercore_cpu_specific_setup_cortex_a57 + b.eq 3f cmp x2, #0xD03 /* PrimaryPartNumber::CortexA53 */ - b.eq othercore_cpu_specific_setup_cortex_a53 - b othercore_cpu_specific_setup_end -othercore_cpu_specific_setup_cortex_a57: -othercore_cpu_specific_setup_cortex_a53: + b.eq 3f + b 4f +3: /* We're running on a Cortex-A53/Cortex-A57. */ ldr x1, [x20, #(INIT_ARGUMENTS_CPUACTLR)] msr cpuactlr_el1, x1 ldr x1, [x20, #(INIT_ARGUMENTS_CPUECTLR)] msr cpuectlr_el1, x1 -othercore_cpu_specific_setup_end: +4: /* Ensure instruction consistency. */ dsb sy isb @@ -237,7 +268,8 @@ _ZN3ams4kern4init16InvokeEntrypointEPKNS1_14KInitArgumentsE: ldr x0, [x20, #(INIT_ARGUMENTS_ARGUMENT)] br x1 - +/* TODO: Can we remove this while retaining QEMU support? */ +#ifndef ATMOSPHERE_BOARD_NINTENDO_NX /* ams::kern::init::JumpFromEL2ToEL1() */ .section .crt0.text._ZN3ams4kern4init16JumpFromEL2ToEL1Ev, "ax", %progbits .global _ZN3ams4kern4init16JumpFromEL2ToEL1Ev @@ -314,6 +346,7 @@ _ZN3ams4kern4init16JumpFromEL2ToEL1Ev: msr spsr_el2, x0 eret +#endif /* ams::kern::init::DisableMmuAndCaches() */ .section .crt0.text._ZN3ams4kern4init19DisableMmuAndCachesEv, "ax", %progbits @@ -341,6 +374,10 @@ _ZN3ams4kern4init19DisableMmuAndCachesEv: and x0, x0, x1 msr sctlr_el1, x0 + /* Ensure instruction consistency. */ + dsb sy + isb + mov x30, x22 ret @@ -354,13 +391,10 @@ _ZN3ams4kern4arch5arm643cpu32FlushEntireDataCacheWithoutStackEv: /* Ensure that the cache is coherent. 
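(The standalone dsb sy barriers between these three calls are gone; the local/shared flush helpers now end with a dsb of their own.)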
*/ bl _ZN3ams4kern4arch5arm643cpu37FlushEntireDataCacheLocalWithoutStackEv - dsb sy bl _ZN3ams4kern4arch5arm643cpu38FlushEntireDataCacheSharedWithoutStackEv - dsb sy bl _ZN3ams4kern4arch5arm643cpu37FlushEntireDataCacheLocalWithoutStackEv - dsb sy /* Invalidate the entire TLB, and ensure instruction consistency. */ tlbi vmalle1is @@ -387,10 +421,10 @@ _ZN3ams4kern4arch5arm643cpu37FlushEntireDataCacheLocalWithoutStackEv: mov x9, xzr /* while (level <= levels_of_unification) { */ -begin_flush_cache_local_loop: cmp x9, x10 - b.eq done_flush_cache_local_loop + b.eq 1f +0: /* FlushEntireDataCacheImplWithoutStack(level); */ mov w0, w9 bl _ZN3ams4kern4arch5arm643cpu36FlushEntireDataCacheImplWithoutStackEv @@ -399,9 +433,13 @@ begin_flush_cache_local_loop: add w9, w9, #1 /* } */ - b begin_flush_cache_local_loop + cmp x9, x10 + b.ne 0b -done_flush_cache_local_loop: + /* cpu::DataSynchronizationBarrier(); */ + dsb sy + +1: mov x30, x24 ret @@ -423,21 +461,25 @@ _ZN3ams4kern4arch5arm643cpu38FlushEntireDataCacheSharedWithoutStackEv: /* int level = levels_of_unification */ /* while (level <= levels_of_coherency) { */ -begin_flush_cache_shared_loop: cmp w9, w10 - b.hi done_flush_cache_shared_loop + b.hi 1f +0: /* FlushEntireDataCacheImplWithoutStack(level); */ mov w0, w9 bl _ZN3ams4kern4arch5arm643cpu36FlushEntireDataCacheImplWithoutStackEv /* level++; */ + cmp w9, w10 add w9, w9, #1 /* } */ - b begin_flush_cache_shared_loop + b.cc 0b -done_flush_cache_shared_loop: + /* cpu::DataSynchronizationBarrier(); */ + dsb sy + +1: mov x30, x24 ret @@ -450,6 +492,9 @@ _ZN3ams4kern4arch5arm643cpu36FlushEntireDataCacheImplWithoutStackEv: lsl w6, w0, #1 sxtw x6, w6 + /* cpu::DataSynchronizationBarrier(); */ + dsb sy + /* cpu::SetCsselrEl1(level_sel_value); */ msr csselr_el1, x6 @@ -479,17 +524,17 @@ _ZN3ams4kern4arch5arm643cpu36FlushEntireDataCacheImplWithoutStackEv: mov x5, #0 /* while (way <= num_ways) { */ -begin_flush_cache_impl_way_loop: +0: cmp w8, w5 - b.lt done_flush_cache_impl_way_loop + b.lt 3f /* int set = 0; */ mov x0, #0 /* while (set <= num_sets) { */ -begin_flush_cache_impl_set_loop: +1: cmp w3, w0 - b.lt done_flush_cache_impl_set_loop + b.lt 2f /* const u64 cisw_value = (static_cast(way) << way_shift) | (static_cast(set) << set_shift) | level_sel_value; */ lsl x2, x5, x7 @@ -504,13 +549,13 @@ begin_flush_cache_impl_set_loop: add x0, x0, #1 /* } */ - b begin_flush_cache_impl_set_loop -done_flush_cache_impl_set_loop: + b 1b +2: /* way++; */ add x5, x5, 1 /* } */ - b begin_flush_cache_impl_way_loop -done_flush_cache_impl_way_loop: + b 0b +3: ret diff --git a/mesosphere/kernel_ldr/source/kern_init_loader.cpp b/mesosphere/kernel_ldr/source/kern_init_loader.cpp index 667f9eb8b..2eacd8947 100644 --- a/mesosphere/kernel_ldr/source/kern_init_loader.cpp +++ b/mesosphere/kernel_ldr/source/kern_init_loader.cpp @@ -88,17 +88,17 @@ namespace ams::kern::init::loader { cpu::MemoryAccessIndirectionRegisterAccessor(MairValue).Store(); cpu::TranslationControlRegisterAccessor(TcrValue).Store(); + /* Ensure that our configuration takes before proceeding. */ + cpu::EnsureInstructionConsistency(); + /* Perform board-specific setup. */ PerformBoardSpecificSetup(); - /* Ensure that the entire cache is flushed. */ - cpu::FlushEntireCacheForInit(); - /* Setup SCTLR_EL1. 
*/
         /* TODO: Define these bits properly elsewhere, document exactly what each bit set is doing .*/
         constexpr u64 SctlrValue = 0x0000000034D5D925ul;
         cpu::SetSctlrEl1(SctlrValue);
-        cpu::EnsureInstructionConsistency();
+        cpu::InstructionMemoryBarrier();
     }
 
     KVirtualAddress GetRandomKernelBaseAddress(KInitialPageTable &page_table, KPhysicalAddress phys_base_address, size_t kernel_size) {
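
For readers tracking the new semantics, a minimal, self-contained sketch (not part of the patch) of the by-address data cache store that replaces the old whole-cache StoreEntireCacheForInit walk. The helper name StoreDataCacheByVa is hypothetical; the CTR_EL0-derived line size and the dc cvac / dsb sequence mirror what the patched StoreCacheForInit and the stratosphere/vapours cache impls do.

#include <cstddef>
#include <cstdint>

/* Hypothetical illustration: clean ("store") a virtual address range to the
   point of coherency, one data cache line at a time. */
void StoreDataCacheByVa(const void *addr, size_t size) {
    /* Derive the data cache line size from CTR_EL0 (DminLine is a log2 word count). */
    uint64_t ctr_el0;
    __asm__ __volatile__("mrs %[ctr], ctr_el0" : [ctr]"=r"(ctr_el0));
    const uintptr_t cache_line_size = 4 << ((ctr_el0 >> 16) & 0xF);

    /* Align the start down to a line boundary and walk the range. */
    const uintptr_t start = reinterpret_cast<uintptr_t>(addr) & ~(cache_line_size - 1);
    const uintptr_t end   = reinterpret_cast<uintptr_t>(addr) + size;
    for (uintptr_t cur = start; cur < end; cur += cache_line_size) {
        __asm__ __volatile__("dc cvac, %[cur]" :: [cur]"r"(cur) : "memory");
    }

    /* Ensure the maintenance completes before anything depends on it. */
    __asm__ __volatile__("dsb sy" ::: "memory");
}

Cleaning only the range that was actually touched is what allows the commit to drop most of the set/way maintenance outside of the stack-less early flush path in start.s.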