mirror of
https://github.com/Atmosphere-NX/Atmosphere.git
synced 2024-11-10 06:01:52 +00:00
cpu: optimize core barrier
This commit is contained in:
parent
7820e5b759
commit
507ab46709
2 changed files with 78 additions and 14 deletions
|
@ -17,9 +17,14 @@
|
|||
|
||||
namespace ams::kern::arm64::cpu {
|
||||
|
||||
/* Declare prototype to be implemented in asm. */
/* sync_var points at the s32 counter the routine loads, increments and resets; */
/* num_cores is the core count the routine compares that counter against. */
|
||||
void SynchronizeAllCoresImpl(s32 *sync_var, s32 num_cores);
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
std::atomic<s32> g_all_core_sync_count;
|
||||
/* Expose this as a global, for asm to use. */
|
||||
s32 g_all_core_sync_count;
|
||||
|
||||
void FlushEntireDataCacheImpl(int level) {
|
||||
/* Used in multiple locations. */
|
||||
|
@ -48,6 +53,14 @@ namespace ams::kern::arm64::cpu {
|
|||
}
|
||||
}
|
||||
|
||||
/* Issue the AArch64 "sevl" (send event, local) instruction on the current core. */
/* The "memory" clobber stops the compiler from reordering memory accesses across it. */
ALWAYS_INLINE void SetEventLocally() {
    __asm__ __volatile__(
        "sevl"
        : /* no outputs */
        : /* no inputs */
        : "memory"
    );
}
|
||||
|
||||
/* Issue the AArch64 "wfe" (wait for event) instruction on the current core. */
/* The "memory" clobber stops the compiler from reordering memory accesses across it. */
ALWAYS_INLINE void WaitForEvent() {
    __asm__ __volatile__(
        "wfe"
        : /* no outputs */
        : /* no inputs */
        : "memory"
    );
}
|
||||
|
||||
}
|
||||
|
||||
void FlushEntireDataCacheShared() {
|
||||
|
@ -70,19 +83,7 @@ namespace ams::kern::arm64::cpu {
|
|||
}
|
||||
|
||||
/* Synchronize the calling core with the others. */
/* The wait itself is done by the assembly routine SynchronizeAllCoresImpl; this wrapper */
/* only supplies the shared counter and the number of cores. */
NOINLINE void SynchronizeAllCores() {
    /* The earlier C++ spin loop on an atomic counter is superseded by the asm implementation; */
    /* running both would perform the rendezvous twice on the same counter. */
    SynchronizeAllCoresImpl(&g_all_core_sync_count, static_cast<s32>(cpu::NumCores));
}
|
||||
|
||||
}
|
||||
|
|
63
libraries/libmesosphere/source/arch/arm64/kern_cpu_asm.s
Normal file
63
libraries/libmesosphere/source/arch/arm64/kern_cpu_asm.s
Normal file
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Copyright (c) 2018-2020 Atmosphère-NX
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/* ams::kern::arm64::cpu::SynchronizeAllCoresImpl(int *sync_var, int num_cores) */
/* Each caller takes a ticket from the counter at *sync_var, waits for the counter to reach */
/* ticket + num_cores, then either increments the counter or (last ticket) resets it to zero. */
/* Registers: x0 = sync_var, w1 = num_cores, w2 = value last loaded from the counter / scratch, */
/*            w3 = value to store, later the value this caller waits for, w4 = stlxr status. */
|
||||
.section .text._ZN3ams4kern5arm643cpu23SynchronizeAllCoresImplEPii, "ax", %progbits
|
||||
.global _ZN3ams4kern5arm643cpu23SynchronizeAllCoresImplEPii
|
||||
.type _ZN3ams4kern5arm643cpu23SynchronizeAllCoresImplEPii, %function
|
||||
_ZN3ams4kern5arm643cpu23SynchronizeAllCoresImplEPii:
|
||||
/* Loop while the sync var is greater than num cores (b.gt); continue once it is <= num cores. */
/* The sevl ahead of the loop is there so the first wfe does not sleep before the first check. */
|
||||
sevl
|
||||
1:
|
||||
wfe
|
||||
ldaxr w2, [x0]
|
||||
cmp w2, w1
|
||||
b.gt 1b
|
||||
|
||||
/* Increment the sync var. */
/* Exclusive load/store retry loop: w4 is non-zero when the store did not succeed. */
/* On exit w2 still holds the pre-increment value, which is this caller's ticket. */
|
||||
2:
|
||||
ldaxr w2, [x0]
|
||||
add w3, w2, #1
|
||||
stlxr w4, w3, [x0]
|
||||
cbnz w4, 2b
|
||||
|
||||
/* Loop until the sync var matches our ticket. */
/* The value waited for is w3 = ticket + num_cores. */
|
||||
add w3, w2, w1
|
||||
sevl
|
||||
3:
|
||||
wfe
|
||||
ldaxr w2, [x0]
|
||||
cmp w2, w3
|
||||
b.ne 3b
|
||||
|
||||
/* Check if the ticket is the last. */
/* w2 = (num_cores - 1) + num_cores = 2 * num_cores - 1, the final value of the sequence. */
|
||||
sub w2, w1, #1
|
||||
add w2, w2, w1
|
||||
cmp w3, w2
|
||||
b.eq 5f
|
||||
|
||||
/* Our ticket is not the last one. Increment. */
/* Same exclusive retry loop as above; the new value is what the next ticket holder waits for. */
|
||||
4:
|
||||
ldaxr w2, [x0]
|
||||
add w3, w2, #1
|
||||
stlxr w4, w3, [x0]
|
||||
cbnz w4, 4b
|
||||
ret
|
||||
|
||||
/* Our ticket is the last one. */
/* Store zero with release semantics so the counter starts from 0 again. */
|
||||
5:
|
||||
stlr wzr, [x0]
|
||||
ret
|
Loading…
Reference in a new issue