1
0
Fork 0
mirror of https://github.com/Atmosphere-NX/Atmosphere.git synced 2024-12-18 08:22:04 +00:00

thermosphere: cache rewrite

This commit is contained in:
TuxSH 2020-02-16 23:20:02 +00:00
parent 613402121a
commit 5a445e9394
4 changed files with 235 additions and 232 deletions

View file

@ -1,166 +0,0 @@
/*
* Copyright (c) 2019 Atmosphère-NX
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "caches.h"
#include "preprocessor.h"
#include "core_ctx.h"
/*
 * Defines `void name(const void *addr, size_t size)`, which applies the cache maintenance
 * instruction `isn` to every cache line overlapping [addr, addr + size) and then runs `post`
 * (the barrier statements). `cache` (Data or Instruction) picks which
 * cacheGetSmallest<cache>CacheLineSize() helper provides the stride.
 *
 * The line size is held in a uintptr_t: with a 32-bit line size, ~(lineCacheSize - 1) is a
 * 32-bit value that gets zero-extended when ANDed with the address, which would clear the
 * upper 32 bits of any address located at or above 4 GiB.
 */
#define DEFINE_CACHE_RANGE_FUNC(isn, name, cache, post)\
void name(const void *addr, size_t size)\
{\
    const uintptr_t lineCacheSize = cacheGetSmallest##cache##CacheLineSize();\
    const uintptr_t lineMask = ~(lineCacheSize - 1);\
    const uintptr_t begin = (uintptr_t)addr & lineMask;\
    const uintptr_t end = ((uintptr_t)addr + size + lineCacheSize - 1) & lineMask;\
    for (uintptr_t pos = begin; pos < end; pos += lineCacheSize) {\
        __asm__ __volatile__ (isn ", %0" :: "r"(pos) : "memory");\
    }\
    post;\
}
// Writes csselr_el1 so that later ccsidr_el1 reads describe the chosen cache:
// bit 0 is set for the instruction cache, bits [3:1] carry the low 3 bits of `level`.
// __isb() is called right after the write.
static inline ALINLINE void cacheSelectByLevel(bool instructionCache, u32 level)
{
    u32 selector = (level & 7) << 1;
    if (instructionCache) {
        selector |= 1;
    }
    SET_SYSREG(csselr_el1, selector);
    __isb();
}
// Invalidates (without cleaning) every set/way of the data cache at `level` using "dc isw".
// The geometry is read from ccsidr_el1 after selecting the level. No barrier is issued here;
// the caller is expected to do it.
static inline ALINLINE void cacheInvalidateDataCacheLevel(u32 level)
{
    cacheSelectByLevel(false, level);
    u32 ccsidr = (u32)GET_SYSREG(ccsidr_el1);
    u32 numWays = 1 + ((ccsidr >> 3) & 0x3FF);
    u32 numSets = 1 + ((ccsidr >> 13) & 0x7FFF);
    // The way index occupies the top ceil(log2(numWays)) bits of the 32-bit operand, so the
    // shift is clz of the highest way index (numWays - 1), not of numWays itself.
    // With a single way there is no way field (and clz(0) is undefined), hence the guard.
    u32 wayShift = numWays > 1 ? (u32)__builtin_clz(numWays - 1) : 0;
    // ccsidr[2:0] is log2(line size in bytes) - 4
    u32 setShift = (ccsidr & 7) + 4;
    u32 lbits = (level & 7) << 1;
    for (u32 way = 0; way < numWays; way++) {
        for (u32 set = 0; set < numSets; set++) {
            u64 val = ((u64)way << wayShift) | ((u64)set << setShift) | lbits;
            __asm__ __volatile__ ("dc isw, %0" :: "r"(val) : "memory");
        }
    }
}
// Cleans and invalidates every set/way of the data cache at `level` using "dc cisw",
// then calls __dsb_sy() and __isb(). The geometry is read from ccsidr_el1 after selecting the level.
static inline ALINLINE void cacheCleanInvalidateDataCacheLevel(u32 level)
{
    cacheSelectByLevel(false, level);
    u32 ccsidr = (u32)GET_SYSREG(ccsidr_el1);
    u32 numWays = 1 + ((ccsidr >> 3) & 0x3FF);
    u32 numSets = 1 + ((ccsidr >> 13) & 0x7FFF);
    // The way index occupies the top ceil(log2(numWays)) bits of the 32-bit operand, so the
    // shift is clz of the highest way index (numWays - 1), not of numWays itself.
    // With a single way there is no way field (and clz(0) is undefined), hence the guard.
    u32 wayShift = numWays > 1 ? (u32)__builtin_clz(numWays - 1) : 0;
    // ccsidr[2:0] is log2(line size in bytes) - 4
    u32 setShift = (ccsidr & 7) + 4;
    u32 lbits = (level & 7) << 1;
    for (u32 way = 0; way < numWays; way++) {
        for (u32 set = 0; set < numSets; set++) {
            u64 val = ((u64)way << wayShift) | ((u64)set << setShift) | lbits;
            __asm__ __volatile__ ("dc cisw, %0" :: "r"(val) : "memory");
        }
    }
    __dsb_sy();
    __isb();
}
// Invalidates by set/way every data cache level in [from, to), then calls
// __dsb_sy() and __isb() once all levels have been processed.
static inline ALINLINE void cacheInvalidateDataCacheLevels(u32 from, u32 to)
{
    // Everything called here is meant to be inlined; hopefully no stack frame gets generated...
    u32 current = from;
    while (current < to) {
        cacheInvalidateDataCacheLevel(current);
        current++;
    }
    __dsb_sy();
    __isb();
}
// cacheCleanInvalidateDataCacheRange: "dc civac" on each data cache line of the range, then __dsb()
DEFINE_CACHE_RANGE_FUNC("dc civac", cacheCleanInvalidateDataCacheRange, Data, __dsb())
// cacheCleanDataCacheRangePoU: "dc cvau" on each data cache line of the range, then __dsb()
DEFINE_CACHE_RANGE_FUNC("dc cvau", cacheCleanDataCacheRangePoU, Data, __dsb())
// cacheInvalidateInstructionCacheRangePoU: "ic ivau" on each instruction cache line of the range
// (stride is the instruction cache line size), then __dsb() followed by __isb()
DEFINE_CACHE_RANGE_FUNC("ic ivau", cacheInvalidateInstructionCacheRangePoU, Instruction, __dsb(); __isb())
// Makes code written to [addr, addr + size) visible to instruction fetch.
// Each maintenance step is skipped when the matching ctr_el0 bit (28 for the data cache clean,
// 29 for the instruction cache invalidate) says it is not needed.
void cacheHandleSelfModifyingCodePoU(const void *addr, size_t size)
{
    // See the documentation of ctr_el0.{dic, idc}. It is unclear when these bits were introduced,
    // but they are RES0 when not implemented, so testing them is always safe.
    const u32 ctr = (u32)GET_SYSREG(ctr_el0);
    const bool dataCleanNotRequired = (ctr & BIT(28)) != 0;
    const bool insnInvalidateNotRequired = (ctr & BIT(29)) != 0;

    if (!dataCleanNotRequired) {
        cacheCleanDataCacheRangePoU(addr, size);
    }
    if (!insnInvalidateNotRequired) {
        cacheInvalidateInstructionCacheRangePoU(addr, size);
    }
}
void cacheClearSharedDataCachesOnBoot(void)
{
u32 clidr = (u32)GET_SYSREG(clidr_el1);
u32 louis = (clidr >> 21) & 7;
u32 loc = (clidr >> 24) & 7;
cacheInvalidateDataCacheLevels(louis, loc);
}
void cacheClearLocalDataCacheOnBoot(void)
{
u32 clidr = (u32)GET_SYSREG(clidr_el1);
u32 louis = (clidr >> 21) & 7;
cacheInvalidateDataCacheLevels(0, louis);
}
/* Ok so:
- cache set/way ops can't really be virtualized
- since we have only one guest OS & don't care about security (for space limitations),
we do the following:
- ignore all cache s/w ops applying before the Level Of Unification Inner Shareable (L1, typically).
These clearly break coherency and should only be done once, on power on/off/suspend/resume only. And we already
do it ourselves...
- allow ops after the LoUIS, but do it ourselves and ignore the next (numSets * numWays - 1) requests. This is because
we have to handle Nintendo's dodgy code
- ignore "invalidate only" ops by the guest. Should only be done on power on/resume and we already did it ourselves...
- transform "clean only" into "clean and invalidate"
*/
// Handles a trapped cache set/way operation.
//  - invalidateOnly: when true the request is ignored entirely.
// Requests targeting the instruction cache (csselr_el1 bit 0) or a level below the LoUIS are
// ignored too. Otherwise, the first request of a burst cleans and invalidates the whole level
// currently selected in csselr_el1; the per-core setWayCounter then swallows the following
// requests until numSets * numWays of them have been counted.
void cacheHandleTrappedSetWayOperation(bool invalidateOnly)
{
    if (invalidateOnly) {
        return;
    }

    u32 clidr = (u32)GET_SYSREG(clidr_el1);
    u32 louis = (clidr >> 21) & 7;

    u32 csselr = (u32)GET_SYSREG(csselr_el1);
    u32 level = (csselr >> 1) & 7;
    if (csselr & BIT(0)) {
        // Icache, ignore
        return;
    } else if (level < louis) {
        return;
    }

    // Geometry of the currently selected level; read before the maintenance rewrites csselr_el1
    u32 ccsidr = (u32)GET_SYSREG(ccsidr_el1);
    u32 numWays = 1 + ((ccsidr >> 3) & 0x3FF);
    u32 numSets = 1 + ((ccsidr >> 13) & 0x7FFF);

    // Only the first request of a burst does the actual work
    if (currentCoreCtx->setWayCounter++ == 0) {
        cacheCleanInvalidateDataCacheLevel(level);
    }

    // Burst complete: the next request starts a new one
    if (currentCoreCtx->setWayCounter >= numSets * numWays) {
        currentCoreCtx->setWayCounter = 0;
    }
}

View file

@ -1,66 +0,0 @@
/*
* Copyright (c) 2019 Atmosphère-NX
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "utils.h"
#include "sysreg.h"
static inline u32 cacheGetInstructionCachePolicy(void)
{
u32 ctr = (u32)GET_SYSREG(ctr_el0);
return (ctr >> 14) & 3;
}
// Returns the smallest instruction cache line size, in bytes, derived from ctr_el0[3:0].
static inline u32 cacheGetSmallestInstructionCacheLineSize(void)
{
    // The field is the "log2 of the number of words", a word being 4 bytes
    const u32 log2Words = (u32)GET_SYSREG(ctr_el0) & 0xF;
    return 4 << log2Words;
}
static inline u32 cacheGetSmallestDataCacheLineSize(void)
{
u32 ctr = (u32)GET_SYSREG(ctr_el0);
u32 shift = (ctr >> 16) & 0xF;
// "log2 of the number of words"...
return 4 << shift;
}
// Issues "ic ialluis" (invalidate all instruction caches; the "is" suffix is the
// Inner Shareable variant, as opposed to the Local function below), then calls __isb().
static inline void cacheInvalidateInstructionCache(void)
{
__asm__ __volatile__ ("ic ialluis" ::: "memory");
__isb();
}
// Issues "ic iallu" (invalidate all instruction caches, without the "is" suffix used by
// cacheInvalidateInstructionCache), then calls __isb().
static inline void cacheInvalidateInstructionCacheLocal(void)
{
__asm__ __volatile__ ("ic iallu" ::: "memory");
__isb();
}
// Applies "dc civac" to every data cache line overlapping [addr, addr + size).
void cacheCleanInvalidateDataCacheRange(const void *addr, size_t size);
// Applies "dc cvau" to every data cache line overlapping [addr, addr + size).
void cacheCleanDataCacheRangePoU(const void *addr, size_t size);
// Applies "ic ivau" to every instruction cache line overlapping [addr, addr + size).
void cacheInvalidateInstructionCacheRangePoU(const void *addr, size_t size);
// Runs the two PoU range operations above on [addr, addr + size), each one only when
// ctr_el0 does not report it as unnecessary.
void cacheHandleSelfModifyingCodePoU(const void *addr, size_t size);
// Invalidates by set/way the data cache levels from the LoUIS up to (excluding) the LoC.
void cacheClearSharedDataCachesOnBoot(void);
// Invalidates by set/way the data cache levels below the LoUIS.
void cacheClearLocalDataCacheOnBoot(void);
// Handles a trapped cache set/way operation; the request is ignored when invalidateOnly is true.
void cacheHandleTrappedSetWayOperation(bool invalidateOnly);

View file

@ -0,0 +1,164 @@
/*
* Copyright (c) 2019-2020 Atmosphère-NX
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "hvisor_cpu_caches.hpp"
#include "../core_ctx.h"
/*
 * Defines `void name(const void *addr, size_t size)`, which applies the cache maintenance
 * instruction `isn` to every cache line overlapping [addr, addr + size) and then runs `post`
 * (the barrier statements). `cache` (Data or Instruction) picks which
 * GetSmallest<cache>CacheLineSize() helper provides the stride.
 *
 * The line size is held in a uintptr_t: with a 32-bit line size, ~(lineCacheSize - 1) is a
 * 32-bit value that gets zero-extended when ANDed with the address, which would clear the
 * upper 32 bits of any address located at or above 4 GiB.
 */
#define DEFINE_CACHE_RANGE_FUNC(isn, name, cache, post)\
void name(const void *addr, size_t size)\
{\
    const uintptr_t lineCacheSize = GetSmallest##cache##CacheLineSize();\
    const uintptr_t lineMask = ~(lineCacheSize - 1);\
    const uintptr_t begin = reinterpret_cast<uintptr_t>(addr) & lineMask;\
    const uintptr_t end = (reinterpret_cast<uintptr_t>(addr) + size + lineCacheSize - 1) & lineMask;\
    for (uintptr_t pos = begin; pos < end; pos += lineCacheSize) {\
        __asm__ __volatile__ (isn ", %0" :: "r"(pos) : "memory");\
    }\
    post;\
}
namespace {

    // Writes csselr_el1 so that later ccsidr_el1 reads describe the chosen cache:
    // bit 0 is set for the instruction cache, bits [3:1] carry the low 3 bits of `level`.
    // isb() is called right after the write.
    ALWAYS_INLINE void SelectCacheLevel(bool instructionCache, u32 level)
    {
        u32 ibit = instructionCache ? 1 : 0;
        u32 lbits = (level & 7) << 1;
        THERMOSPHERE_SET_SYSREG(csselr_el1, lbits | ibit);
        ams::hvisor::cpu::isb();
    }

    // Invalidates (without cleaning) every set/way of the data cache at `level` using "dc isw".
    // No barrier is issued here; InvalidateDataCacheLevels does it once for all levels.
    [[gnu::optimize("O2")]] ALWAYS_INLINE void InvalidateDataCacheLevel(u32 level)
    {
        SelectCacheLevel(false, level);
        u32 ccsidr = static_cast<u32>(THERMOSPHERE_GET_SYSREG(ccsidr_el1));
        u32 numWays = 1 + ((ccsidr >> 3) & 0x3FF);
        u32 numSets = 1 + ((ccsidr >> 13) & 0x7FFF);
        // The way index occupies the top ceil(log2(numWays)) bits of the 32-bit operand, so the
        // shift is clz of the highest way index (numWays - 1), not of numWays itself.
        // With a single way there is no way field (and clz(0) is undefined), hence the guard.
        u32 wayShift = numWays > 1 ? static_cast<u32>(__builtin_clz(numWays - 1)) : 0;
        // ccsidr[2:0] is log2(line size in bytes) - 4
        u32 setShift = (ccsidr & 7) + 4;
        u32 lbits = (level & 7) << 1;

        for (u32 way = 0; way < numWays; way++) {
            for (u32 set = 0; set < numSets; set++) {
                u64 val = (static_cast<u64>(way) << wayShift) | (static_cast<u64>(set) << setShift) | lbits;
                __asm__ __volatile__ ("dc isw, %0" :: "r"(val) : "memory");
            }
        }
    }

    // Cleans and invalidates every set/way of the data cache at `level` using "dc cisw".
    // No barrier is issued here; the caller has to do it.
    ALWAYS_INLINE void CleanInvalidateDataCacheLevel(u32 level)
    {
        SelectCacheLevel(false, level);
        u32 ccsidr = static_cast<u32>(THERMOSPHERE_GET_SYSREG(ccsidr_el1));
        u32 numWays = 1 + ((ccsidr >> 3) & 0x3FF);
        u32 numSets = 1 + ((ccsidr >> 13) & 0x7FFF);
        // Same operand layout as in InvalidateDataCacheLevel: shift by clz(numWays - 1),
        // with a guard for the single-way case where clz(0) would be undefined.
        u32 wayShift = numWays > 1 ? static_cast<u32>(__builtin_clz(numWays - 1)) : 0;
        // ccsidr[2:0] is log2(line size in bytes) - 4
        u32 setShift = (ccsidr & 7) + 4;
        u32 lbits = (level & 7) << 1;

        for (u32 way = 0; way < numWays; way++) {
            for (u32 set = 0; set < numSets; set++) {
                u64 val = (static_cast<u64>(way) << wayShift) | (static_cast<u64>(set) << setShift) | lbits;
                __asm__ __volatile__ ("dc cisw, %0" :: "r"(val) : "memory");
            }
        }
    }

    // Invalidates by set/way every data cache level in [from, to), then calls dsbSy() and isb().
    [[gnu::optimize("O2")]] ALWAYS_INLINE void InvalidateDataCacheLevels(u32 from, u32 to)
    {
        // Let's hope it doesn't generate a stack frame...
        for (u32 level = from; level < to; level++) {
            InvalidateDataCacheLevel(level);
        }

        ams::hvisor::cpu::dsbSy();
        ams::hvisor::cpu::isb();
    }

}
namespace ams::hvisor::cpu {
DEFINE_CACHE_RANGE_FUNC("dc civac", CleanInvalidateDataCacheRange, Data, dsbSy())
DEFINE_CACHE_RANGE_FUNC("dc cvau", CleanDataCacheRangePoU, Data, dsb())
DEFINE_CACHE_RANGE_FUNC("ic ivau", InvalidateInstructionCacheRangePoU, Instruction, dsb(); isb())
void HandleSelfModifyingCodePoU(const void *addr, size_t size)
{
// See docs for ctr_el0.{dic, idc}. It's unclear when these bits have been added, but they're
// RES0 if not implemented, so that's fine
u32 ctr = static_cast<u32>(THERMOSPHERE_GET_SYSREG(ctr_el0));
if (!(ctr & BIT(28))) {
CleanDataCacheRangePoU(addr, size);
}
if (!(ctr & BIT(29))) {
InvalidateInstructionCacheRangePoU(addr, size);
}
}
[[gnu::optimize("O2")]] void ClearSharedDataCachesOnBoot(void)
{
u32 clidr = static_cast<u32>(THERMOSPHERE_GET_SYSREG(clidr_el1));
u32 louis = (clidr >> 21) & 7;
u32 loc = (clidr >> 24) & 7;
InvalidateDataCacheLevels(louis, loc);
}
[[gnu::optimize("O2")]] void ClearLocalDataCacheOnBoot(void)
{
u32 clidr = static_cast<u32>(THERMOSPHERE_GET_SYSREG(clidr_el1));
u32 louis = (clidr >> 21) & 7;
InvalidateDataCacheLevels(0, louis);
}
/* Ok so:
- cache set/way ops can't really be virtualized
- since we have only one guest OS & don't care about security (for space limitations),
we do the following:
- ignore all cache s/w ops applying before the Level Of Unification Inner Shareable (L1, typically).
These clearly break coherency and should only be done once, on power on/off/suspend/resume only. And we already
do it ourselves...
- allow ops after the LoUIS, but do it ourselves and ignore the next (numSets*numWay - 1) requests. This is because
we have to handle Nintendo's dodgy code
- transform all s/w cache ops into clean and invalidate
*/
void HandleTrappedSetWayOperation()
{
u32 clidr = static_cast<u32>(THERMOSPHERE_GET_SYSREG(clidr_el1));
u32 louis = (clidr >> 21) & 7;
u32 csselr = static_cast<u32>(THERMOSPHERE_GET_SYSREG(csselr_el1));
u32 level = (csselr >> 1) & 7;
if (csselr & BIT(0)) {
// Icache, ignore
return;
} else if (level < louis) {
return;
}
u32 ccsidr = static_cast<u32>(THERMOSPHERE_GET_SYSREG(ccsidr_el1));
u32 numWays = 1 + ((ccsidr >> 3) & 0x3FF);
u32 numSets = 1 + ((ccsidr >> 13) & 0x7FFF);
if (currentCoreCtx->setWayCounter++ == 0) {
CleanInvalidateDataCacheLevel(level);
ams::hvisor::cpu::dsbSy();
ams::hvisor::cpu::isb();
}
if (currentCoreCtx->setWayCounter >= numSets * numWays) {
currentCoreCtx->setWayCounter = 0;
}
}
}

View file

@ -0,0 +1,71 @@
/*
* Copyright (c) 2019-2020 Atmosphère-NX
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "hvisor_cpu_instructions.hpp"
#include "hvisor_cpu_sysreg_general.hpp"
namespace ams::hvisor::cpu {

    // Returns the 2-bit field at ctr_el0[15:14].
    static inline u32 GetInstructionCachePolicy(void)
    {
        constexpr u32 policyShift = 14;
        constexpr u32 policyMask = 3;
        const u32 ctrValue = static_cast<u32>(THERMOSPHERE_GET_SYSREG(ctr_el0));
        return (ctrValue >> policyShift) & policyMask;
    }

    // Returns the smallest instruction cache line size, in bytes, derived from ctr_el0[3:0].
    static inline u32 GetSmallestInstructionCacheLineSize(void)
    {
        // The field is the "log2 of the number of words", a word being 4 bytes
        const u32 log2Words = static_cast<u32>(THERMOSPHERE_GET_SYSREG(ctr_el0)) & 0xF;
        return 4 << log2Words;
    }

    // Returns the smallest data cache line size, in bytes, derived from ctr_el0[19:16].
    static inline u32 GetSmallestDataCacheLineSize(void)
    {
        // The field is the "log2 of the number of words", a word being 4 bytes
        const u32 ctrValue = static_cast<u32>(THERMOSPHERE_GET_SYSREG(ctr_el0));
        const u32 log2Words = (ctrValue >> 16) & 0xF;
        return 4 << log2Words;
    }

    // Issues "ic ialluis", then an isb().
    static inline void InvalidateInstructionCache(void)
    {
        __asm__ __volatile__ ("ic ialluis" ::: "memory");
        cpu::isb();
    }

    // Issues "ic iallu", then an isb().
    static inline void InvalidateInstructionCacheLocal(void)
    {
        __asm__ __volatile__ ("ic iallu" ::: "memory");
        cpu::isb();
    }

    // Maintenance by address over the cache lines overlapping [addr, addr + size)
    void CleanInvalidateDataCacheRange(const void *addr, size_t size);
    void CleanDataCacheRangePoU(const void *addr, size_t size);
    void InvalidateInstructionCacheRangePoU(const void *addr, size_t size);

    // Data cache clean + instruction cache invalidate over the range, as needed
    void HandleSelfModifyingCodePoU(const void *addr, size_t size);

    // Set/way invalidation of the data cache levels, meant for boot
    void ClearSharedDataCachesOnBoot(void);
    void ClearLocalDataCacheOnBoot(void);

    // Dunno where else to put that
    void HandleTrappedSetWayOperation();

}