1
0
Fork 0
mirror of https://github.com/Ryujinx/Ryujinx.git synced 2024-12-30 15:56:03 +00:00
Ryujinx/ARMeilleure/Common/AddressTable.cs

262 lines
9 KiB
C#
Raw Normal View History

2022-09-09 00:14:08 +01:00
using ARMeilleure.Diagnostics;
Add multi-level function table (#2228) * Add AddressTable<T> * Use AddressTable<T> for dispatch * Remove JumpTable & co. * Add fallback for out of range addresses * Add PPTC support * Add documentation to `AddressTable<T>` * Make AddressTable<T> configurable * Fix table walk * Fix IsMapped check * Remove CountTableCapacity * Add PPTC support for fast path * Rename IsMapped to IsValid * Remove stale comment * Change format of address in exception message * Add TranslatorStubs * Split DispatchStub Avoids recompilation of stubs during tests. * Add hint for 64bit or 32bit * Add documentation to `Symbol` * Add documentation to `TranslatorStubs` Make `TranslatorStubs` disposable as well. * Add documentation to `SymbolType` * Add `AddressTableEventSource` to monitor function table size Add an EventSource which measures the amount of unmanaged bytes allocated by AddressTable<T> instances. dotnet-counters monitor -n Ryujinx --counters ARMeilleure * Add `AllowLcqInFunctionTable` optimization toggle This is to reduce the impact this change has on the test duration. Before everytime a test was ran, the FunctionTable would be initialized and populated so that the newly compiled test would get registered to it. * Implement unmanaged dispatcher Uses the DispatchStub to dispatch into the next translation, which allows execution to stay in unmanaged for longer and skips a ConcurrentDictionary look up when the target translation has been registered to the FunctionTable. * Remove redundant null check * Tune levels of FunctionTable Uses 5 levels instead of 4 and change unit of AddressTableEventSource from KB to MB. * Use 64-bit function table Improves codegen for direct branches: mov qword [rax+0x408],0x10603560 - mov rcx,sub_10603560_OFFSET - mov ecx,[rcx] - mov ecx,ecx - mov rdx,JIT_CACHE_BASE - add rdx,rcx + mov rcx,sub_10603560 + mov rdx,[rcx] mov rcx,rax Improves codegen for dispatch stub: and rax,byte +0x1f - mov eax,[rcx+rax*4] - mov eax,eax - mov rcx,JIT_CACHE_BASE - lea rax,[rcx+rax] + mov rax,[rcx+rax*8] mov rcx,rbx * Remove `JitCacheSymbol` & `JitCache.Offset` * Turn `Translator.Translate` into an instance method We do not have to add more parameter to this method and related ones as new structures are added & needed for translation. * Add symbol only when PTC is enabled Address LDj3SNuD's feedback * Change `NativeContext.Running` to a 32-bit integer * Fix PageTable symbol for host mapped
2021-05-29 22:06:28 +01:00
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
namespace ARMeilleure.Common
{
/// <summary>
/// Represents a table of guest address to a value.
/// </summary>
/// <typeparam name="TEntry">Type of the value</typeparam>
unsafe class AddressTable<TEntry> : IDisposable where TEntry : unmanaged
{
/// <summary>
/// Represents a level in an <see cref="AddressTable{TEntry}"/>.
/// </summary>
public readonly struct Level
{
/// <summary>
/// Gets the index of the <see cref="Level"/> in the guest address.
/// </summary>
public int Index { get; }
/// <summary>
/// Gets the length of the <see cref="Level"/> in the guest address.
/// </summary>
public int Length { get; }
/// <summary>
/// Gets the mask which masks the bits used by the <see cref="Level"/>.
/// </summary>
public ulong Mask => ((1ul << Length) - 1) << Index;
/// <summary>
/// Initializes a new instance of the <see cref="Level"/> structure with the specified
/// <paramref name="index"/> and <paramref name="length"/>.
/// </summary>
/// <param name="index">Index of the <see cref="Level"/></param>
/// <param name="length">Length of the <see cref="Level"/></param>
public Level(int index, int length)
{
(Index, Length) = (index, length);
}
/// <summary>
/// Gets the value of the <see cref="Level"/> from the specified guest <paramref name="address"/>.
/// </summary>
/// <param name="address">Guest address</param>
/// <returns>Value of the <see cref="Level"/> from the specified guest <paramref name="address"/></returns>
public int GetValue(ulong address)
{
return (int)((address & Mask) >> Index);
}
}
private bool _disposed;
private TEntry** _table;
private readonly List<IntPtr> _pages;
/// <summary>
/// Gets the bits used by the <see cref="Levels"/> of the <see cref="AddressTable{TEntry}"/> instance.
/// </summary>
public ulong Mask { get; }
/// <summary>
/// Gets the <see cref="Level"/>s used by the <see cref="AddressTable{TEntry}"/> instance.
/// </summary>
public Level[] Levels { get; }
/// <summary>
/// Gets or sets the default fill value of newly created leaf pages.
/// </summary>
public TEntry Fill { get; set; }
/// <summary>
/// Gets the base address of the <see cref="EntryTable{TEntry}"/>.
/// </summary>
/// <exception cref="ObjectDisposedException"><see cref="EntryTable{TEntry}"/> instance was disposed</exception>
public IntPtr Base
{
get
{
if (_disposed)
{
throw new ObjectDisposedException(null);
}
lock (_pages)
{
return (IntPtr)GetRootPage();
}
}
}
/// <summary>
/// Constructs a new instance of the <see cref="AddressTable{TEntry}"/> class with the specified list of
/// <see cref="Level"/>.
/// </summary>
/// <exception cref="ArgumentNullException"><paramref name="levels"/> is null</exception>
/// <exception cref="ArgumentException">Length of <paramref name="levels"/> is less than 2</exception>
public AddressTable(Level[] levels)
{
if (levels == null)
{
throw new ArgumentNullException(nameof(levels));
}
if (levels.Length < 2)
{
throw new ArgumentException("Table must be at least 2 levels deep.", nameof(levels));
}
_pages = new List<IntPtr>(capacity: 16);
Levels = levels;
Mask = 0;
foreach (var level in Levels)
{
Mask |= level.Mask;
}
}
/// <summary>
/// Determines if the specified <paramref name="address"/> is in the range of the
/// <see cref="AddressTable{TEntry}"/>.
/// </summary>
/// <param name="address">Guest address</param>
/// <returns><see langword="true"/> if is valid; otherwise <see langword="false"/></returns>
public bool IsValid(ulong address)
{
return (address & ~Mask) == 0;
}
/// <summary>
/// Gets a reference to the value at the specified guest <paramref name="address"/>.
/// </summary>
/// <param name="address">Guest address</param>
/// <returns>Reference to the value at the specified guest <paramref name="address"/></returns>
/// <exception cref="ObjectDisposedException"><see cref="EntryTable{TEntry}"/> instance was disposed</exception>
/// <exception cref="ArgumentException"><paramref name="address"/> is not mapped</exception>
public ref TEntry GetValue(ulong address)
{
if (_disposed)
{
throw new ObjectDisposedException(null);
}
if (!IsValid(address))
{
throw new ArgumentException($"Address 0x{address:X} is not mapped onto the table.", nameof(address));
}
lock (_pages)
{
return ref GetPage(address)[Levels[^1].GetValue(address)];
}
}
/// <summary>
/// Gets the leaf page for the specified guest <paramref name="address"/>.
/// </summary>
/// <param name="address">Guest address</param>
/// <returns>Leaf page for the specified guest <paramref name="address"/></returns>
private TEntry* GetPage(ulong address)
{
TEntry** page = GetRootPage();
for (int i = 0; i < Levels.Length - 1; i++)
{
ref Level level = ref Levels[i];
ref TEntry* nextPage = ref page[level.GetValue(address)];
if (nextPage == null)
{
ref Level nextLevel = ref Levels[i + 1];
nextPage = i == Levels.Length - 2 ?
(TEntry*)Allocate(1 << nextLevel.Length, Fill, leaf: true) :
(TEntry*)Allocate(1 << nextLevel.Length, IntPtr.Zero, leaf: false);
}
page = (TEntry**)nextPage;
}
return (TEntry*)page;
}
/// <summary>
/// Lazily initialize and get the root page of the <see cref="AddressTable{TEntry}"/>.
/// </summary>
/// <returns>Root page of the <see cref="AddressTable{TEntry}"/></returns>
private TEntry** GetRootPage()
{
if (_table == null)
{
_table = (TEntry**)Allocate(1 << Levels[0].Length, fill: IntPtr.Zero, leaf: false);
}
return _table;
}
/// <summary>
/// Allocates a block of memory of the specified type and length.
/// </summary>
/// <typeparam name="T">Type of elements</typeparam>
/// <param name="length">Number of elements</param>
/// <param name="fill">Fill value</param>
/// <param name="leaf"><see langword="true"/> if leaf; otherwise <see langword="false"/></param>
Add multi-level function table (#2228) * Add AddressTable<T> * Use AddressTable<T> for dispatch * Remove JumpTable & co. * Add fallback for out of range addresses * Add PPTC support * Add documentation to `AddressTable<T>` * Make AddressTable<T> configurable * Fix table walk * Fix IsMapped check * Remove CountTableCapacity * Add PPTC support for fast path * Rename IsMapped to IsValid * Remove stale comment * Change format of address in exception message * Add TranslatorStubs * Split DispatchStub Avoids recompilation of stubs during tests. * Add hint for 64bit or 32bit * Add documentation to `Symbol` * Add documentation to `TranslatorStubs` Make `TranslatorStubs` disposable as well. * Add documentation to `SymbolType` * Add `AddressTableEventSource` to monitor function table size Add an EventSource which measures the amount of unmanaged bytes allocated by AddressTable<T> instances. dotnet-counters monitor -n Ryujinx --counters ARMeilleure * Add `AllowLcqInFunctionTable` optimization toggle This is to reduce the impact this change has on the test duration. Before everytime a test was ran, the FunctionTable would be initialized and populated so that the newly compiled test would get registered to it. * Implement unmanaged dispatcher Uses the DispatchStub to dispatch into the next translation, which allows execution to stay in unmanaged for longer and skips a ConcurrentDictionary look up when the target translation has been registered to the FunctionTable. * Remove redundant null check * Tune levels of FunctionTable Uses 5 levels instead of 4 and change unit of AddressTableEventSource from KB to MB. * Use 64-bit function table Improves codegen for direct branches: mov qword [rax+0x408],0x10603560 - mov rcx,sub_10603560_OFFSET - mov ecx,[rcx] - mov ecx,ecx - mov rdx,JIT_CACHE_BASE - add rdx,rcx + mov rcx,sub_10603560 + mov rdx,[rcx] mov rcx,rax Improves codegen for dispatch stub: and rax,byte +0x1f - mov eax,[rcx+rax*4] - mov eax,eax - mov rcx,JIT_CACHE_BASE - lea rax,[rcx+rax] + mov rax,[rcx+rax*8] mov rcx,rbx * Remove `JitCacheSymbol` & `JitCache.Offset` * Turn `Translator.Translate` into an instance method We do not have to add more parameter to this method and related ones as new structures are added & needed for translation. * Add symbol only when PTC is enabled Address LDj3SNuD's feedback * Change `NativeContext.Running` to a 32-bit integer * Fix PageTable symbol for host mapped
2021-05-29 22:06:28 +01:00
/// <returns>Allocated block</returns>
private IntPtr Allocate<T>(int length, T fill, bool leaf) where T : unmanaged
{
var size = sizeof(T) * length;
Reduce JIT GC allocations (#2515) * Turn `MemoryOperand` into a struct * Remove `IntrinsicOperation` * Remove `PhiNode` * Remove `Node` * Turn `Operand` into a struct * Turn `Operation` into a struct * Clean up pool management methods * Add `Arena` allocator * Move `OperationHelper` to `Operation.Factory` * Move `OperandHelper` to `Operand.Factory` * Optimize `Operation` a bit * Fix `Arena` initialization * Rename `NativeList<T>` to `ArenaList<T>` * Reduce `Operand` size from 88 to 56 bytes * Reduce `Operation` size from 56 to 40 bytes * Add optimistic interning of Register & Constant operands * Optimize `RegisterUsage` pass a bit * Optimize `RemoveUnusedNodes` pass a bit Iterating in reverse-order allows killing dependency chains in a single pass. * Fix PPTC symbols * Optimize `BasicBlock` a bit Reduce allocations from `_successor` & `DominanceFrontiers` * Fix `Operation` resize * Make `Arena` expandable Change the arena allocator to be expandable by allocating in pages, with some of them being pooled. Currently 32 pages are pooled. An LRU removal mechanism should probably be added to it. Apparently MHR can allocate bitmaps large enough to exceed the 16MB limit for the type. * Move `Arena` & `ArenaList` to `Common` * Remove `ThreadStaticPool` & co * Add `PhiOperation` * Reduce `Operand` size from 56 from 48 bytes * Add linear-probing to `Operand` intern table * Optimize `HybridAllocator` a bit * Add `Allocators` class * Tune `ArenaAllocator` sizes * Add page removal mechanism to `ArenaAllocator` Remove pages which have not been used for more than 5s after each reset. I am on fence if this would be better using a Gen2 callback object like the one in System.Buffers.ArrayPool<T>, to trim the pool. Because right now if a large translation happens, the pages will be freed only after a reset. This reset may not happen for a while because no new translation is hit, but the arena base sizes are rather small. * Fix `OOM` when allocating larger than page size in `ArenaAllocator` Tweak resizing mechanism for Operand.Uses and Assignemnts. * Optimize `Optimizer` a bit * Optimize `Operand.Add<T>/Remove<T>` a bit * Clean up `PreAllocator` * Fix phi insertion order Reduce codegen diffs. * Fix code alignment * Use new heuristics for degree of parallelism * Suppress warnings * Address gdkchan's feedback Renamed `GetValue()` to `GetValueUnsafe()` to make it more clear that `Operand.Value` should usually not be modified directly. * Add fast path to `ArenaAllocator` * Assembly for `ArenaAllocator.Allocate(ulong)`: .L0: mov rax, [rcx+0x18] lea r8, [rax+rdx] cmp r8, [rcx+0x10] ja short .L2 .L1: mov rdx, [rcx+8] add rax, [rdx+8] mov [rcx+0x18], r8 ret .L2: jmp ArenaAllocator.AllocateSlow(UInt64) A few variable/field had to be changed to ulong so that RyuJIT avoids emitting zero-extends. * Implement a new heuristic to free pooled pages. If an arena is used often, it is more likely that its pages will be needed, so the pages are kept for longer (e.g: during PPTC rebuild or burst sof compilations). If is not used often, then it is more likely that its pages will not be needed (e.g: after PPTC rebuild or bursts of compilations). * Address riperiperi's feedback * Use `EqualityComparer<T>` in `IntrusiveList<T>` Avoids a potential GC hole in `Equals(T, T)`.
2021-08-17 19:08:34 +01:00
var page = (IntPtr)NativeAllocator.Instance.Allocate((uint)size);
Add multi-level function table (#2228) * Add AddressTable<T> * Use AddressTable<T> for dispatch * Remove JumpTable & co. * Add fallback for out of range addresses * Add PPTC support * Add documentation to `AddressTable<T>` * Make AddressTable<T> configurable * Fix table walk * Fix IsMapped check * Remove CountTableCapacity * Add PPTC support for fast path * Rename IsMapped to IsValid * Remove stale comment * Change format of address in exception message * Add TranslatorStubs * Split DispatchStub Avoids recompilation of stubs during tests. * Add hint for 64bit or 32bit * Add documentation to `Symbol` * Add documentation to `TranslatorStubs` Make `TranslatorStubs` disposable as well. * Add documentation to `SymbolType` * Add `AddressTableEventSource` to monitor function table size Add an EventSource which measures the amount of unmanaged bytes allocated by AddressTable<T> instances. dotnet-counters monitor -n Ryujinx --counters ARMeilleure * Add `AllowLcqInFunctionTable` optimization toggle This is to reduce the impact this change has on the test duration. Before everytime a test was ran, the FunctionTable would be initialized and populated so that the newly compiled test would get registered to it. * Implement unmanaged dispatcher Uses the DispatchStub to dispatch into the next translation, which allows execution to stay in unmanaged for longer and skips a ConcurrentDictionary look up when the target translation has been registered to the FunctionTable. * Remove redundant null check * Tune levels of FunctionTable Uses 5 levels instead of 4 and change unit of AddressTableEventSource from KB to MB. * Use 64-bit function table Improves codegen for direct branches: mov qword [rax+0x408],0x10603560 - mov rcx,sub_10603560_OFFSET - mov ecx,[rcx] - mov ecx,ecx - mov rdx,JIT_CACHE_BASE - add rdx,rcx + mov rcx,sub_10603560 + mov rdx,[rcx] mov rcx,rax Improves codegen for dispatch stub: and rax,byte +0x1f - mov eax,[rcx+rax*4] - mov eax,eax - mov rcx,JIT_CACHE_BASE - lea rax,[rcx+rax] + mov rax,[rcx+rax*8] mov rcx,rbx * Remove `JitCacheSymbol` & `JitCache.Offset` * Turn `Translator.Translate` into an instance method We do not have to add more parameter to this method and related ones as new structures are added & needed for translation. * Add symbol only when PTC is enabled Address LDj3SNuD's feedback * Change `NativeContext.Running` to a 32-bit integer * Fix PageTable symbol for host mapped
2021-05-29 22:06:28 +01:00
var span = new Span<T>((void*)page, length);
span.Fill(fill);
_pages.Add(page);
2022-09-09 00:14:08 +01:00
TranslatorEventSource.Log.AddressTableAllocated(size, leaf);
Add multi-level function table (#2228) * Add AddressTable<T> * Use AddressTable<T> for dispatch * Remove JumpTable & co. * Add fallback for out of range addresses * Add PPTC support * Add documentation to `AddressTable<T>` * Make AddressTable<T> configurable * Fix table walk * Fix IsMapped check * Remove CountTableCapacity * Add PPTC support for fast path * Rename IsMapped to IsValid * Remove stale comment * Change format of address in exception message * Add TranslatorStubs * Split DispatchStub Avoids recompilation of stubs during tests. * Add hint for 64bit or 32bit * Add documentation to `Symbol` * Add documentation to `TranslatorStubs` Make `TranslatorStubs` disposable as well. * Add documentation to `SymbolType` * Add `AddressTableEventSource` to monitor function table size Add an EventSource which measures the amount of unmanaged bytes allocated by AddressTable<T> instances. dotnet-counters monitor -n Ryujinx --counters ARMeilleure * Add `AllowLcqInFunctionTable` optimization toggle This is to reduce the impact this change has on the test duration. Before everytime a test was ran, the FunctionTable would be initialized and populated so that the newly compiled test would get registered to it. * Implement unmanaged dispatcher Uses the DispatchStub to dispatch into the next translation, which allows execution to stay in unmanaged for longer and skips a ConcurrentDictionary look up when the target translation has been registered to the FunctionTable. * Remove redundant null check * Tune levels of FunctionTable Uses 5 levels instead of 4 and change unit of AddressTableEventSource from KB to MB. * Use 64-bit function table Improves codegen for direct branches: mov qword [rax+0x408],0x10603560 - mov rcx,sub_10603560_OFFSET - mov ecx,[rcx] - mov ecx,ecx - mov rdx,JIT_CACHE_BASE - add rdx,rcx + mov rcx,sub_10603560 + mov rdx,[rcx] mov rcx,rax Improves codegen for dispatch stub: and rax,byte +0x1f - mov eax,[rcx+rax*4] - mov eax,eax - mov rcx,JIT_CACHE_BASE - lea rax,[rcx+rax] + mov rax,[rcx+rax*8] mov rcx,rbx * Remove `JitCacheSymbol` & `JitCache.Offset` * Turn `Translator.Translate` into an instance method We do not have to add more parameter to this method and related ones as new structures are added & needed for translation. * Add symbol only when PTC is enabled Address LDj3SNuD's feedback * Change `NativeContext.Running` to a 32-bit integer * Fix PageTable symbol for host mapped
2021-05-29 22:06:28 +01:00
return page;
}
/// <summary>
/// Releases all resources used by the <see cref="AddressTable{TEntry}"/> instance.
/// </summary>
public void Dispose()
{
Dispose(true);
GC.SuppressFinalize(this);
}
/// <summary>
/// Releases all unmanaged and optionally managed resources used by the <see cref="AddressTable{TEntry}"/>
/// instance.
/// </summary>
/// <param name="disposing"><see langword="true"/> to dispose managed resources also; otherwise just unmanaged resouces</param>
protected virtual void Dispose(bool disposing)
{
if (!_disposed)
{
foreach (var page in _pages)
{
Marshal.FreeHGlobal(page);
}
_disposed = true;
}
}
/// <summary>
/// Frees resources used by the <see cref="AddressTable{TEntry}"/> instance.
/// </summary>
~AddressTable()
{
Dispose(false);
}
}
}