using System;
using System.Collections.Generic;
using System.Numerics;

namespace ARMeilleure.Common
{
    /// <summary>
    /// Represents an expandable table of the type <typeparamref name="TEntry"/>, whose entries will remain at the same
    /// address throughout the table's lifetime.
    /// </summary>
    /// <typeparam name="TEntry">Type of the entry in the table</typeparam>
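    /// <example>
    /// A minimal usage sketch (illustrative only; the entry type and values below are made up for
    /// this example and do not come from any particular caller):
    /// <code>
    /// using var table = new EntryTable&lt;long&gt;(pageSize: 4096);
    ///
    /// int index = table.Allocate();   // Reserve a slot; the new entry is zero-initialized.
    /// table.GetValue(index) = 0x1234; // Write through the returned reference.
    /// table.Free(index);              // Release the slot so it can be reused.
    /// </code>
    /// </example>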
    class EntryTable<TEntry> : IDisposable where TEntry : unmanaged
    {
        private bool _disposed;
        private int _freeHint; // Hint for the index of the next free entry.
        private readonly int _pageCapacity; // Number of entries per page.
        private readonly int _pageLogCapacity;
        private readonly Dictionary<int, IntPtr> _pages;
        private readonly BitMap _allocated;

        /// <summary>
        /// Initializes a new instance of the <see cref="EntryTable{TEntry}"/> class with the desired page size in
        /// bytes.
        /// </summary>
        /// <param name="pageSize">Desired page size in bytes</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="pageSize"/> is less than 0</exception>
        /// <exception cref="ArgumentException"><typeparamref name="TEntry"/>'s size is zero</exception>
        /// <remarks>
        /// The actual page size may be smaller or larger depending on the size of <typeparamref name="TEntry"/>.
        /// </remarks>
        public unsafe EntryTable(int pageSize = 4096)
        {
            if (pageSize < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(pageSize), "Page size cannot be negative.");
            }

            if (sizeof(TEntry) == 0)
            {
                throw new ArgumentException("Size of TEntry cannot be zero.");
            }

            _allocated = new BitMap(NativeAllocator.Instance);
            _pages = new Dictionary<int, IntPtr>();
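            // The capacity is rounded down to a power of two, so locating a page and a slot within it
            // below reduces to shifts and masks.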
            _pageLogCapacity = BitOperations.Log2((uint)(pageSize / sizeof(TEntry)));
            _pageCapacity = 1 << _pageLogCapacity;
        }

        /// <summary>
        /// Allocates an entry in the <see cref="EntryTable{TEntry}"/>.
        /// </summary>
        /// <returns>Index of entry allocated in the table</returns>
        /// <exception cref="ObjectDisposedException"><see cref="EntryTable{TEntry}"/> instance was disposed</exception>
        public int Allocate()
        {
            if (_disposed)
            {
                throw new ObjectDisposedException(null);
            }

            lock (_allocated)
            {
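                // _freeHint is only a hint: when the hinted slot is already taken, fall back to
                // scanning the allocation bitmap for the first unset bit.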
                if (_allocated.IsSet(_freeHint))
                {
                    _freeHint = _allocated.FindFirstUnset();
                }

                int index = _freeHint++;
                var page = GetPage(index);

                _allocated.Set(index);

                GetValue(page, index) = default;

                return index;
            }
        }

        /// <summary>
        /// Frees the entry at the specified <paramref name="index"/>.
        /// </summary>
        /// <param name="index">Index of entry to free</param>
        /// <exception cref="ObjectDisposedException"><see cref="EntryTable{TEntry}"/> instance was disposed</exception>
        public void Free(int index)
        {
            if (_disposed)
            {
                throw new ObjectDisposedException(null);
            }

            lock (_allocated)
            {
                if (_allocated.IsSet(index))
                {
                    _allocated.Clear(index);
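
                    // Point the hint at the slot that was just freed so the next Allocate can reuse
                    // it without scanning the bitmap.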
                    _freeHint = index;
                }
            }
        }

        /// <summary>
        /// Gets a reference to the entry at the specified allocated <paramref name="index"/>.
        /// </summary>
        /// <param name="index">Index of the entry</param>
        /// <returns>Reference to the entry at the specified <paramref name="index"/></returns>
        /// <exception cref="ObjectDisposedException"><see cref="EntryTable{TEntry}"/> instance was disposed</exception>
        /// <exception cref="ArgumentException">Entry at <paramref name="index"/> is not allocated</exception>
        public ref TEntry GetValue(int index)
        {
            if (_disposed)
            {
                throw new ObjectDisposedException(null);
            }

            lock (_allocated)
            {
                if (!_allocated.IsSet(index))
                {
                    throw new ArgumentException("Entry at the specified index was not allocated", nameof(index));
                }

                var page = GetPage(index);

                return ref GetValue(page, index);
            }
        }

        /// <summary>
        /// Gets a reference to the entry at the specified <paramref name="index"/> from the specified
        /// <paramref name="page"/>.
        /// </summary>
        /// <param name="page">Page to use</param>
        /// <param name="index">Index to use</param>
        /// <returns>Reference to the entry</returns>
        private ref TEntry GetValue(Span<TEntry> page, int index)
        {
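            // Entries are stored contiguously within a page, so the low bits of the table-wide index
            // select the slot inside the page (the page capacity is a power of two).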
            return ref page[index & (_pageCapacity - 1)];
        }

        /// <summary>
        /// Gets the page for the specified <paramref name="index"/>.
        /// </summary>
        /// <param name="index">Index to use</param>
        /// <returns>Page for the specified <paramref name="index"/></returns>
        private unsafe Span<TEntry> GetPage(int index)
        {
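            // A page holds _pageCapacity entries, so the page index is the entry index divided by the
            // page capacity (computed here by masking off the low bits and shifting by log2 of the capacity).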
            var pageIndex = (int)((uint)(index & ~(_pageCapacity - 1)) >> _pageLogCapacity);

            if (!_pages.TryGetValue(pageIndex, out IntPtr page))
            {
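                // Pages are created lazily from unmanaged memory the first time an index that maps to
                // them is used, and they are only released when the table is disposed.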
                page = (IntPtr)NativeAllocator.Instance.Allocate((uint)sizeof(TEntry) * (uint)_pageCapacity);

                _pages.Add(pageIndex, page);
            }

            return new Span<TEntry>((void*)page, _pageCapacity);
        }

        /// <summary>
        /// Releases all resources used by the <see cref="EntryTable{TEntry}"/> instance.
        /// </summary>
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Releases all unmanaged and optionally managed resources used by the <see cref="EntryTable{TEntry}"/>
        /// instance.
        /// </summary>
        /// <param name="disposing"><see langword="true"/> to dispose managed resources also; otherwise just unmanaged resources</param>
        protected unsafe virtual void Dispose(bool disposing)
        {
            if (!_disposed)
            {
                _allocated.Dispose();

                foreach (var page in _pages.Values)
                {
                    NativeAllocator.Instance.Free((void*)page);
                }

                _disposed = true;
            }
        }

        /// <summary>
        /// Frees resources used by the <see cref="EntryTable{TEntry}"/> instance.
        /// </summary>
        ~EntryTable()
        {
            Dispose(false);
        }
    }
}