mirror of
https://github.com/Ryujinx/Ryujinx.git
synced 2024-12-20 14:32:00 +00:00
Buffer Conversions (#23)
* Why is this not working * Revert helper shader changes for now * Byte Index Buffer Restride
This commit is contained in:
parent
02de48a6f2
commit
6ebe5bb406
10 changed files with 692 additions and 39 deletions
|
@ -10,6 +10,8 @@ namespace Ryujinx.Graphics.Metal
|
||||||
[SupportedOSPlatform("macos")]
|
[SupportedOSPlatform("macos")]
|
||||||
public class BufferHolder : IDisposable
|
public class BufferHolder : IDisposable
|
||||||
{
|
{
|
||||||
|
private CacheByRange<BufferHolder> _cachedConvertedBuffers;
|
||||||
|
|
||||||
public int Size { get; }
|
public int Size { get; }
|
||||||
|
|
||||||
private readonly IntPtr _map;
|
private readonly IntPtr _map;
|
||||||
|
@ -271,9 +273,66 @@ namespace Ryujinx.Graphics.Metal
|
||||||
_waitable.WaitForFences(offset, size);
|
_waitable.WaitForFences(offset, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private bool BoundToRange(int offset, ref int size)
|
||||||
|
{
|
||||||
|
if (offset >= Size)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
size = Math.Min(Size - offset, size);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Auto<DisposableBuffer> GetBufferI8ToI16(CommandBufferScoped cbs, int offset, int size)
|
||||||
|
{
|
||||||
|
if (!BoundToRange(offset, ref size))
|
||||||
|
{
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
var key = new I8ToI16CacheKey(_renderer);
|
||||||
|
|
||||||
|
if (!_cachedConvertedBuffers.TryGetValue(offset, size, key, out var holder))
|
||||||
|
{
|
||||||
|
holder = _renderer.BufferManager.Create((size * 2 + 3) & ~3);
|
||||||
|
|
||||||
|
_renderer.HelperShader.ConvertI8ToI16(cbs, this, holder, offset, size);
|
||||||
|
|
||||||
|
key.SetBuffer(holder.GetBuffer());
|
||||||
|
|
||||||
|
_cachedConvertedBuffers.Add(offset, size, key, holder);
|
||||||
|
}
|
||||||
|
|
||||||
|
return holder.GetBuffer();
|
||||||
|
}
|
||||||
|
|
||||||
|
public bool TryGetCachedConvertedBuffer(int offset, int size, ICacheKey key, out BufferHolder holder)
|
||||||
|
{
|
||||||
|
return _cachedConvertedBuffers.TryGetValue(offset, size, key, out holder);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void AddCachedConvertedBuffer(int offset, int size, ICacheKey key, BufferHolder holder)
|
||||||
|
{
|
||||||
|
_cachedConvertedBuffers.Add(offset, size, key, holder);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void AddCachedConvertedBufferDependency(int offset, int size, ICacheKey key, Dependency dependency)
|
||||||
|
{
|
||||||
|
_cachedConvertedBuffers.AddDependency(offset, size, key, dependency);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void RemoveCachedConvertedBuffer(int offset, int size, ICacheKey key)
|
||||||
|
{
|
||||||
|
_cachedConvertedBuffers.Remove(offset, size, key);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public void Dispose()
|
public void Dispose()
|
||||||
{
|
{
|
||||||
_buffer.Dispose();
|
_buffer.Dispose();
|
||||||
|
_cachedConvertedBuffers.Dispose();
|
||||||
|
|
||||||
_flushLock.EnterWriteLock();
|
_flushLock.EnterWriteLock();
|
||||||
|
|
||||||
|
|
|
@ -153,6 +153,16 @@ namespace Ryujinx.Graphics.Metal
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Auto<DisposableBuffer> GetBufferI8ToI16(CommandBufferScoped cbs, BufferHandle handle, int offset, int size)
|
||||||
|
{
|
||||||
|
if (TryGetBuffer(handle, out var holder))
|
||||||
|
{
|
||||||
|
return holder.GetBufferI8ToI16(cbs, offset, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
public PinnedSpan<byte> GetData(BufferHandle handle, int offset, int size)
|
public PinnedSpan<byte> GetData(BufferHandle handle, int offset, int size)
|
||||||
{
|
{
|
||||||
if (TryGetBuffer(handle, out var holder))
|
if (TryGetBuffer(handle, out var holder))
|
||||||
|
|
333
src/Ryujinx.Graphics.Metal/CacheByRange.cs
Normal file
333
src/Ryujinx.Graphics.Metal/CacheByRange.cs
Normal file
|
@ -0,0 +1,333 @@
|
||||||
|
using SharpMetal.Metal;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Runtime.Versioning;
|
||||||
|
|
||||||
|
namespace Ryujinx.Graphics.Metal
|
||||||
|
{
|
||||||
|
public interface ICacheKey : IDisposable
|
||||||
|
{
|
||||||
|
bool KeyEqual(ICacheKey other);
|
||||||
|
}
|
||||||
|
|
||||||
|
[SupportedOSPlatform("macos")]
|
||||||
|
struct I8ToI16CacheKey : ICacheKey
|
||||||
|
{
|
||||||
|
// Used to notify the pipeline that bindings have invalidated on dispose.
|
||||||
|
private readonly MetalRenderer _renderer;
|
||||||
|
private Auto<DisposableBuffer> _buffer;
|
||||||
|
|
||||||
|
public I8ToI16CacheKey(MetalRenderer renderer)
|
||||||
|
{
|
||||||
|
_renderer = renderer;
|
||||||
|
_buffer = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public readonly bool KeyEqual(ICacheKey other)
|
||||||
|
{
|
||||||
|
return other is I8ToI16CacheKey;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void SetBuffer(Auto<DisposableBuffer> buffer)
|
||||||
|
{
|
||||||
|
_buffer = buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void Dispose()
|
||||||
|
{
|
||||||
|
// TODO: Tell pipeline buffer is dirty!
|
||||||
|
// _renderer.PipelineInternal.DirtyIndexBuffer(_buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
[SupportedOSPlatform("macos")]
|
||||||
|
struct AlignedVertexBufferCacheKey : ICacheKey
|
||||||
|
{
|
||||||
|
private readonly int _stride;
|
||||||
|
private readonly int _alignment;
|
||||||
|
|
||||||
|
// Used to notify the pipeline that bindings have invalidated on dispose.
|
||||||
|
private readonly MetalRenderer _renderer;
|
||||||
|
private Auto<DisposableBuffer> _buffer;
|
||||||
|
|
||||||
|
public AlignedVertexBufferCacheKey(MetalRenderer renderer, int stride, int alignment)
|
||||||
|
{
|
||||||
|
_renderer = renderer;
|
||||||
|
_stride = stride;
|
||||||
|
_alignment = alignment;
|
||||||
|
_buffer = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public readonly bool KeyEqual(ICacheKey other)
|
||||||
|
{
|
||||||
|
return other is AlignedVertexBufferCacheKey entry &&
|
||||||
|
entry._stride == _stride &&
|
||||||
|
entry._alignment == _alignment;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void SetBuffer(Auto<DisposableBuffer> buffer)
|
||||||
|
{
|
||||||
|
_buffer = buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public readonly void Dispose()
|
||||||
|
{
|
||||||
|
// TODO: Tell pipeline buffer is dirty!
|
||||||
|
// _renderer.PipelineInternal.DirtyVertexBuffer(_buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
[SupportedOSPlatform("macos")]
|
||||||
|
struct TopologyConversionCacheKey : ICacheKey
|
||||||
|
{
|
||||||
|
// TODO: Patterns
|
||||||
|
// private readonly IndexBufferPattern _pattern;
|
||||||
|
private readonly int _indexSize;
|
||||||
|
|
||||||
|
// Used to notify the pipeline that bindings have invalidated on dispose.
|
||||||
|
private readonly MetalRenderer _renderer;
|
||||||
|
private Auto<DisposableBuffer> _buffer;
|
||||||
|
|
||||||
|
public TopologyConversionCacheKey(MetalRenderer renderer, /*IndexBufferPattern pattern, */int indexSize)
|
||||||
|
{
|
||||||
|
_renderer = renderer;
|
||||||
|
// _pattern = pattern;
|
||||||
|
_indexSize = indexSize;
|
||||||
|
_buffer = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public readonly bool KeyEqual(ICacheKey other)
|
||||||
|
{
|
||||||
|
return other is TopologyConversionCacheKey entry &&
|
||||||
|
// entry._pattern == _pattern &&
|
||||||
|
entry._indexSize == _indexSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void SetBuffer(Auto<DisposableBuffer> buffer)
|
||||||
|
{
|
||||||
|
_buffer = buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public readonly void Dispose()
|
||||||
|
{
|
||||||
|
// TODO: Tell pipeline buffer is dirty!
|
||||||
|
// _renderer.PipelineInternal.DirtyVertexBuffer(_buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
[SupportedOSPlatform("macos")]
|
||||||
|
public readonly struct Dependency
|
||||||
|
{
|
||||||
|
private readonly BufferHolder _buffer;
|
||||||
|
private readonly int _offset;
|
||||||
|
private readonly int _size;
|
||||||
|
private readonly ICacheKey _key;
|
||||||
|
|
||||||
|
public Dependency(BufferHolder buffer, int offset, int size, ICacheKey key)
|
||||||
|
{
|
||||||
|
_buffer = buffer;
|
||||||
|
_offset = offset;
|
||||||
|
_size = size;
|
||||||
|
_key = key;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void RemoveFromOwner()
|
||||||
|
{
|
||||||
|
_buffer.RemoveCachedConvertedBuffer(_offset, _size, _key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
[SupportedOSPlatform("macos")]
|
||||||
|
struct CacheByRange<T> where T : IDisposable
|
||||||
|
{
|
||||||
|
private struct Entry
|
||||||
|
{
|
||||||
|
public ICacheKey Key;
|
||||||
|
public T Value;
|
||||||
|
public List<Dependency> DependencyList;
|
||||||
|
|
||||||
|
public Entry(ICacheKey key, T value)
|
||||||
|
{
|
||||||
|
Key = key;
|
||||||
|
Value = value;
|
||||||
|
DependencyList = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public readonly void InvalidateDependencies()
|
||||||
|
{
|
||||||
|
if (DependencyList != null)
|
||||||
|
{
|
||||||
|
foreach (Dependency dependency in DependencyList)
|
||||||
|
{
|
||||||
|
dependency.RemoveFromOwner();
|
||||||
|
}
|
||||||
|
|
||||||
|
DependencyList.Clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Dictionary<ulong, List<Entry>> _ranges;
|
||||||
|
|
||||||
|
public void Add(int offset, int size, ICacheKey key, T value)
|
||||||
|
{
|
||||||
|
List<Entry> entries = GetEntries(offset, size);
|
||||||
|
|
||||||
|
entries.Add(new Entry(key, value));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void AddDependency(int offset, int size, ICacheKey key, Dependency dependency)
|
||||||
|
{
|
||||||
|
List<Entry> entries = GetEntries(offset, size);
|
||||||
|
|
||||||
|
for (int i = 0; i < entries.Count; i++)
|
||||||
|
{
|
||||||
|
Entry entry = entries[i];
|
||||||
|
|
||||||
|
if (entry.Key.KeyEqual(key))
|
||||||
|
{
|
||||||
|
if (entry.DependencyList == null)
|
||||||
|
{
|
||||||
|
entry.DependencyList = new List<Dependency>();
|
||||||
|
entries[i] = entry;
|
||||||
|
}
|
||||||
|
|
||||||
|
entry.DependencyList.Add(dependency);
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void Remove(int offset, int size, ICacheKey key)
|
||||||
|
{
|
||||||
|
List<Entry> entries = GetEntries(offset, size);
|
||||||
|
|
||||||
|
for (int i = 0; i < entries.Count; i++)
|
||||||
|
{
|
||||||
|
Entry entry = entries[i];
|
||||||
|
|
||||||
|
if (entry.Key.KeyEqual(key))
|
||||||
|
{
|
||||||
|
entries.RemoveAt(i--);
|
||||||
|
|
||||||
|
DestroyEntry(entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (entries.Count == 0)
|
||||||
|
{
|
||||||
|
_ranges.Remove(PackRange(offset, size));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public bool TryGetValue(int offset, int size, ICacheKey key, out T value)
|
||||||
|
{
|
||||||
|
List<Entry> entries = GetEntries(offset, size);
|
||||||
|
|
||||||
|
foreach (Entry entry in entries)
|
||||||
|
{
|
||||||
|
if (entry.Key.KeyEqual(key))
|
||||||
|
{
|
||||||
|
value = entry.Value;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
value = default;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void Clear()
|
||||||
|
{
|
||||||
|
if (_ranges != null)
|
||||||
|
{
|
||||||
|
foreach (List<Entry> entries in _ranges.Values)
|
||||||
|
{
|
||||||
|
foreach (Entry entry in entries)
|
||||||
|
{
|
||||||
|
DestroyEntry(entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_ranges.Clear();
|
||||||
|
_ranges = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public readonly void ClearRange(int offset, int size)
|
||||||
|
{
|
||||||
|
if (_ranges != null && _ranges.Count > 0)
|
||||||
|
{
|
||||||
|
int end = offset + size;
|
||||||
|
|
||||||
|
List<ulong> toRemove = null;
|
||||||
|
|
||||||
|
foreach (KeyValuePair<ulong, List<Entry>> range in _ranges)
|
||||||
|
{
|
||||||
|
(int rOffset, int rSize) = UnpackRange(range.Key);
|
||||||
|
|
||||||
|
int rEnd = rOffset + rSize;
|
||||||
|
|
||||||
|
if (rEnd > offset && rOffset < end)
|
||||||
|
{
|
||||||
|
List<Entry> entries = range.Value;
|
||||||
|
|
||||||
|
foreach (Entry entry in entries)
|
||||||
|
{
|
||||||
|
DestroyEntry(entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
(toRemove ??= new List<ulong>()).Add(range.Key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (toRemove != null)
|
||||||
|
{
|
||||||
|
foreach (ulong range in toRemove)
|
||||||
|
{
|
||||||
|
_ranges.Remove(range);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<Entry> GetEntries(int offset, int size)
|
||||||
|
{
|
||||||
|
_ranges ??= new Dictionary<ulong, List<Entry>>();
|
||||||
|
|
||||||
|
ulong key = PackRange(offset, size);
|
||||||
|
|
||||||
|
if (!_ranges.TryGetValue(key, out List<Entry> value))
|
||||||
|
{
|
||||||
|
value = new List<Entry>();
|
||||||
|
_ranges.Add(key, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void DestroyEntry(Entry entry)
|
||||||
|
{
|
||||||
|
entry.Key.Dispose();
|
||||||
|
entry.Value?.Dispose();
|
||||||
|
entry.InvalidateDependencies();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static ulong PackRange(int offset, int size)
|
||||||
|
{
|
||||||
|
return (uint)offset | ((ulong)size << 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static (int offset, int size) UnpackRange(ulong range)
|
||||||
|
{
|
||||||
|
return ((int)range, (int)(range >> 32));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void Dispose()
|
||||||
|
{
|
||||||
|
Clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,3 +1,4 @@
|
||||||
|
using Ryujinx.Common.Memory;
|
||||||
using Ryujinx.Graphics.GAL;
|
using Ryujinx.Graphics.GAL;
|
||||||
using SharpMetal.Metal;
|
using SharpMetal.Metal;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
|
@ -21,6 +22,26 @@ namespace Ryujinx.Graphics.Metal
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public record struct BufferRef
|
||||||
|
{
|
||||||
|
public Auto<DisposableBuffer> Buffer;
|
||||||
|
public int Index;
|
||||||
|
public BufferRange? Range;
|
||||||
|
|
||||||
|
public BufferRef(Auto<DisposableBuffer> buffer, int index)
|
||||||
|
{
|
||||||
|
Buffer = buffer;
|
||||||
|
Index = index;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BufferRef(Auto<DisposableBuffer> buffer, int index, ref BufferRange range)
|
||||||
|
{
|
||||||
|
Buffer = buffer;
|
||||||
|
Index = index;
|
||||||
|
Range = range;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
[SupportedOSPlatform("macos")]
|
[SupportedOSPlatform("macos")]
|
||||||
struct EncoderState
|
struct EncoderState
|
||||||
{
|
{
|
||||||
|
@ -37,10 +58,10 @@ namespace Ryujinx.Graphics.Metal
|
||||||
public TextureBase[] ComputeTextures = new TextureBase[Constants.MaxTextures];
|
public TextureBase[] ComputeTextures = new TextureBase[Constants.MaxTextures];
|
||||||
public MTLSamplerState[] ComputeSamplers = new MTLSamplerState[Constants.MaxSamplers];
|
public MTLSamplerState[] ComputeSamplers = new MTLSamplerState[Constants.MaxSamplers];
|
||||||
|
|
||||||
public BufferAssignment[] UniformBuffers = [];
|
public BufferRef[] UniformBuffers = [];
|
||||||
public BufferAssignment[] StorageBuffers = [];
|
public BufferRef[] StorageBuffers = [];
|
||||||
|
|
||||||
public BufferRange IndexBuffer = default;
|
public Auto<DisposableBuffer> IndexBuffer = default;
|
||||||
public MTLIndexType IndexType = MTLIndexType.UInt16;
|
public MTLIndexType IndexType = MTLIndexType.UInt16;
|
||||||
public ulong IndexBufferOffset = 0;
|
public ulong IndexBufferOffset = 0;
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Metal
|
||||||
private EncoderState _currentState = new();
|
private EncoderState _currentState = new();
|
||||||
private readonly Stack<EncoderState> _backStates = [];
|
private readonly Stack<EncoderState> _backStates = [];
|
||||||
|
|
||||||
public readonly BufferRange IndexBuffer => _currentState.IndexBuffer;
|
public readonly Auto<DisposableBuffer> IndexBuffer => _currentState.IndexBuffer;
|
||||||
public readonly MTLIndexType IndexType => _currentState.IndexType;
|
public readonly MTLIndexType IndexType => _currentState.IndexType;
|
||||||
public readonly ulong IndexBufferOffset => _currentState.IndexBufferOffset;
|
public readonly ulong IndexBufferOffset => _currentState.IndexBufferOffset;
|
||||||
public readonly PrimitiveTopology Topology => _currentState.Topology;
|
public readonly PrimitiveTopology Topology => _currentState.Topology;
|
||||||
|
@ -355,10 +355,19 @@ namespace Ryujinx.Graphics.Metal
|
||||||
public void UpdateIndexBuffer(BufferRange buffer, IndexType type)
|
public void UpdateIndexBuffer(BufferRange buffer, IndexType type)
|
||||||
{
|
{
|
||||||
if (buffer.Handle != BufferHandle.Null)
|
if (buffer.Handle != BufferHandle.Null)
|
||||||
|
{
|
||||||
|
if (type == GAL.IndexType.UByte)
|
||||||
|
{
|
||||||
|
_currentState.IndexType = MTLIndexType.UInt16;
|
||||||
|
_currentState.IndexBufferOffset = (ulong)buffer.Offset;
|
||||||
|
_currentState.IndexBuffer = _bufferManager.GetBufferI8ToI16(_pipeline.CurrentCommandBuffer, buffer.Handle, buffer.Offset, buffer.Size);
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
_currentState.IndexType = type.Convert();
|
_currentState.IndexType = type.Convert();
|
||||||
_currentState.IndexBufferOffset = (ulong)buffer.Offset;
|
_currentState.IndexBufferOffset = (ulong)buffer.Offset;
|
||||||
_currentState.IndexBuffer = buffer;
|
_currentState.IndexBuffer = _bufferManager.GetBuffer(buffer.Handle, false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -659,7 +668,20 @@ namespace Ryujinx.Graphics.Metal
|
||||||
// Inlineable
|
// Inlineable
|
||||||
public void UpdateUniformBuffers(ReadOnlySpan<BufferAssignment> buffers)
|
public void UpdateUniformBuffers(ReadOnlySpan<BufferAssignment> buffers)
|
||||||
{
|
{
|
||||||
_currentState.UniformBuffers = buffers.ToArray();
|
_currentState.UniformBuffers = new BufferRef[buffers.Length];
|
||||||
|
|
||||||
|
for (int i = 0; i < buffers.Length; i++)
|
||||||
|
{
|
||||||
|
var assignment = buffers[i];
|
||||||
|
var buffer = assignment.Range;
|
||||||
|
int index = assignment.Binding;
|
||||||
|
|
||||||
|
Auto<DisposableBuffer> mtlBuffer = buffer.Handle == BufferHandle.Null
|
||||||
|
? null
|
||||||
|
: _bufferManager.GetBuffer(buffer.Handle, buffer.Write);
|
||||||
|
|
||||||
|
_currentState.UniformBuffers[i] = new BufferRef(mtlBuffer, index, ref buffer);
|
||||||
|
}
|
||||||
|
|
||||||
// Inline update
|
// Inline update
|
||||||
if (_pipeline.CurrentEncoder != null)
|
if (_pipeline.CurrentEncoder != null)
|
||||||
|
@ -680,13 +702,49 @@ namespace Ryujinx.Graphics.Metal
|
||||||
// Inlineable
|
// Inlineable
|
||||||
public void UpdateStorageBuffers(ReadOnlySpan<BufferAssignment> buffers)
|
public void UpdateStorageBuffers(ReadOnlySpan<BufferAssignment> buffers)
|
||||||
{
|
{
|
||||||
_currentState.StorageBuffers = buffers.ToArray();
|
_currentState.StorageBuffers = new BufferRef[buffers.Length];
|
||||||
|
|
||||||
for (int i = 0; i < _currentState.StorageBuffers.Length; i++)
|
for (int i = 0; i < buffers.Length; i++)
|
||||||
{
|
{
|
||||||
BufferAssignment buffer = _currentState.StorageBuffers[i];
|
var assignment = buffers[i];
|
||||||
// TODO: DONT offset the binding by 15
|
var buffer = assignment.Range;
|
||||||
_currentState.StorageBuffers[i] = new BufferAssignment(buffer.Binding + 15, buffer.Range);
|
// TODO: Dont do this
|
||||||
|
int index = assignment.Binding + 15;
|
||||||
|
|
||||||
|
Auto<DisposableBuffer> mtlBuffer = buffer.Handle == BufferHandle.Null
|
||||||
|
? null
|
||||||
|
: _bufferManager.GetBuffer(buffer.Handle, buffer.Write);
|
||||||
|
|
||||||
|
_currentState.StorageBuffers[i] = new BufferRef(mtlBuffer, index, ref buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inline update
|
||||||
|
if (_pipeline.CurrentEncoder != null)
|
||||||
|
{
|
||||||
|
if (_pipeline.CurrentEncoderType == EncoderType.Render)
|
||||||
|
{
|
||||||
|
var renderCommandEncoder = new MTLRenderCommandEncoder(_pipeline.CurrentEncoder.Value);
|
||||||
|
SetRenderBuffers(renderCommandEncoder, _currentState.StorageBuffers, true);
|
||||||
|
}
|
||||||
|
else if (_pipeline.CurrentEncoderType == EncoderType.Compute)
|
||||||
|
{
|
||||||
|
var computeCommandEncoder = new MTLComputeCommandEncoder(_pipeline.CurrentEncoder.Value);
|
||||||
|
SetComputeBuffers(computeCommandEncoder, _currentState.StorageBuffers);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inlineable
|
||||||
|
public void UpdateStorageBuffers(int first, ReadOnlySpan<Auto<DisposableBuffer>> buffers)
|
||||||
|
{
|
||||||
|
_currentState.StorageBuffers = new BufferRef[buffers.Length];
|
||||||
|
|
||||||
|
for (int i = 0; i < buffers.Length; i++)
|
||||||
|
{
|
||||||
|
var mtlBuffer = buffers[i];
|
||||||
|
int index = first + i;
|
||||||
|
|
||||||
|
_currentState.StorageBuffers[i] = new BufferRef(mtlBuffer, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Inline update
|
// Inline update
|
||||||
|
@ -938,51 +996,95 @@ namespace Ryujinx.Graphics.Metal
|
||||||
|
|
||||||
private void SetVertexBuffers(MTLRenderCommandEncoder renderCommandEncoder, VertexBufferDescriptor[] bufferDescriptors)
|
private void SetVertexBuffers(MTLRenderCommandEncoder renderCommandEncoder, VertexBufferDescriptor[] bufferDescriptors)
|
||||||
{
|
{
|
||||||
var buffers = new List<BufferAssignment>();
|
var buffers = new List<BufferRef>();
|
||||||
|
|
||||||
for (int i = 0; i < bufferDescriptors.Length; i++)
|
for (int i = 0; i < bufferDescriptors.Length; i++)
|
||||||
{
|
{
|
||||||
buffers.Add(new BufferAssignment(i, bufferDescriptors[i].Buffer));
|
Auto<DisposableBuffer> mtlBuffer = bufferDescriptors[i].Buffer.Handle == BufferHandle.Null
|
||||||
|
? null
|
||||||
|
: _bufferManager.GetBuffer(bufferDescriptors[i].Buffer.Handle, bufferDescriptors[i].Buffer.Write);
|
||||||
|
|
||||||
|
var range = bufferDescriptors[i].Buffer;
|
||||||
|
|
||||||
|
buffers.Add(new BufferRef(mtlBuffer, i, ref range));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var zeroBufferRange = new BufferRange(_zeroBuffer, 0, ZeroBufferSize);
|
||||||
|
|
||||||
|
Auto<DisposableBuffer> zeroBuffer = _zeroBuffer == BufferHandle.Null
|
||||||
|
? null
|
||||||
|
: _bufferManager.GetBuffer(_zeroBuffer, false);
|
||||||
|
|
||||||
// Zero buffer
|
// Zero buffer
|
||||||
buffers.Add(new BufferAssignment(
|
buffers.Add(new BufferRef(zeroBuffer, bufferDescriptors.Length, ref zeroBufferRange));
|
||||||
bufferDescriptors.Length,
|
|
||||||
new BufferRange(_zeroBuffer, 0, ZeroBufferSize)));
|
|
||||||
|
|
||||||
SetRenderBuffers(renderCommandEncoder, buffers.ToArray());
|
SetRenderBuffers(renderCommandEncoder, buffers.ToArray());
|
||||||
}
|
}
|
||||||
|
|
||||||
private readonly void SetRenderBuffers(MTLRenderCommandEncoder renderCommandEncoder, BufferAssignment[] buffers, bool fragment = false)
|
private readonly void SetRenderBuffers(MTLRenderCommandEncoder renderCommandEncoder, BufferRef[] buffers, bool fragment = false)
|
||||||
{
|
{
|
||||||
foreach (var buffer in buffers)
|
for (int i = 0; i < buffers.Length; i++)
|
||||||
{
|
{
|
||||||
var range = buffer.Range;
|
var range = buffers[i].Range;
|
||||||
var autoBuffer = _bufferManager.GetBuffer(range.Handle, range.Offset, range.Size, range.Write);
|
var autoBuffer = buffers[i].Buffer;
|
||||||
|
var offset = 0;
|
||||||
|
var index = buffers[i].Index;
|
||||||
|
|
||||||
if (autoBuffer != null)
|
if (autoBuffer == null)
|
||||||
{
|
{
|
||||||
var mtlBuffer = autoBuffer.Get(_pipeline.CurrentCommandBuffer).Value;
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
renderCommandEncoder.SetVertexBuffer(mtlBuffer, (ulong)range.Offset, (ulong)buffer.Binding);
|
MTLBuffer mtlBuffer;
|
||||||
|
|
||||||
|
if (range.HasValue)
|
||||||
|
{
|
||||||
|
offset = range.Value.Offset;
|
||||||
|
mtlBuffer = autoBuffer.Get(_pipeline.CurrentCommandBuffer, offset, range.Value.Size, range.Value.Write).Value;
|
||||||
|
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
mtlBuffer = autoBuffer.Get(_pipeline.CurrentCommandBuffer).Value;
|
||||||
|
}
|
||||||
|
|
||||||
|
renderCommandEncoder.SetVertexBuffer(mtlBuffer, (ulong)offset, (ulong)index);
|
||||||
|
|
||||||
if (fragment)
|
if (fragment)
|
||||||
{
|
{
|
||||||
renderCommandEncoder.SetFragmentBuffer(mtlBuffer, (ulong)range.Offset, (ulong)buffer.Binding);
|
renderCommandEncoder.SetFragmentBuffer(mtlBuffer, (ulong)offset, (ulong)index);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private readonly void SetComputeBuffers(MTLComputeCommandEncoder computeCommandEncoder, BufferAssignment[] buffers)
|
private readonly void SetComputeBuffers(MTLComputeCommandEncoder computeCommandEncoder, BufferRef[] buffers)
|
||||||
{
|
{
|
||||||
foreach (var buffer in buffers)
|
for (int i = 0; i < buffers.Length; i++)
|
||||||
{
|
{
|
||||||
var range = buffer.Range;
|
var range = buffers[i].Range;
|
||||||
var mtlBuffer = _bufferManager.GetBuffer(range.Handle, range.Offset, range.Size, range.Write).Get(_pipeline.CurrentCommandBuffer).Value;
|
var autoBuffer = buffers[i].Buffer;
|
||||||
|
var offset = 0;
|
||||||
|
var index = buffers[i].Index;
|
||||||
|
|
||||||
computeCommandEncoder.SetBuffer(mtlBuffer, (ulong)range.Offset, (ulong)buffer.Binding);
|
if (autoBuffer == null)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
MTLBuffer mtlBuffer;
|
||||||
|
|
||||||
|
if (range.HasValue)
|
||||||
|
{
|
||||||
|
offset = range.Value.Offset;
|
||||||
|
mtlBuffer = autoBuffer.Get(_pipeline.CurrentCommandBuffer, offset, range.Value.Size, range.Value.Write).Value;
|
||||||
|
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
mtlBuffer = autoBuffer.Get(_pipeline.CurrentCommandBuffer).Value;
|
||||||
|
}
|
||||||
|
|
||||||
|
computeCommandEncoder.SetBuffer(mtlBuffer, (ulong)offset, (ulong)index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -12,7 +12,9 @@ namespace Ryujinx.Graphics.Metal
|
||||||
[SupportedOSPlatform("macos")]
|
[SupportedOSPlatform("macos")]
|
||||||
public class HelperShader : IDisposable
|
public class HelperShader : IDisposable
|
||||||
{
|
{
|
||||||
|
private const int ConvertElementsPerWorkgroup = 32 * 100; // Work group size of 32 times 100 elements.
|
||||||
private const string ShadersSourcePath = "/Ryujinx.Graphics.Metal/Shaders";
|
private const string ShadersSourcePath = "/Ryujinx.Graphics.Metal/Shaders";
|
||||||
|
private readonly MetalRenderer _renderer;
|
||||||
private readonly Pipeline _pipeline;
|
private readonly Pipeline _pipeline;
|
||||||
private MTLDevice _device;
|
private MTLDevice _device;
|
||||||
|
|
||||||
|
@ -21,10 +23,12 @@ namespace Ryujinx.Graphics.Metal
|
||||||
private readonly IProgram _programColorBlit;
|
private readonly IProgram _programColorBlit;
|
||||||
private readonly List<IProgram> _programsColorClear = new();
|
private readonly List<IProgram> _programsColorClear = new();
|
||||||
private readonly IProgram _programDepthStencilClear;
|
private readonly IProgram _programDepthStencilClear;
|
||||||
|
private readonly IProgram _programStrideChange;
|
||||||
|
|
||||||
public HelperShader(MTLDevice device, Pipeline pipeline)
|
public HelperShader(MTLDevice device, MetalRenderer renderer, Pipeline pipeline)
|
||||||
{
|
{
|
||||||
_device = device;
|
_device = device;
|
||||||
|
_renderer = renderer;
|
||||||
_pipeline = pipeline;
|
_pipeline = pipeline;
|
||||||
|
|
||||||
_samplerNearest = new Sampler(_device, SamplerCreateInfo.Create(MinFilter.Nearest, MagFilter.Nearest));
|
_samplerNearest = new Sampler(_device, SamplerCreateInfo.Create(MinFilter.Nearest, MagFilter.Nearest));
|
||||||
|
@ -54,6 +58,12 @@ namespace Ryujinx.Graphics.Metal
|
||||||
new ShaderSource(depthStencilClearSource, ShaderStage.Fragment, TargetLanguage.Msl),
|
new ShaderSource(depthStencilClearSource, ShaderStage.Fragment, TargetLanguage.Msl),
|
||||||
new ShaderSource(depthStencilClearSource, ShaderStage.Vertex, TargetLanguage.Msl)
|
new ShaderSource(depthStencilClearSource, ShaderStage.Vertex, TargetLanguage.Msl)
|
||||||
], device);
|
], device);
|
||||||
|
|
||||||
|
var strideChangeSource = ReadMsl("ChangeBufferStride.metal");
|
||||||
|
_programStrideChange = new Program(
|
||||||
|
[
|
||||||
|
new ShaderSource(strideChangeSource, ShaderStage.Compute, TargetLanguage.Msl)
|
||||||
|
], device);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static string ReadMsl(string fileName)
|
private static string ReadMsl(string fileName)
|
||||||
|
@ -62,6 +72,7 @@ namespace Ryujinx.Graphics.Metal
|
||||||
}
|
}
|
||||||
|
|
||||||
public unsafe void BlitColor(
|
public unsafe void BlitColor(
|
||||||
|
CommandBufferScoped cbs,
|
||||||
ITexture src,
|
ITexture src,
|
||||||
ITexture dst,
|
ITexture dst,
|
||||||
Extents2D srcRegion,
|
Extents2D srcRegion,
|
||||||
|
@ -89,6 +100,10 @@ namespace Ryujinx.Graphics.Metal
|
||||||
(region[2], region[3]) = (region[3], region[2]);
|
(region[2], region[3]) = (region[3], region[2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// using var buffer = _renderer.BufferManager.ReserveOrCreate(cbs, RegionBufferSize);
|
||||||
|
// buffer.Holder.SetDataUnchecked<float>(buffer.Offset, region);
|
||||||
|
// _pipeline.SetUniformBuffers([new BufferAssignment(0, buffer.Range)]);
|
||||||
|
|
||||||
var rect = new Rectangle<float>(
|
var rect = new Rectangle<float>(
|
||||||
MathF.Min(dstRegion.X1, dstRegion.X2),
|
MathF.Min(dstRegion.X1, dstRegion.X2),
|
||||||
MathF.Min(dstRegion.Y1, dstRegion.Y2),
|
MathF.Min(dstRegion.Y1, dstRegion.Y2),
|
||||||
|
@ -156,6 +171,10 @@ namespace Ryujinx.Graphics.Metal
|
||||||
(region[2], region[3]) = (region[3], region[2]);
|
(region[2], region[3]) = (region[3], region[2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// var bufferHandle = _renderer.BufferManager.CreateWithHandle(RegionBufferSize);
|
||||||
|
// _renderer.BufferManager.SetData<float>(bufferHandle, 0, region);
|
||||||
|
// _pipeline.SetUniformBuffers([new BufferAssignment(0, new BufferRange(bufferHandle, 0, RegionBufferSize))]);
|
||||||
|
|
||||||
Span<Viewport> viewports = stackalloc Viewport[1];
|
Span<Viewport> viewports = stackalloc Viewport[1];
|
||||||
Span<Rectangle<int>> scissors = stackalloc Rectangle<int>[1];
|
Span<Rectangle<int>> scissors = stackalloc Rectangle<int>[1];
|
||||||
|
|
||||||
|
@ -200,6 +219,57 @@ namespace Ryujinx.Graphics.Metal
|
||||||
_pipeline.RestoreState();
|
_pipeline.RestoreState();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void ConvertI8ToI16(CommandBufferScoped cbs, BufferHolder src, BufferHolder dst, int srcOffset, int size)
|
||||||
|
{
|
||||||
|
ChangeStride(cbs, src, dst, srcOffset, size, 1, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
public unsafe void ChangeStride(
|
||||||
|
CommandBufferScoped cbs,
|
||||||
|
BufferHolder src,
|
||||||
|
BufferHolder dst,
|
||||||
|
int srcOffset,
|
||||||
|
int size,
|
||||||
|
int stride,
|
||||||
|
int newStride)
|
||||||
|
{
|
||||||
|
int elems = size / stride;
|
||||||
|
|
||||||
|
var srcBuffer = src.GetBuffer();
|
||||||
|
var dstBuffer = dst.GetBuffer();
|
||||||
|
|
||||||
|
const int ParamsBufferSize = 16;
|
||||||
|
|
||||||
|
// Save current state
|
||||||
|
_pipeline.SaveAndResetState();
|
||||||
|
|
||||||
|
Span<int> shaderParams = stackalloc int[ParamsBufferSize / sizeof(int)];
|
||||||
|
|
||||||
|
shaderParams[0] = stride;
|
||||||
|
shaderParams[1] = newStride;
|
||||||
|
shaderParams[2] = size;
|
||||||
|
shaderParams[3] = srcOffset;
|
||||||
|
|
||||||
|
using var buffer = _renderer.BufferManager.ReserveOrCreate(cbs, ParamsBufferSize);
|
||||||
|
|
||||||
|
buffer.Holder.SetDataUnchecked<int>(buffer.Offset, shaderParams);
|
||||||
|
|
||||||
|
_pipeline.SetUniformBuffers([new BufferAssignment(0, buffer.Range)]);
|
||||||
|
|
||||||
|
Span<Auto<DisposableBuffer>> sbRanges = new Auto<DisposableBuffer>[2];
|
||||||
|
|
||||||
|
sbRanges[0] = srcBuffer;
|
||||||
|
sbRanges[1] = dstBuffer;
|
||||||
|
|
||||||
|
_pipeline.SetStorageBuffers(1, sbRanges);
|
||||||
|
|
||||||
|
_pipeline.SetProgram(_programStrideChange);
|
||||||
|
_pipeline.DispatchCompute(1 + elems / ConvertElementsPerWorkgroup, 1, 1, 64, 1, 1);
|
||||||
|
|
||||||
|
// Restore previous state
|
||||||
|
_pipeline.RestoreState();
|
||||||
|
}
|
||||||
|
|
||||||
public unsafe void ClearColor(
|
public unsafe void ClearColor(
|
||||||
int index,
|
int index,
|
||||||
ReadOnlySpan<float> clearColor,
|
ReadOnlySpan<float> clearColor,
|
||||||
|
|
|
@ -58,7 +58,7 @@ namespace Ryujinx.Graphics.Metal
|
||||||
|
|
||||||
_pipeline.InitEncoderStateManager(_bufferManager);
|
_pipeline.InitEncoderStateManager(_bufferManager);
|
||||||
|
|
||||||
_helperShader = new HelperShader(_device, _pipeline);
|
_helperShader = new HelperShader(_device, this, _pipeline);
|
||||||
SyncManager = new SyncManager(this);
|
SyncManager = new SyncManager(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -193,7 +193,7 @@ namespace Ryujinx.Graphics.Metal
|
||||||
var textureInfo = new TextureCreateInfo((int)drawable.Texture.Width, (int)drawable.Texture.Height, (int)drawable.Texture.Depth, (int)drawable.Texture.MipmapLevelCount, (int)drawable.Texture.SampleCount, 0, 0, 0, Format.B8G8R8A8Unorm, 0, Target.Texture2D, SwizzleComponent.Red, SwizzleComponent.Green, SwizzleComponent.Blue, SwizzleComponent.Alpha);
|
var textureInfo = new TextureCreateInfo((int)drawable.Texture.Width, (int)drawable.Texture.Height, (int)drawable.Texture.Depth, (int)drawable.Texture.MipmapLevelCount, (int)drawable.Texture.SampleCount, 0, 0, 0, Format.B8G8R8A8Unorm, 0, Target.Texture2D, SwizzleComponent.Red, SwizzleComponent.Green, SwizzleComponent.Blue, SwizzleComponent.Alpha);
|
||||||
var dst = new Texture(_device, _renderer, this, textureInfo, drawable.Texture, 0, 0);
|
var dst = new Texture(_device, _renderer, this, textureInfo, drawable.Texture, 0, 0);
|
||||||
|
|
||||||
_renderer.HelperShader.BlitColor(src, dst, srcRegion, dstRegion, isLinear);
|
_renderer.HelperShader.BlitColor(Cbs, src, dst, srcRegion, dstRegion, isLinear);
|
||||||
|
|
||||||
EndCurrentPass();
|
EndCurrentPass();
|
||||||
|
|
||||||
|
@ -227,7 +227,7 @@ namespace Ryujinx.Graphics.Metal
|
||||||
Extents2D dstRegion,
|
Extents2D dstRegion,
|
||||||
bool linearFilter)
|
bool linearFilter)
|
||||||
{
|
{
|
||||||
_renderer.HelperShader.BlitColor(src, dst, srcRegion, dstRegion, linearFilter);
|
_renderer.HelperShader.BlitColor(Cbs, src, dst, srcRegion, dstRegion, linearFilter);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void Barrier()
|
public void Barrier()
|
||||||
|
@ -348,7 +348,7 @@ namespace Ryujinx.Graphics.Metal
|
||||||
// TODO: Support topology re-indexing to provide support for TriangleFans
|
// TODO: Support topology re-indexing to provide support for TriangleFans
|
||||||
var primitiveType = _encoderStateManager.Topology.Convert();
|
var primitiveType = _encoderStateManager.Topology.Convert();
|
||||||
|
|
||||||
var indexBuffer = _renderer.BufferManager.GetBuffer(_encoderStateManager.IndexBuffer.Handle, false);
|
var indexBuffer = _encoderStateManager.IndexBuffer;
|
||||||
|
|
||||||
renderCommandEncoder.DrawIndexedPrimitives(
|
renderCommandEncoder.DrawIndexedPrimitives(
|
||||||
primitiveType,
|
primitiveType,
|
||||||
|
@ -546,6 +546,11 @@ namespace Ryujinx.Graphics.Metal
|
||||||
_encoderStateManager.UpdateStorageBuffers(buffers);
|
_encoderStateManager.UpdateStorageBuffers(buffers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void SetStorageBuffers(int first, ReadOnlySpan<Auto<DisposableBuffer>> buffers)
|
||||||
|
{
|
||||||
|
_encoderStateManager.UpdateStorageBuffers(first, buffers);
|
||||||
|
}
|
||||||
|
|
||||||
public void SetTextureAndSampler(ShaderStage stage, int binding, ITexture texture, ISampler sampler)
|
public void SetTextureAndSampler(ShaderStage stage, int binding, ITexture texture, ISampler sampler)
|
||||||
{
|
{
|
||||||
if (texture is TextureBase tex)
|
if (texture is TextureBase tex)
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<EmbeddedResource Include="Shaders\Blit.metal" />
|
<EmbeddedResource Include="Shaders\Blit.metal" />
|
||||||
|
<EmbeddedResource Include="Shaders\ChangeBufferStride.metal" />
|
||||||
<EmbeddedResource Include="Shaders\ColorClear.metal" />
|
<EmbeddedResource Include="Shaders\ColorClear.metal" />
|
||||||
<EmbeddedResource Include="Shaders\DepthStencilClear.metal" />
|
<EmbeddedResource Include="Shaders\DepthStencilClear.metal" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
52
src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal
Normal file
52
src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
#include <metal_stdlib>
|
||||||
|
|
||||||
|
using namespace metal;
|
||||||
|
|
||||||
|
kernel void kernelMain(constant int4& stride_arguments [[buffer(0)]],
|
||||||
|
device uint8_t* in_data [[buffer(1)]],
|
||||||
|
device uint8_t* out_data [[buffer(2)]],
|
||||||
|
uint3 thread_position_in_grid [[thread_position_in_grid]],
|
||||||
|
uint3 threads_per_threadgroup [[threads_per_threadgroup]],
|
||||||
|
uint3 threadgroups_per_grid [[threads_per_grid]])
|
||||||
|
{
|
||||||
|
// Determine what slice of the stride copies this invocation will perform.
|
||||||
|
|
||||||
|
int sourceStride = stride_arguments.x;
|
||||||
|
int targetStride = stride_arguments.y;
|
||||||
|
int bufferSize = stride_arguments.z;
|
||||||
|
int sourceOffset = stride_arguments.w;
|
||||||
|
|
||||||
|
int strideRemainder = targetStride - sourceStride;
|
||||||
|
int invocations = int(threads_per_threadgroup.x * threadgroups_per_grid.x);
|
||||||
|
|
||||||
|
int copiesRequired = bufferSize / sourceStride;
|
||||||
|
|
||||||
|
// Find the copies that this invocation should perform.
|
||||||
|
|
||||||
|
// - Copies that all invocations perform.
|
||||||
|
int allInvocationCopies = copiesRequired / invocations;
|
||||||
|
|
||||||
|
// - Extra remainder copy that this invocation performs.
|
||||||
|
int index = int(thread_position_in_grid.x);
|
||||||
|
int extra = (index < (copiesRequired % invocations)) ? 1 : 0;
|
||||||
|
|
||||||
|
int copyCount = allInvocationCopies + extra;
|
||||||
|
|
||||||
|
// Finally, get the starting offset. Make sure to count extra copies.
|
||||||
|
|
||||||
|
int startCopy = allInvocationCopies * index + min(copiesRequired % invocations, index);
|
||||||
|
|
||||||
|
int srcOffset = sourceOffset + startCopy * sourceStride;
|
||||||
|
int dstOffset = startCopy * targetStride;
|
||||||
|
|
||||||
|
// Perform the copies for this region
|
||||||
|
for (int i = 0; i < copyCount; i++) {
|
||||||
|
for (int j = 0; j < sourceStride; j++) {
|
||||||
|
out_data[dstOffset++] = in_data[srcOffset++];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int j = 0; j < strideRemainder; j++) {
|
||||||
|
out_data[dstOffset++] = uint8_t(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue