From 278a4c317c0b87add67cc9ebc904afe1db23a031 Mon Sep 17 00:00:00 2001 From: gdk Date: Thu, 31 Oct 2019 00:29:22 -0300 Subject: [PATCH] Implement BFI, BRK, FLO, FSWZADD, PBK, SHFL and TXD shader instructions, misc. fixes --- Ryujinx.Common/Utilities/EmbeddedResources.cs | 139 +++++++++ .../CodeGen/Glsl/CodeGenContext.cs | 7 +- .../CodeGen/Glsl/Declarations.cs | 35 +++ .../CodeGen/Glsl/GlslGenerator.cs | 9 + .../HelperFunctions/HelperFunctionNames.cs | 11 + .../CodeGen/Glsl/HelperFunctions/Shuffle.glsl | 9 + .../Glsl/HelperFunctions/ShuffleDown.glsl | 9 + .../Glsl/HelperFunctions/ShuffleUp.glsl | 8 + .../Glsl/HelperFunctions/ShuffleXor.glsl | 9 + .../Glsl/HelperFunctions/SwizzleAdd.glsl | 7 + .../Glsl/Instructions/InstGenHelper.cs | 10 + .../Glsl/Instructions/InstGenMemory.cs | 44 ++- Ryujinx.Graphics.Shader/Decoders/Decoder.cs | 2 +- .../Decoders/OpCodeBranch.cs | 4 + .../Decoders/OpCodeShuffle.cs | 40 +++ Ryujinx.Graphics.Shader/Decoders/OpCodeSsy.cs | 2 + .../Decoders/OpCodeTable.cs | 272 +++++++++--------- .../Decoders/OpCodeTextureScalar.cs | 2 +- .../Decoders/OpCodeTld4s.cs | 2 + Ryujinx.Graphics.Shader/Decoders/OpCodeTxd.cs | 18 ++ .../Decoders/ShuffleType.cs | 10 + .../Decoders/SystemRegister.cs | 15 +- .../Instructions/InstEmitAlu.cs | 39 +++ .../Instructions/InstEmitFArith.cs | 16 ++ .../Instructions/InstEmitFlow.cs | 22 +- .../Instructions/InstEmitMove.cs | 35 +++ .../Instructions/InstEmitTexture.cs | 127 +++++++- .../IntermediateRepresentation/Instruction.cs | 10 + .../IntermediateRepresentation/Operation.cs | 7 +- .../TextureFlags.cs | 17 +- .../Ryujinx.Graphics.Shader.csproj | 12 + .../StructuredIr/HelperFunctionsMask.cs | 14 + .../StructuredIr/InstructionInfo.cs | 10 + .../StructuredIr/StructuredProgram.cs | 22 ++ .../StructuredIr/StructuredProgramInfo.cs | 2 + .../Translation/EmitterContextInsts.cs | 40 +++ .../Optimizations/ConstantFolding.cs | 19 ++ .../Translation/Optimizations/Optimizer.cs | 82 +++++- 38 files changed, 972 insertions(+), 166 
deletions(-) create mode 100644 Ryujinx.Common/Utilities/EmbeddedResources.cs create mode 100644 Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs create mode 100644 Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl create mode 100644 Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl create mode 100644 Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl create mode 100644 Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl create mode 100644 Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl create mode 100644 Ryujinx.Graphics.Shader/Decoders/OpCodeShuffle.cs create mode 100644 Ryujinx.Graphics.Shader/Decoders/OpCodeTxd.cs create mode 100644 Ryujinx.Graphics.Shader/Decoders/ShuffleType.cs create mode 100644 Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs diff --git a/Ryujinx.Common/Utilities/EmbeddedResources.cs b/Ryujinx.Common/Utilities/EmbeddedResources.cs new file mode 100644 index 000000000..93ff70ea0 --- /dev/null +++ b/Ryujinx.Common/Utilities/EmbeddedResources.cs @@ -0,0 +1,139 @@ +using System; +using System.IO; +using System.Reflection; +using System.Threading.Tasks; + +namespace Ryujinx.Common +{ + public static class EmbeddedResources + { + private readonly static Assembly ResourceAssembly; + + static EmbeddedResources() + { + ResourceAssembly = Assembly.GetAssembly(typeof(EmbeddedResources)); + } + + public static byte[] Read(string filename) + { + var (assembly, path) = ResolveManifestPath(filename); + + return Read(assembly, path); + } + + public static Task<byte[]> ReadAsync(string filename) + { + var (assembly, path) = ResolveManifestPath(filename); + + return ReadAsync(assembly, path); + } + + public static byte[] Read(Assembly assembly, string filename) + { + using (var stream = GetStream(assembly, filename)) + { + if (stream == null) + return null; + + using (var mem = new MemoryStream()) + { + stream.CopyTo(mem); + return
mem.ToArray(); + } + } + } + + public async static Task<byte[]> ReadAsync(Assembly assembly, string filename) + { + using (var stream = GetStream(assembly, filename)) + { + if (stream == null) + return null; + + using (var mem = new MemoryStream()) + { + await stream.CopyToAsync(mem); + return mem.ToArray(); + } + } + } + + public static string ReadAllText(string filename) + { + var (assembly, path) = ResolveManifestPath(filename); + + return ReadAllText(assembly, path); + } + + public static Task<string> ReadAllTextAsync(string filename) + { + var (assembly, path) = ResolveManifestPath(filename); + + return ReadAllTextAsync(assembly, path); + } + + public static string ReadAllText(Assembly assembly, string filename) + { + using (var stream = GetStream(assembly, filename)) + { + if (stream == null) + return null; + + using (var reader = new StreamReader(stream)) + { + return reader.ReadToEnd(); + } + } + } + + public async static Task<string> ReadAllTextAsync(Assembly assembly, string filename) + { + using (var stream = GetStream(assembly, filename)) + { + if (stream == null) + return null; + + using (var reader = new StreamReader(stream)) + { + return await reader.ReadToEndAsync(); + } + } + } + + public static Stream GetStream(string filename) + { + var (assembly, path) = ResolveManifestPath(filename); + + return GetStream(assembly, path); + } + + public static Stream GetStream(Assembly assembly, string filename) + { + var namespace_ = assembly.GetName().Name; + var manifestUri = namespace_ + "." 
+ filename.Replace('/', '.'); + + var stream = assembly.GetManifestResourceStream(manifestUri); + + if (stream == null) + return null; + + return stream; + } + + private static (Assembly, string) ResolveManifestPath(string filename) + { + var segments = filename.Split(new[] { '/' }, 2, StringSplitOptions.RemoveEmptyEntries); + + if (segments.Length >= 2) + { + foreach (var assembly in AppDomain.CurrentDomain.GetAssemblies()) + { + if (assembly.GetName().Name == segments[0]) + return (assembly, segments[1]); + } + } + + return (EmbeddedResources.ResourceAssembly, filename); + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs index abfe55a5a..5222fc7d0 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs @@ -5,7 +5,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl { class CodeGenContext { - private const string Tab = " "; + public const string Tab = " "; public ShaderConfig Config { get; } @@ -90,5 +90,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl return indentation; } + + public string GetTabString() + { + return Tab; + } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs index ab10d91a6..7c67bc13c 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs @@ -1,3 +1,4 @@ +using Ryujinx.Common; using Ryujinx.Graphics.Shader.IntermediateRepresentation; using Ryujinx.Graphics.Shader.StructuredIr; using Ryujinx.Graphics.Shader.Translation; @@ -15,6 +16,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl public static void Declare(CodeGenContext context, StructuredProgramInfo info) { context.AppendLine("#version 420 core"); + context.AppendLine("#extension GL_ARB_shader_ballot : enable"); 
context.AppendLine("#extension GL_ARB_shader_storage_buffer_object : enable"); if (context.Config.Stage == ShaderStage.Compute) @@ -131,6 +133,31 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl $"local_size_z = {localSizeZ}) in;"); context.AppendLine(); } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.Shuffle) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleDown) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleUp) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleXor) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.SwizzleAdd) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl"); + } } public static void DeclareLocals(CodeGenContext context, StructuredProgramInfo info) @@ -321,6 +348,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl } } + private static void AppendHelperFunction(CodeGenContext context, string filename) + { + string code = EmbeddedResources.ReadAllText(filename); + + context.AppendLine(code.Replace("\t", CodeGenContext.Tab)); + context.AppendLine(); + } + private static string GetSamplerTypeName(SamplerType type) { string typeName; diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs index b5407eb86..b1b9afad7 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs @@ -33,6 +33,15 @@ namespace 
Ryujinx.Graphics.Shader.CodeGen.Glsl Declarations.DeclareLocals(context, info); + // Some games will leave some elements of gl_Position uninitialized, + // in those cases, the elements will contain undefined values according + // to the spec, but on NVIDIA they seem to be always initialized to (0, 0, 0, 1), + // so we do explicit initialization to avoid UB on non-NVIDIA GPUs. + if (context.Config.Stage == ShaderStage.Vertex) + { + context.AppendLine("gl_Position = vec4(0.0, 0.0, 0.0, 1.0);"); + } + // Ensure that unused attributes are set, otherwise the downstream // compiler may eliminate them. // (Not needed for fragment shader as it is the last stage). diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs new file mode 100644 index 000000000..f1540fbfb --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs @@ -0,0 +1,11 @@ +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl +{ + static class HelperFunctionNames + { + public const string Shuffle = "Helper_Shuffle"; + public const string ShuffleDown = "Helper_ShuffleDown"; + public const string ShuffleUp = "Helper_ShuffleUp"; + public const string ShuffleXor = "Helper_ShuffleXor"; + public const string SwizzleAdd = "Helper_SwizzleAdd"; + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl new file mode 100644 index 000000000..380bc581f --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl @@ -0,0 +1,9 @@ +float Helper_Shuffle(float x, uint index, uint mask) +{ + uint clamp = mask & 0x1fu; + uint segMask = (mask >> 8) & 0x1fu; + uint minThreadId = gl_SubGroupInvocationARB & segMask; + uint maxThreadId = minThreadId | (clamp & ~segMask); + uint srcThreadId = (index & ~segMask) | minThreadId; + return 
(srcThreadId <= maxThreadId) ? readInvocationARB(x, srcThreadId) : x; +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl new file mode 100644 index 000000000..46750f20d --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl @@ -0,0 +1,9 @@ +float Helper_ShuffleDown(float x, uint index, uint mask) +{ + uint clamp = mask & 0x1fu; + uint segMask = (mask >> 8) & 0x1fu; + uint minThreadId = gl_SubGroupInvocationARB & segMask; + uint maxThreadId = minThreadId | (clamp & ~segMask); + uint srcThreadId = gl_SubGroupInvocationARB + index; + return (srcThreadId <= maxThreadId) ? readInvocationARB(x, srcThreadId) : x; +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl new file mode 100644 index 000000000..2bc834697 --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl @@ -0,0 +1,8 @@ +float Helper_ShuffleUp(float x, uint index, uint mask) +{ + uint clamp = mask & 0x1fu; + uint segMask = (mask >> 8) & 0x1fu; + uint minThreadId = gl_SubGroupInvocationARB & segMask; + uint srcThreadId = gl_SubGroupInvocationARB - index; + return (srcThreadId >= minThreadId) ? 
readInvocationARB(x, srcThreadId) : x; +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl new file mode 100644 index 000000000..1049e181f --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl @@ -0,0 +1,9 @@ +float Helper_ShuffleXor(float x, uint index, uint mask) +{ + uint clamp = mask & 0x1fu; + uint segMask = (mask >> 8) & 0x1fu; + uint minThreadId = gl_SubGroupInvocationARB & segMask; + uint maxThreadId = minThreadId | (clamp & ~segMask); + uint srcThreadId = gl_SubGroupInvocationARB ^ index; + return (srcThreadId <= maxThreadId) ? readInvocationARB(x, srcThreadId) : x; +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl new file mode 100644 index 000000000..7df3e57fd --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl @@ -0,0 +1,7 @@ +float Helper_SwizzleAdd(float x, float y, int mask) +{ + vec4 xLut = vec4(1.0, -1.0, 1.0, 0.0); + vec4 yLut = vec4(1.0, 1.0, -1.0, 1.0); + int lutIdx = (mask >> (int(gl_SubGroupInvocationARB & 3u) * 2)) & 3; + return x * xLut[lutIdx] + y * yLut[lutIdx]; +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs index 24b93afb4..2aaae71c4 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs @@ -15,6 +15,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions Add(Instruction.Absolute, InstType.CallUnary, "abs"); Add(Instruction.Add, InstType.OpBinaryCom, "+", 2); + Add(Instruction.BitCount, InstType.CallUnary, "bitCount"); Add(Instruction.BitfieldExtractS32, InstType.CallTernary, 
"bitfieldExtract"); Add(Instruction.BitfieldExtractU32, InstType.CallTernary, "bitfieldExtract"); Add(Instruction.BitfieldInsert, InstType.CallQuaternary, "bitfieldInsert"); @@ -41,11 +42,15 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions Add(Instruction.ConvertS32ToFP, InstType.CallUnary, "float"); Add(Instruction.ConvertU32ToFP, InstType.CallUnary, "float"); Add(Instruction.Cosine, InstType.CallUnary, "cos"); + Add(Instruction.Ddx, InstType.CallUnary, "dFdx"); + Add(Instruction.Ddy, InstType.CallUnary, "dFdy"); Add(Instruction.Discard, InstType.OpNullary, "discard"); Add(Instruction.Divide, InstType.OpBinary, "/", 1); Add(Instruction.EmitVertex, InstType.CallNullary, "EmitVertex"); Add(Instruction.EndPrimitive, InstType.CallNullary, "EndPrimitive"); Add(Instruction.ExponentB2, InstType.CallUnary, "exp2"); + Add(Instruction.FindFirstSetS32, InstType.CallUnary, "findMSB"); + Add(Instruction.FindFirstSetU32, InstType.CallUnary, "findMSB"); Add(Instruction.Floor, InstType.CallUnary, "floor"); Add(Instruction.FusedMultiplyAdd, InstType.CallTernary, "fma"); Add(Instruction.ImageLoad, InstType.Special); @@ -66,6 +71,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions Add(Instruction.ShiftLeft, InstType.OpBinary, "<<", 3); Add(Instruction.ShiftRightS32, InstType.OpBinary, ">>", 3); Add(Instruction.ShiftRightU32, InstType.OpBinary, ">>", 3); + Add(Instruction.Shuffle, InstType.CallTernary, HelperFunctionNames.Shuffle); + Add(Instruction.ShuffleDown, InstType.CallTernary, HelperFunctionNames.ShuffleDown); + Add(Instruction.ShuffleUp, InstType.CallTernary, HelperFunctionNames.ShuffleUp); + Add(Instruction.ShuffleXor, InstType.CallTernary, HelperFunctionNames.ShuffleXor); Add(Instruction.Maximum, InstType.CallBinary, "max"); Add(Instruction.MaximumU32, InstType.CallBinary, "max"); Add(Instruction.Minimum, InstType.CallBinary, "min"); @@ -80,6 +89,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions Add(Instruction.StoreLocal, 
InstType.Special); Add(Instruction.StoreStorage, InstType.Special); Add(Instruction.Subtract, InstType.OpBinary, "-", 2); + Add(Instruction.SwizzleAdd, InstType.CallTernary, HelperFunctionNames.SwizzleAdd); Add(Instruction.TextureSample, InstType.Special); Add(Instruction.TextureSize, InstType.Special); Add(Instruction.Truncate, InstType.CallUnary, "trunc"); diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs index f2f6ae0c9..913cace16 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs @@ -164,13 +164,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions { AstTextureOperation texOp = (AstTextureOperation)operation; - bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; - bool isGather = (texOp.Flags & TextureFlags.Gather) != 0; - bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0; - bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0; - bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0; - bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0; - bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0; + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + bool isGather = (texOp.Flags & TextureFlags.Gather) != 0; + bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0; + bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0; + bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0; + bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0; + bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0; + bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0; bool isArray = (texOp.Type & SamplerType.Array) != 0; bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0; @@ -190,6 +191,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions { texCall += 
"Gather"; } + else if (hasDerivatives) + { + texCall += "Grad"; + } else if (hasLodLevel && !intCoords) { texCall += "Lod"; @@ -297,6 +302,31 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions Append(AssemblePVector(pCount)); + string AssembleDerivativesVector(int count) + { + if (count > 1) + { + string[] elems = new string[count]; + + for (int index = 0; index < count; index++) + { + elems[index] = Src(VariableType.F32); + } + + return "vec" + count + "(" + string.Join(", ", elems) + ")"; + } + else + { + return Src(VariableType.F32); + } + } + + if (hasDerivatives) + { + Append(AssembleDerivativesVector(coordsCount)); // dPdx + Append(AssembleDerivativesVector(coordsCount)); // dPdy + } + if (hasExtraCompareArg) { Append(Src(VariableType.F32)); diff --git a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs index dd5347d9e..4078440b1 100644 --- a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs +++ b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs @@ -241,7 +241,7 @@ namespace Ryujinx.Graphics.Shader.Decoders private static bool IsBranch(OpCode opCode) { - return (opCode is OpCodeBranch && opCode.Emitter != InstEmit.Ssy) || + return (opCode is OpCodeBranch opBranch && !opBranch.PushTarget) || opCode is OpCodeSync || opCode is OpCodeExit; } diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeBranch.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeBranch.cs index 25941b396..f51c39966 100644 --- a/Ryujinx.Graphics.Shader/Decoders/OpCodeBranch.cs +++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeBranch.cs @@ -6,9 +6,13 @@ namespace Ryujinx.Graphics.Shader.Decoders { public int Offset { get; } + public bool PushTarget { get; protected set; } + public OpCodeBranch(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode) { Offset = ((int)(opCode >> 20) << 8) >> 8; + + PushTarget = false; } public ulong GetAbsoluteAddress() diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeShuffle.cs 
b/Ryujinx.Graphics.Shader/Decoders/OpCodeShuffle.cs new file mode 100644 index 000000000..43693cf49 --- /dev/null +++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeShuffle.cs @@ -0,0 +1,40 @@ +using Ryujinx.Graphics.Shader.Instructions; + +namespace Ryujinx.Graphics.Shader.Decoders +{ + class OpCodeShuffle : OpCode, IOpCodeRd, IOpCodeRa + { + public Register Rd { get; } + public Register Ra { get; } + public Register Rb { get; } + public Register Rc { get; } + + public int ImmediateB { get; } + public int ImmediateC { get; } + + public bool IsBImmediate { get; } + public bool IsCImmediate { get; } + + public ShuffleType ShuffleType { get; } + + public Register Predicate48 { get; } + + public OpCodeShuffle(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode) + { + Rd = new Register(opCode.Extract(0, 8), RegisterType.Gpr); + Ra = new Register(opCode.Extract(8, 8), RegisterType.Gpr); + Rb = new Register(opCode.Extract(20, 8), RegisterType.Gpr); + Rc = new Register(opCode.Extract(39, 8), RegisterType.Gpr); + + ImmediateB = opCode.Extract(20, 5); + ImmediateC = opCode.Extract(34, 13); + + IsBImmediate = opCode.Extract(28); + IsCImmediate = opCode.Extract(29); + + ShuffleType = (ShuffleType)opCode.Extract(30, 2); + + Predicate48 = new Register(opCode.Extract(48, 3), RegisterType.Predicate); + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeSsy.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeSsy.cs index 499c07068..d3831e22d 100644 --- a/Ryujinx.Graphics.Shader/Decoders/OpCodeSsy.cs +++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeSsy.cs @@ -15,6 +15,8 @@ namespace Ryujinx.Graphics.Shader.Decoders Predicate = new Register(RegisterConsts.PredicateTrueIndex, RegisterType.Predicate); InvertPredicate = false; + + PushTarget = true; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs index 5128dae39..7adaff614 100644 --- 
a/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs +++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs @@ -30,136 +30,148 @@ namespace Ryujinx.Graphics.Shader.Decoders _opCodes = new TableEntry[1 << EncodingBits]; #region Instructions - Set("1110111111011x", InstEmit.Ald, typeof(OpCodeAttribute)); - Set("1110111111110x", InstEmit.Ast, typeof(OpCodeAttribute)); - Set("0100110000000x", InstEmit.Bfe, typeof(OpCodeAluCbuf)); - Set("0011100x00000x", InstEmit.Bfe, typeof(OpCodeAluImm)); - Set("0101110000000x", InstEmit.Bfe, typeof(OpCodeAluReg)); - Set("111000100100xx", InstEmit.Bra, typeof(OpCodeBranch)); - Set("0101000010100x", InstEmit.Csetp, typeof(OpCodePsetp)); - Set("111000110000xx", InstEmit.Exit, typeof(OpCodeExit)); - Set("0100110010101x", InstEmit.F2F, typeof(OpCodeFArithCbuf)); - Set("0011100x10101x", InstEmit.F2F, typeof(OpCodeFArithImm)); - Set("0101110010101x", InstEmit.F2F, typeof(OpCodeFArithReg)); - Set("0100110010110x", InstEmit.F2I, typeof(OpCodeFArithCbuf)); - Set("0011100x10110x", InstEmit.F2I, typeof(OpCodeFArithImm)); - Set("0101110010110x", InstEmit.F2I, typeof(OpCodeFArithReg)); - Set("0100110001011x", InstEmit.Fadd, typeof(OpCodeFArithCbuf)); - Set("0011100x01011x", InstEmit.Fadd, typeof(OpCodeFArithImm)); - Set("000010xxxxxxxx", InstEmit.Fadd, typeof(OpCodeFArithImm32)); - Set("0101110001011x", InstEmit.Fadd, typeof(OpCodeFArithReg)); - Set("010010011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithCbuf)); - Set("0011001x1xxxxx", InstEmit.Ffma, typeof(OpCodeFArithImm)); - Set("010100011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithRegCbuf)); - Set("010110011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithReg)); - Set("0100110001100x", InstEmit.Fmnmx, typeof(OpCodeFArithCbuf)); - Set("0011100x01100x", InstEmit.Fmnmx, typeof(OpCodeFArithImm)); - Set("0101110001100x", InstEmit.Fmnmx, typeof(OpCodeFArithReg)); - Set("0100110001101x", InstEmit.Fmul, typeof(OpCodeFArithCbuf)); - Set("0011100x01101x", InstEmit.Fmul, typeof(OpCodeFArithImm)); - Set("00011110xxxxxx", 
InstEmit.Fmul, typeof(OpCodeFArithImm32)); - Set("0101110001101x", InstEmit.Fmul, typeof(OpCodeFArithReg)); - Set("0100100xxxxxxx", InstEmit.Fset, typeof(OpCodeSetCbuf)); - Set("0011000xxxxxxx", InstEmit.Fset, typeof(OpCodeFsetImm)); - Set("01011000xxxxxx", InstEmit.Fset, typeof(OpCodeSetReg)); - Set("010010111011xx", InstEmit.Fsetp, typeof(OpCodeSetCbuf)); - Set("0011011x1011xx", InstEmit.Fsetp, typeof(OpCodeFsetImm)); - Set("010110111011xx", InstEmit.Fsetp, typeof(OpCodeSetReg)); - Set("0111101x1xxxxx", InstEmit.Hadd2, typeof(OpCodeAluCbuf)); - Set("0111101x0xxxxx", InstEmit.Hadd2, typeof(OpCodeAluImm2x10)); - Set("0010110xxxxxxx", InstEmit.Hadd2, typeof(OpCodeAluImm32)); - Set("0101110100010x", InstEmit.Hadd2, typeof(OpCodeAluReg)); - Set("01110xxx1xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaCbuf)); - Set("01110xxx0xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaImm2x10)); - Set("0010100xxxxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaImm32)); - Set("0101110100000x", InstEmit.Hfma2, typeof(OpCodeHfmaReg)); - Set("01100xxx1xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaRegCbuf)); - Set("0111100x1xxxxx", InstEmit.Hmul2, typeof(OpCodeAluCbuf)); - Set("0111100x0xxxxx", InstEmit.Hmul2, typeof(OpCodeAluImm2x10)); - Set("0010101xxxxxxx", InstEmit.Hmul2, typeof(OpCodeAluImm32)); - Set("0101110100001x", InstEmit.Hmul2, typeof(OpCodeAluReg)); - Set("0111111x1xxxxx", InstEmit.Hsetp2, typeof(OpCodeSetCbuf)); - Set("0111111x0xxxxx", InstEmit.Hsetp2, typeof(OpCodeHsetImm2x10)); - Set("0101110100100x", InstEmit.Hsetp2, typeof(OpCodeSetReg)); - Set("0100110010111x", InstEmit.I2F, typeof(OpCodeAluCbuf)); - Set("0011100x10111x", InstEmit.I2F, typeof(OpCodeAluImm)); - Set("0101110010111x", InstEmit.I2F, typeof(OpCodeAluReg)); - Set("0100110011100x", InstEmit.I2I, typeof(OpCodeAluCbuf)); - Set("0011100x11100x", InstEmit.I2I, typeof(OpCodeAluImm)); - Set("0101110011100x", InstEmit.I2I, typeof(OpCodeAluReg)); - Set("0100110000010x", InstEmit.Iadd, typeof(OpCodeAluCbuf)); - Set("0011100000010x", 
InstEmit.Iadd, typeof(OpCodeAluImm)); - Set("0001110x0xxxxx", InstEmit.Iadd, typeof(OpCodeAluImm32)); - Set("0101110000010x", InstEmit.Iadd, typeof(OpCodeAluReg)); - Set("010011001100xx", InstEmit.Iadd3, typeof(OpCodeAluCbuf)); - Set("001110001100xx", InstEmit.Iadd3, typeof(OpCodeAluImm)); - Set("010111001100xx", InstEmit.Iadd3, typeof(OpCodeAluReg)); - Set("0100110000100x", InstEmit.Imnmx, typeof(OpCodeAluCbuf)); - Set("0011100x00100x", InstEmit.Imnmx, typeof(OpCodeAluImm)); - Set("0101110000100x", InstEmit.Imnmx, typeof(OpCodeAluReg)); - Set("11100000xxxxxx", InstEmit.Ipa, typeof(OpCodeIpa)); - Set("1110111111010x", InstEmit.Isberd, typeof(OpCodeAlu)); - Set("0100110000011x", InstEmit.Iscadd, typeof(OpCodeAluCbuf)); - Set("0011100x00011x", InstEmit.Iscadd, typeof(OpCodeAluImm)); - Set("000101xxxxxxxx", InstEmit.Iscadd, typeof(OpCodeAluImm32)); - Set("0101110000011x", InstEmit.Iscadd, typeof(OpCodeAluReg)); - Set("010010110101xx", InstEmit.Iset, typeof(OpCodeSetCbuf)); - Set("001101100101xx", InstEmit.Iset, typeof(OpCodeSetImm)); - Set("010110110101xx", InstEmit.Iset, typeof(OpCodeSetReg)); - Set("010010110110xx", InstEmit.Isetp, typeof(OpCodeSetCbuf)); - Set("0011011x0110xx", InstEmit.Isetp, typeof(OpCodeSetImm)); - Set("010110110110xx", InstEmit.Isetp, typeof(OpCodeSetReg)); - Set("111000110011xx", InstEmit.Kil, typeof(OpCodeExit)); - Set("1110111101000x", InstEmit.Ld, typeof(OpCodeMemory)); - Set("1110111110010x", InstEmit.Ldc, typeof(OpCodeLdc)); - Set("1110111011010x", InstEmit.Ldg, typeof(OpCodeMemory)); - Set("0100110001000x", InstEmit.Lop, typeof(OpCodeLopCbuf)); - Set("0011100001000x", InstEmit.Lop, typeof(OpCodeLopImm)); - Set("000001xxxxxxxx", InstEmit.Lop, typeof(OpCodeLopImm32)); - Set("0101110001000x", InstEmit.Lop, typeof(OpCodeLopReg)); - Set("0010000xxxxxxx", InstEmit.Lop3, typeof(OpCodeLopCbuf)); - Set("001111xxxxxxxx", InstEmit.Lop3, typeof(OpCodeLopImm)); - Set("0101101111100x", InstEmit.Lop3, typeof(OpCodeLopReg)); - Set("0100110010011x", 
InstEmit.Mov, typeof(OpCodeAluCbuf)); - Set("0011100x10011x", InstEmit.Mov, typeof(OpCodeAluImm)); - Set("000000010000xx", InstEmit.Mov, typeof(OpCodeAluImm32)); - Set("0101110010011x", InstEmit.Mov, typeof(OpCodeAluReg)); - Set("0101000010000x", InstEmit.Mufu, typeof(OpCodeFArith)); - Set("1111101111100x", InstEmit.Out, typeof(OpCode)); - Set("0101000010010x", InstEmit.Psetp, typeof(OpCodePsetp)); - Set("0100110010010x", InstEmit.Rro, typeof(OpCodeFArithCbuf)); - Set("0011100x10010x", InstEmit.Rro, typeof(OpCodeFArithImm)); - Set("0101110010010x", InstEmit.Rro, typeof(OpCodeFArithReg)); - Set("1111000011001x", InstEmit.S2r, typeof(OpCodeAlu)); - Set("0100110010100x", InstEmit.Sel, typeof(OpCodeAluCbuf)); - Set("0011100x10100x", InstEmit.Sel, typeof(OpCodeAluImm)); - Set("0101110010100x", InstEmit.Sel, typeof(OpCodeAluReg)); - Set("0100110001001x", InstEmit.Shl, typeof(OpCodeAluCbuf)); - Set("0011100x01001x", InstEmit.Shl, typeof(OpCodeAluImm)); - Set("0101110001001x", InstEmit.Shl, typeof(OpCodeAluReg)); - Set("0100110000101x", InstEmit.Shr, typeof(OpCodeAluCbuf)); - Set("0011100x00101x", InstEmit.Shr, typeof(OpCodeAluImm)); - Set("0101110000101x", InstEmit.Shr, typeof(OpCodeAluReg)); - Set("111000101001xx", InstEmit.Ssy, typeof(OpCodeSsy)); - Set("1110111101010x", InstEmit.St, typeof(OpCodeMemory)); - Set("1110111011011x", InstEmit.Stg, typeof(OpCodeMemory)); - Set("11101011001xxx", InstEmit.Sust, typeof(OpCodeImage)); - Set("1111000011111x", InstEmit.Sync, typeof(OpCodeSync)); - Set("110000xxxx111x", InstEmit.Tex, typeof(OpCodeTex)); - Set("1101111010111x", InstEmit.TexB, typeof(OpCodeTexB)); - Set("1101x00xxxxxxx", InstEmit.Texs, typeof(OpCodeTexs)); - Set("1101x01xxxxxxx", InstEmit.Texs, typeof(OpCodeTlds)); - Set("1101x11100xxxx", InstEmit.Texs, typeof(OpCodeTld4s)); - Set("11011100xx111x", InstEmit.Tld, typeof(OpCodeTld)); - Set("11011101xx111x", InstEmit.TldB, typeof(OpCodeTld)); - Set("110010xxxx111x", InstEmit.Tld4, typeof(OpCodeTld4)); - 
Set("1101111101001x", InstEmit.Txq, typeof(OpCodeTex)); - Set("1101111101010x", InstEmit.TxqB, typeof(OpCodeTex)); - Set("01011111xxxxxx", InstEmit.Vmad, typeof(OpCodeVideo)); - Set("0100111xxxxxxx", InstEmit.Xmad, typeof(OpCodeAluCbuf)); - Set("0011011x00xxxx", InstEmit.Xmad, typeof(OpCodeAluImm)); - Set("010100010xxxxx", InstEmit.Xmad, typeof(OpCodeAluRegCbuf)); - Set("0101101100xxxx", InstEmit.Xmad, typeof(OpCodeAluReg)); + Set("1110111111011x", InstEmit.Ald, typeof(OpCodeAttribute)); + Set("1110111111110x", InstEmit.Ast, typeof(OpCodeAttribute)); + Set("0100110000000x", InstEmit.Bfe, typeof(OpCodeAluCbuf)); + Set("0011100x00000x", InstEmit.Bfe, typeof(OpCodeAluImm)); + Set("0101110000000x", InstEmit.Bfe, typeof(OpCodeAluReg)); + Set("0100101111110x", InstEmit.Bfi, typeof(OpCodeAluCbuf)); + Set("0011011x11110x", InstEmit.Bfi, typeof(OpCodeAluImm)); + Set("0101001111110x", InstEmit.Bfi, typeof(OpCodeAluRegCbuf)); + Set("0101101111110x", InstEmit.Bfi, typeof(OpCodeAluReg)); + Set("111000100100xx", InstEmit.Bra, typeof(OpCodeBranch)); + Set("111000110100xx", InstEmit.Brk, typeof(OpCodeSync)); + Set("0101000010100x", InstEmit.Csetp, typeof(OpCodePsetp)); + Set("111000110000xx", InstEmit.Exit, typeof(OpCodeExit)); + Set("0100110010101x", InstEmit.F2F, typeof(OpCodeFArithCbuf)); + Set("0011100x10101x", InstEmit.F2F, typeof(OpCodeFArithImm)); + Set("0101110010101x", InstEmit.F2F, typeof(OpCodeFArithReg)); + Set("0100110010110x", InstEmit.F2I, typeof(OpCodeFArithCbuf)); + Set("0011100x10110x", InstEmit.F2I, typeof(OpCodeFArithImm)); + Set("0101110010110x", InstEmit.F2I, typeof(OpCodeFArithReg)); + Set("0100110001011x", InstEmit.Fadd, typeof(OpCodeFArithCbuf)); + Set("0011100x01011x", InstEmit.Fadd, typeof(OpCodeFArithImm)); + Set("000010xxxxxxxx", InstEmit.Fadd, typeof(OpCodeFArithImm32)); + Set("0101110001011x", InstEmit.Fadd, typeof(OpCodeFArithReg)); + Set("010010011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithCbuf)); + Set("0011001x1xxxxx", InstEmit.Ffma, 
typeof(OpCodeFArithImm)); + Set("010100011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithRegCbuf)); + Set("010110011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithReg)); + Set("0100110000110x", InstEmit.Flo, typeof(OpCodeAluCbuf)); + Set("0011100x00110x", InstEmit.Flo, typeof(OpCodeAluImm)); + Set("0101110000110x", InstEmit.Flo, typeof(OpCodeAluReg)); + Set("0100110001100x", InstEmit.Fmnmx, typeof(OpCodeFArithCbuf)); + Set("0011100x01100x", InstEmit.Fmnmx, typeof(OpCodeFArithImm)); + Set("0101110001100x", InstEmit.Fmnmx, typeof(OpCodeFArithReg)); + Set("0100110001101x", InstEmit.Fmul, typeof(OpCodeFArithCbuf)); + Set("0011100x01101x", InstEmit.Fmul, typeof(OpCodeFArithImm)); + Set("00011110xxxxxx", InstEmit.Fmul, typeof(OpCodeFArithImm32)); + Set("0101110001101x", InstEmit.Fmul, typeof(OpCodeFArithReg)); + Set("0100100xxxxxxx", InstEmit.Fset, typeof(OpCodeSetCbuf)); + Set("0011000xxxxxxx", InstEmit.Fset, typeof(OpCodeFsetImm)); + Set("01011000xxxxxx", InstEmit.Fset, typeof(OpCodeSetReg)); + Set("010010111011xx", InstEmit.Fsetp, typeof(OpCodeSetCbuf)); + Set("0011011x1011xx", InstEmit.Fsetp, typeof(OpCodeFsetImm)); + Set("010110111011xx", InstEmit.Fsetp, typeof(OpCodeSetReg)); + Set("0101000011111x", InstEmit.Fswzadd, typeof(OpCodeAluReg)); + Set("0111101x1xxxxx", InstEmit.Hadd2, typeof(OpCodeAluCbuf)); + Set("0111101x0xxxxx", InstEmit.Hadd2, typeof(OpCodeAluImm2x10)); + Set("0010110xxxxxxx", InstEmit.Hadd2, typeof(OpCodeAluImm32)); + Set("0101110100010x", InstEmit.Hadd2, typeof(OpCodeAluReg)); + Set("01110xxx1xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaCbuf)); + Set("01110xxx0xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaImm2x10)); + Set("0010100xxxxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaImm32)); + Set("0101110100000x", InstEmit.Hfma2, typeof(OpCodeHfmaReg)); + Set("01100xxx1xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaRegCbuf)); + Set("0111100x1xxxxx", InstEmit.Hmul2, typeof(OpCodeAluCbuf)); + Set("0111100x0xxxxx", InstEmit.Hmul2, typeof(OpCodeAluImm2x10)); + Set("0010101xxxxxxx", 
InstEmit.Hmul2, typeof(OpCodeAluImm32)); + Set("0101110100001x", InstEmit.Hmul2, typeof(OpCodeAluReg)); + Set("0111111x1xxxxx", InstEmit.Hsetp2, typeof(OpCodeSetCbuf)); + Set("0111111x0xxxxx", InstEmit.Hsetp2, typeof(OpCodeHsetImm2x10)); + Set("0101110100100x", InstEmit.Hsetp2, typeof(OpCodeSetReg)); + Set("0100110010111x", InstEmit.I2F, typeof(OpCodeAluCbuf)); + Set("0011100x10111x", InstEmit.I2F, typeof(OpCodeAluImm)); + Set("0101110010111x", InstEmit.I2F, typeof(OpCodeAluReg)); + Set("0100110011100x", InstEmit.I2I, typeof(OpCodeAluCbuf)); + Set("0011100x11100x", InstEmit.I2I, typeof(OpCodeAluImm)); + Set("0101110011100x", InstEmit.I2I, typeof(OpCodeAluReg)); + Set("0100110000010x", InstEmit.Iadd, typeof(OpCodeAluCbuf)); + Set("0011100000010x", InstEmit.Iadd, typeof(OpCodeAluImm)); + Set("0001110x0xxxxx", InstEmit.Iadd, typeof(OpCodeAluImm32)); + Set("0101110000010x", InstEmit.Iadd, typeof(OpCodeAluReg)); + Set("010011001100xx", InstEmit.Iadd3, typeof(OpCodeAluCbuf)); + Set("001110001100xx", InstEmit.Iadd3, typeof(OpCodeAluImm)); + Set("010111001100xx", InstEmit.Iadd3, typeof(OpCodeAluReg)); + Set("0100110000100x", InstEmit.Imnmx, typeof(OpCodeAluCbuf)); + Set("0011100x00100x", InstEmit.Imnmx, typeof(OpCodeAluImm)); + Set("0101110000100x", InstEmit.Imnmx, typeof(OpCodeAluReg)); + Set("11100000xxxxxx", InstEmit.Ipa, typeof(OpCodeIpa)); + Set("1110111111010x", InstEmit.Isberd, typeof(OpCodeAlu)); + Set("0100110000011x", InstEmit.Iscadd, typeof(OpCodeAluCbuf)); + Set("0011100x00011x", InstEmit.Iscadd, typeof(OpCodeAluImm)); + Set("000101xxxxxxxx", InstEmit.Iscadd, typeof(OpCodeAluImm32)); + Set("0101110000011x", InstEmit.Iscadd, typeof(OpCodeAluReg)); + Set("010010110101xx", InstEmit.Iset, typeof(OpCodeSetCbuf)); + Set("001101100101xx", InstEmit.Iset, typeof(OpCodeSetImm)); + Set("010110110101xx", InstEmit.Iset, typeof(OpCodeSetReg)); + Set("010010110110xx", InstEmit.Isetp, typeof(OpCodeSetCbuf)); + Set("0011011x0110xx", InstEmit.Isetp, typeof(OpCodeSetImm)); + 
Set("010110110110xx", InstEmit.Isetp, typeof(OpCodeSetReg)); + Set("111000110011xx", InstEmit.Kil, typeof(OpCodeExit)); + Set("1110111101000x", InstEmit.Ld, typeof(OpCodeMemory)); + Set("1110111110010x", InstEmit.Ldc, typeof(OpCodeLdc)); + Set("1110111011010x", InstEmit.Ldg, typeof(OpCodeMemory)); + Set("0100110001000x", InstEmit.Lop, typeof(OpCodeLopCbuf)); + Set("0011100001000x", InstEmit.Lop, typeof(OpCodeLopImm)); + Set("000001xxxxxxxx", InstEmit.Lop, typeof(OpCodeLopImm32)); + Set("0101110001000x", InstEmit.Lop, typeof(OpCodeLopReg)); + Set("0010000xxxxxxx", InstEmit.Lop3, typeof(OpCodeLopCbuf)); + Set("001111xxxxxxxx", InstEmit.Lop3, typeof(OpCodeLopImm)); + Set("0101101111100x", InstEmit.Lop3, typeof(OpCodeLopReg)); + Set("0100110010011x", InstEmit.Mov, typeof(OpCodeAluCbuf)); + Set("0011100x10011x", InstEmit.Mov, typeof(OpCodeAluImm)); + Set("000000010000xx", InstEmit.Mov, typeof(OpCodeAluImm32)); + Set("0101110010011x", InstEmit.Mov, typeof(OpCodeAluReg)); + Set("0101000010000x", InstEmit.Mufu, typeof(OpCodeFArith)); + Set("1111101111100x", InstEmit.Out, typeof(OpCode)); + Set("111000101010xx", InstEmit.Pbk, typeof(OpCodeSsy)); + Set("0101000010010x", InstEmit.Psetp, typeof(OpCodePsetp)); + Set("0100110010010x", InstEmit.Rro, typeof(OpCodeFArithCbuf)); + Set("0011100x10010x", InstEmit.Rro, typeof(OpCodeFArithImm)); + Set("0101110010010x", InstEmit.Rro, typeof(OpCodeFArithReg)); + Set("1111000011001x", InstEmit.S2r, typeof(OpCodeAlu)); + Set("0100110010100x", InstEmit.Sel, typeof(OpCodeAluCbuf)); + Set("0011100x10100x", InstEmit.Sel, typeof(OpCodeAluImm)); + Set("0101110010100x", InstEmit.Sel, typeof(OpCodeAluReg)); + Set("1110111100010x", InstEmit.Shfl, typeof(OpCodeShuffle)); + Set("0100110001001x", InstEmit.Shl, typeof(OpCodeAluCbuf)); + Set("0011100x01001x", InstEmit.Shl, typeof(OpCodeAluImm)); + Set("0101110001001x", InstEmit.Shl, typeof(OpCodeAluReg)); + Set("0100110000101x", InstEmit.Shr, typeof(OpCodeAluCbuf)); + Set("0011100x00101x", InstEmit.Shr, 
typeof(OpCodeAluImm)); + Set("0101110000101x", InstEmit.Shr, typeof(OpCodeAluReg)); + Set("111000101001xx", InstEmit.Ssy, typeof(OpCodeSsy)); + Set("1110111101010x", InstEmit.St, typeof(OpCodeMemory)); + Set("1110111011011x", InstEmit.Stg, typeof(OpCodeMemory)); + Set("11101011001xxx", InstEmit.Sust, typeof(OpCodeImage)); + Set("1111000011111x", InstEmit.Sync, typeof(OpCodeSync)); + Set("110000xxxx111x", InstEmit.Tex, typeof(OpCodeTex)); + Set("1101111010111x", InstEmit.TexB, typeof(OpCodeTexB)); + Set("1101x00xxxxxxx", InstEmit.Texs, typeof(OpCodeTexs)); + Set("1101x01xxxxxxx", InstEmit.Texs, typeof(OpCodeTlds)); + Set("11011111x0xxxx", InstEmit.Texs, typeof(OpCodeTld4s)); + Set("11011100xx111x", InstEmit.Tld, typeof(OpCodeTld)); + Set("11011101xx111x", InstEmit.TldB, typeof(OpCodeTld)); + Set("110010xxxx111x", InstEmit.Tld4, typeof(OpCodeTld4)); + Set("110111100x1110", InstEmit.Txd, typeof(OpCodeTxd)); + Set("1101111101001x", InstEmit.Txq, typeof(OpCodeTex)); + Set("1101111101010x", InstEmit.TxqB, typeof(OpCodeTex)); + Set("01011111xxxxxx", InstEmit.Vmad, typeof(OpCodeVideo)); + Set("0100111xxxxxxx", InstEmit.Xmad, typeof(OpCodeAluCbuf)); + Set("0011011x00xxxx", InstEmit.Xmad, typeof(OpCodeAluImm)); + Set("010100010xxxxx", InstEmit.Xmad, typeof(OpCodeAluRegCbuf)); + Set("0101101100xxxx", InstEmit.Xmad, typeof(OpCodeAluReg)); #endregion } diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeTextureScalar.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeTextureScalar.cs index 1c175e30b..543f8d136 100644 --- a/Ryujinx.Graphics.Shader/Decoders/OpCodeTextureScalar.cs +++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeTextureScalar.cs @@ -39,7 +39,7 @@ namespace Ryujinx.Graphics.Shader.Decoders protected int RawType; - public bool IsFp16 { get; } + public bool IsFp16 { get; protected set; } public OpCodeTextureScalar(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode) { diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeTld4s.cs 
b/Ryujinx.Graphics.Shader/Decoders/OpCodeTld4s.cs index 7e51a9e50..fd3240a0e 100644 --- a/Ryujinx.Graphics.Shader/Decoders/OpCodeTld4s.cs +++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeTld4s.cs @@ -16,6 +16,8 @@ namespace Ryujinx.Graphics.Shader.Decoders GatherCompIndex = opCode.Extract(52, 2); + IsFp16 = opCode.Extract(55); + ComponentMask = Rd1.IsRZ ? 3 : 0xf; } } diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeTxd.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeTxd.cs new file mode 100644 index 000000000..25df1f81f --- /dev/null +++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeTxd.cs @@ -0,0 +1,18 @@ +using Ryujinx.Graphics.Shader.Instructions; + +namespace Ryujinx.Graphics.Shader.Decoders +{ + class OpCodeTxd : OpCodeTexture + { + public bool IsBindless { get; } + + public OpCodeTxd(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode) + { + HasOffset = opCode.Extract(35); + + IsBindless = opCode.Extract(54); + + LodMode = TextureLodMode.None; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Decoders/ShuffleType.cs b/Ryujinx.Graphics.Shader/Decoders/ShuffleType.cs new file mode 100644 index 000000000..2892c8dd1 --- /dev/null +++ b/Ryujinx.Graphics.Shader/Decoders/ShuffleType.cs @@ -0,0 +1,10 @@ +namespace Ryujinx.Graphics.Shader.Decoders +{ + enum ShuffleType + { + Indexed = 0, + Up = 1, + Down = 2, + Butterfly = 3 + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Decoders/SystemRegister.cs b/Ryujinx.Graphics.Shader/Decoders/SystemRegister.cs index 1f51d93c2..2f3f44928 100644 --- a/Ryujinx.Graphics.Shader/Decoders/SystemRegister.cs +++ b/Ryujinx.Graphics.Shader/Decoders/SystemRegister.cs @@ -2,12 +2,13 @@ namespace Ryujinx.Graphics.Shader.Decoders { enum SystemRegister { - ThreadId = 0x20, - ThreadIdX = 0x21, - ThreadIdY = 0x22, - ThreadIdZ = 0x23, - CtaIdX = 0x25, - CtaIdY = 0x26, - CtaIdZ = 0x27 + YDirection = 0x12, + ThreadId = 0x20, + ThreadIdX = 0x21, + ThreadIdY = 0x22, + ThreadIdZ = 
0x23, + CtaIdX = 0x25, + CtaIdY = 0x26, + CtaIdZ = 0x27 } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitAlu.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitAlu.cs index 5cbb3b732..8d14b0cf0 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitAlu.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitAlu.cs @@ -39,6 +39,23 @@ namespace Ryujinx.Graphics.Shader.Instructions // TODO: CC, X, corner cases } + public static void Bfi(EmitterContext context) + { + OpCodeAlu op = (OpCodeAlu)context.CurrOp; + + Operand srcA = GetSrcA(context); + Operand srcB = GetSrcB(context); + Operand srcC = GetSrcC(context); + + Operand position = context.BitwiseAnd(srcB, Const(0xff)); + + Operand size = context.BitfieldExtractU32(srcB, Const(8), Const(8)); + + Operand res = context.BitfieldInsert(srcC, srcA, position, size); + + context.Copy(GetDest(context), res); + } + public static void Csetp(EmitterContext context) { OpCodePsetp op = (OpCodePsetp)context.CurrOp; @@ -58,6 +75,28 @@ namespace Ryujinx.Graphics.Shader.Instructions context.Copy(Register(op.Predicate0), p1Res); } + public static void Flo(EmitterContext context) + { + OpCodeAlu op = (OpCodeAlu)context.CurrOp; + + bool invert = op.RawOpCode.Extract(40); + bool countZeros = op.RawOpCode.Extract(41); + bool isSigned = op.RawOpCode.Extract(48); + + Operand srcB = context.BitwiseNot(GetSrcB(context), invert); + + Operand res = isSigned + ? 
context.FindFirstSetS32(srcB) + : context.FindFirstSetU32(srcB); + + if (countZeros) + { + res = context.BitwiseExclusiveOr(res, Const(31)); + } + + context.Copy(GetDest(context), res); + } + public static void Iadd(EmitterContext context) { OpCodeAlu op = (OpCodeAlu)context.CurrOp; diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs index 1a7d42511..4f7072eb0 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs @@ -180,6 +180,22 @@ namespace Ryujinx.Graphics.Shader.Instructions context.Copy(Register(op.Predicate0), p1Res); } + public static void Fswzadd(EmitterContext context) + { + OpCodeAlu op = (OpCodeAlu)context.CurrOp; + + int mask = op.RawOpCode.Extract(28, 8); + + Operand srcA = GetSrcA(context); + Operand srcB = GetSrcB(context); + + Operand dest = GetDest(context); + + context.Copy(dest, context.FPSwizzleAdd(srcA, srcB, mask)); + + SetFPZnFlags(context, dest, op.SetCondCode); + } + public static void Hadd2(EmitterContext context) { Hadd2Hmul2Impl(context, isAdd: true); diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs index fb76e06a2..e17c9d6c6 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs @@ -15,6 +15,11 @@ namespace Ryujinx.Graphics.Shader.Instructions EmitBranch(context, context.CurrBlock.Branch.Address); } + public static void Brk(EmitterContext context) + { + EmitBrkOrSync(context); + } + public static void Exit(EmitterContext context) { OpCodeExit op = (OpCodeExit)context.CurrOp; @@ -32,7 +37,22 @@ namespace Ryujinx.Graphics.Shader.Instructions context.Discard(); } + public static void Pbk(EmitterContext context) + { + EmitPbkOrSsy(context); + } + public static void Ssy(EmitterContext context) + { + EmitPbkOrSsy(context); + } + + public static void 
Sync(EmitterContext context) + { + EmitBrkOrSync(context); + } + + private static void EmitPbkOrSsy(EmitterContext context) { OpCodeSsy op = (OpCodeSsy)context.CurrOp; @@ -48,7 +68,7 @@ namespace Ryujinx.Graphics.Shader.Instructions } } - public static void Sync(EmitterContext context) + private static void EmitBrkOrSync(EmitterContext context) { OpCodeSync op = (OpCodeSync)context.CurrOp; diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs index b9bb18d96..f07922453 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs @@ -27,6 +27,9 @@ namespace Ryujinx.Graphics.Shader.Instructions switch (sysReg) { + // TODO: Use value from Y direction GPU register. + case SystemRegister.YDirection: src = ConstF(1); break; + case SystemRegister.ThreadId: { Operand tidX = Attribute(AttributeConsts.ThreadIdX); @@ -67,5 +70,37 @@ namespace Ryujinx.Graphics.Shader.Instructions context.Copy(GetDest(context), res); } + + public static void Shfl(EmitterContext context) + { + OpCodeShuffle op = (OpCodeShuffle)context.CurrOp; + + Operand pred = Register(op.Predicate48); + + Operand srcA = GetSrcA(context); + + Operand srcB = op.IsBImmediate ? Const(op.ImmediateB) : Register(op.Rb); + Operand srcC = op.IsCImmediate ? 
Const(op.ImmediateC) : Register(op.Rc); + + Operand res = null; + + switch (op.ShuffleType) + { + case ShuffleType.Indexed: + res = context.Shuffle(srcA, srcB, srcC); + break; + case ShuffleType.Up: + res = context.ShuffleUp(srcA, srcB, srcC); + break; + case ShuffleType.Down: + res = context.ShuffleDown(srcA, srcB, srcC); + break; + case ShuffleType.Butterfly: + res = context.ShuffleXor(srcA, srcB, srcC); + break; + } + + context.Copy(GetDest(context), res); + } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs index 396727894..2654a05b8 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs @@ -102,22 +102,22 @@ namespace Ryujinx.Graphics.Shader.Instructions public static void Tex(EmitterContext context) { - Tex(context, TextureFlags.None); + EmitTextureSample(context, TextureFlags.None); } public static void TexB(EmitterContext context) { - Tex(context, TextureFlags.Bindless); + EmitTextureSample(context, TextureFlags.Bindless); } public static void Tld(EmitterContext context) { - Tex(context, TextureFlags.IntCoords); + EmitTextureSample(context, TextureFlags.IntCoords); } public static void TldB(EmitterContext context) { - Tex(context, TextureFlags.IntCoords | TextureFlags.Bindless); + EmitTextureSample(context, TextureFlags.IntCoords | TextureFlags.Bindless); } public static void Texs(EmitterContext context) @@ -512,17 +512,128 @@ namespace Ryujinx.Graphics.Shader.Instructions } } + public static void Txd(EmitterContext context) + { + OpCodeTxd op = (OpCodeTxd)context.CurrOp; + + if (op.Rd.IsRZ) + { + return; + } + + int raIndex = op.Ra.Index; + int rbIndex = op.Rb.Index; + + Operand Ra() + { + if (raIndex > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(raIndex++, RegisterType.Gpr)); + } + + Operand Rb() + { + if (rbIndex > 
RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(rbIndex++, RegisterType.Gpr)); + } + + TextureFlags flags = TextureFlags.Derivatives; + + List sourcesList = new List(); + + if (op.IsBindless) + { + sourcesList.Add(Ra()); + } + + SamplerType type = GetSamplerType(op.Dimensions); + + int coordsCount = type.GetDimensions(); + + for (int index = 0; index < coordsCount; index++) + { + sourcesList.Add(Ra()); + } + + Operand packedParams = Ra(); + + if (op.IsArray) + { + sourcesList.Add(context.BitwiseAnd(packedParams, Const(0xffff))); + + type |= SamplerType.Array; + } + + // Derivatives (X and Y). + for (int dIndex = 0; dIndex < 2 * coordsCount; dIndex++) + { + sourcesList.Add(Rb()); + } + + if (op.HasOffset) + { + for (int index = 0; index < coordsCount; index++) + { + sourcesList.Add(context.BitfieldExtractS32(packedParams, Const(16 + index * 4), Const(4))); + } + + flags |= TextureFlags.Offset; + } + + Operand[] sources = sourcesList.ToArray(); + + int rdIndex = op.Rd.Index; + + Operand GetDest() + { + if (rdIndex > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return Register(rdIndex++, RegisterType.Gpr); + } + + int handle = !op.IsBindless ? 
op.Immediate : 0; + + for (int compMask = op.ComponentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++) + { + if ((compMask & 1) != 0) + { + Operand dest = GetDest(); + + TextureOperation operation = new TextureOperation( + Instruction.TextureSample, + type, + flags, + handle, + compIndex, + dest, + sources); + + context.Add(operation); + } + } + } + public static void Txq(EmitterContext context) { - Txq(context, bindless: false); + EmitTextureQuery(context, bindless: false); } public static void TxqB(EmitterContext context) { - Txq(context, bindless: true); + EmitTextureQuery(context, bindless: true); } - private static void Txq(EmitterContext context, bool bindless) + private static void EmitTextureQuery(EmitterContext context, bool bindless) { OpCodeTex op = (OpCodeTex)context.CurrOp; @@ -597,7 +708,7 @@ namespace Ryujinx.Graphics.Shader.Instructions } } - private static void Tex(EmitterContext context, TextureFlags flags) + private static void EmitTextureSample(EmitterContext context, TextureFlags flags) { OpCodeTexture op = (OpCodeTexture)context.CurrOp; diff --git a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs index 88918f3fd..46c6b57f3 100644 --- a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs +++ b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs @@ -7,6 +7,7 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation { Absolute = 1, Add, + BitCount, BitfieldExtractS32, BitfieldExtractU32, BitfieldInsert, @@ -38,11 +39,15 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation ConvertU32ToFP, Copy, Cosine, + Ddx, + Ddy, Discard, Divide, EmitVertex, EndPrimitive, ExponentB2, + FindFirstSetS32, + FindFirstSetU32, Floor, FusedMultiplyAdd, ImageLoad, @@ -75,12 +80,17 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation ShiftLeft, ShiftRightS32, ShiftRightU32, + Shuffle, + ShuffleDown, + ShuffleUp, + 
ShuffleXor, Sine, SquareRoot, StoreGlobal, StoreLocal, StoreStorage, Subtract, + SwizzleAdd, TextureSample, TextureSize, Truncate, diff --git a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs index fc01d47ea..0d7379a8e 100644 --- a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs +++ b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs @@ -80,7 +80,12 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation public void TurnIntoCopy(Operand source) { - Inst = Instruction.Copy; + TurnInto(Instruction.Copy, source); + } + + public void TurnInto(Instruction newInst, Operand source) + { + Inst = newInst; foreach (Operand oldSrc in _sources) { diff --git a/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs b/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs index 5f0a84276..5334afacc 100644 --- a/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs +++ b/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs @@ -5,13 +5,14 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation [Flags] enum TextureFlags { - None = 0, - Bindless = 1 << 0, - Gather = 1 << 1, - IntCoords = 1 << 2, - LodBias = 1 << 3, - LodLevel = 1 << 4, - Offset = 1 << 5, - Offsets = 1 << 6 + None = 0, + Bindless = 1 << 0, + Gather = 1 << 1, + Derivatives = 1 << 2, + IntCoords = 1 << 3, + LodBias = 1 << 4, + LodLevel = 1 << 5, + Offset = 1 << 6, + Offsets = 1 << 7 } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj index ea83d2968..e10d1edaf 100644 --- a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj +++ b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj @@ -1,5 +1,17 @@ + + + + + + + + + + + + netcoreapp3.0 diff --git a/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs 
b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs new file mode 100644 index 000000000..e2eee78d9 --- /dev/null +++ b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs @@ -0,0 +1,14 @@ +using System; + +namespace Ryujinx.Graphics.Shader.StructuredIr +{ + [Flags] + enum HelperFunctionsMask + { + Shuffle = 1 << 0, + ShuffleDown = 1 << 1, + ShuffleUp = 1 << 2, + ShuffleXor = 1 << 3, + SwizzleAdd = 1 << 4 + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs b/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs index 675a96785..381cf2921 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs @@ -27,6 +27,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr // Inst Destination type Source 1 type Source 2 type Source 3 type Source 4 type Add(Instruction.Absolute, VariableType.Scalar, VariableType.Scalar); Add(Instruction.Add, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar); + Add(Instruction.BitCount, VariableType.Int, VariableType.Int); Add(Instruction.BitfieldExtractS32, VariableType.S32, VariableType.S32, VariableType.S32, VariableType.S32); Add(Instruction.BitfieldExtractU32, VariableType.U32, VariableType.U32, VariableType.S32, VariableType.S32); Add(Instruction.BitfieldInsert, VariableType.Int, VariableType.Int, VariableType.Int, VariableType.S32, VariableType.S32); @@ -55,8 +56,12 @@ namespace Ryujinx.Graphics.Shader.StructuredIr Add(Instruction.ConvertS32ToFP, VariableType.F32, VariableType.S32); Add(Instruction.ConvertU32ToFP, VariableType.F32, VariableType.U32); Add(Instruction.Cosine, VariableType.Scalar, VariableType.Scalar); + Add(Instruction.Ddx, VariableType.F32, VariableType.F32); + Add(Instruction.Ddy, VariableType.F32, VariableType.F32); Add(Instruction.Divide, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar); Add(Instruction.ExponentB2, VariableType.Scalar, 
VariableType.Scalar); + Add(Instruction.FindFirstSetS32, VariableType.S32, VariableType.S32); + Add(Instruction.FindFirstSetU32, VariableType.S32, VariableType.U32); Add(Instruction.Floor, VariableType.F32, VariableType.F32); Add(Instruction.FusedMultiplyAdd, VariableType.F32, VariableType.F32, VariableType.F32, VariableType.F32); Add(Instruction.ImageLoad, VariableType.F32); @@ -75,6 +80,10 @@ namespace Ryujinx.Graphics.Shader.StructuredIr Add(Instruction.ShiftLeft, VariableType.Int, VariableType.Int, VariableType.Int); Add(Instruction.ShiftRightS32, VariableType.S32, VariableType.S32, VariableType.Int); Add(Instruction.ShiftRightU32, VariableType.U32, VariableType.U32, VariableType.Int); + Add(Instruction.Shuffle, VariableType.F32, VariableType.F32, VariableType.U32, VariableType.U32); + Add(Instruction.ShuffleDown, VariableType.F32, VariableType.F32, VariableType.U32, VariableType.U32); + Add(Instruction.ShuffleUp, VariableType.F32, VariableType.F32, VariableType.U32, VariableType.U32); + Add(Instruction.ShuffleXor, VariableType.F32, VariableType.F32, VariableType.U32, VariableType.U32); Add(Instruction.Maximum, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar); Add(Instruction.MaximumU32, VariableType.U32, VariableType.U32, VariableType.U32); Add(Instruction.Minimum, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar); @@ -90,6 +99,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr Add(Instruction.StoreLocal, VariableType.None, VariableType.S32, VariableType.F32); Add(Instruction.StoreStorage, VariableType.None, VariableType.S32, VariableType.S32, VariableType.F32); Add(Instruction.Subtract, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar); + Add(Instruction.SwizzleAdd, VariableType.F32, VariableType.F32, VariableType.F32, VariableType.S32); Add(Instruction.TextureSample, VariableType.F32); Add(Instruction.TextureSize, VariableType.S32, VariableType.S32, VariableType.S32); Add(Instruction.Truncate, VariableType.F32, 
VariableType.F32); diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs index 53ca6700f..c4ffbe1ad 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs @@ -179,6 +179,28 @@ namespace Ryujinx.Graphics.Shader.StructuredIr context.AddNode(new AstOperation(inst, sources)); } + + // These instructions need to be emulated by using helper functions, + // because they are NVIDIA specific. These flags help the backend to + // decide which helper functions are needed in the final generated code. + switch (operation.Inst) + { + case Instruction.Shuffle: + context.Info.HelperFunctionsMask |= HelperFunctionsMask.Shuffle; + break; + case Instruction.ShuffleDown: + context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleDown; + break; + case Instruction.ShuffleUp: + context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleUp; + break; + case Instruction.ShuffleXor: + context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleXor; + break; + case Instruction.SwizzleAdd: + context.Info.HelperFunctionsMask |= HelperFunctionsMask.SwizzleAdd; + break; + } } private static VariableType GetVarTypeFromUses(Operand dest) diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs index 1094fba2b..0ef4bde34 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs @@ -18,6 +18,8 @@ namespace Ryujinx.Graphics.Shader.StructuredIr public bool UsesInstanceId { get; set; } + public HelperFunctionsMask HelperFunctionsMask { get; set; } + public HashSet Samplers { get; } public HashSet Images { get; } diff --git a/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs index 7d64e7ca2..58a37b529
100644 --- a/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs +++ b/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs @@ -6,6 +6,11 @@ namespace Ryujinx.Graphics.Shader.Translation { static class EmitterContextInsts { + public static Operand BitCount(this EmitterContext context, Operand a) + { + return context.Add(Instruction.BitCount, Local(), a); + } + public static Operand BitfieldExtractS32(this EmitterContext context, Operand a, Operand b, Operand c) { return context.Add(Instruction.BitfieldExtractS32, Local(), a, b, c); @@ -106,6 +111,16 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(Instruction.EndPrimitive); } + public static Operand FindFirstSetS32(this EmitterContext context, Operand a) + { + return context.Add(Instruction.FindFirstSetS32, Local(), a); + } + + public static Operand FindFirstSetU32(this EmitterContext context, Operand a) + { + return context.Add(Instruction.FindFirstSetU32, Local(), a); + } + public static Operand FPAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg) { return context.FPNegate(context.FPAbsolute(a, abs), neg); @@ -256,6 +271,11 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(Instruction.Truncate, Local(), a); } + public static Operand FPSwizzleAdd(this EmitterContext context, Operand a, Operand b, int mask) + { + return context.Add(Instruction.SwizzleAdd, Local(), a, b, Const(mask)); + } + public static Operand IAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg) { return context.INegate(context.IAbsolute(a, abs), neg); @@ -418,6 +438,26 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(Instruction.ShiftRightU32, Local(), a, b); } + public static Operand Shuffle(this EmitterContext context, Operand a, Operand b, Operand c) + { + return context.Add(Instruction.Shuffle, Local(), a, b, c); + } + + public static Operand ShuffleDown(this EmitterContext context, Operand a, Operand b, Operand c) + { + return 
context.Add(Instruction.ShuffleDown, Local(), a, b, c);
+        }
+
+        public static Operand ShuffleUp(this EmitterContext context, Operand a, Operand b, Operand c)
+        {
+            return context.Add(Instruction.ShuffleUp, Local(), a, b, c);
+        }
+
+        public static Operand ShuffleXor(this EmitterContext context, Operand a, Operand b, Operand c)
+        {
+            return context.Add(Instruction.ShuffleXor, Local(), a, b, c);
+        }
+
         public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b)
         {
             return context.Add(Instruction.StoreGlobal, null, a, b);
diff --git a/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs b/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs
index d64579b71..97852ac1f 100644
--- a/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs
+++ b/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs
@@ -21,6 +21,10 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
                     EvaluateBinary(operation, (x, y) => x + y);
                     break;
 
+                case Instruction.BitCount:
+                    EvaluateUnary(operation, (x) => BitCount(x));
+                    break;
+
                 case Instruction.BitwiseAnd:
                     EvaluateBinary(operation, (x, y) => x & y);
                     break;
@@ -208,6 +212,12 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
             return true;
         }
 
+        // Counts the bits set to 1 in "value" (population count).
+        private static int BitCount(int value)
+        {
+            return System.Numerics.BitOperations.PopCount(unchecked((uint)value));
+        }
+
         private static void BitfieldExtractS32(Operation operation)
         {
             int value = GetBitfieldExtractValue(operation);
diff --git a/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs b/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
index d5e57546a..22d794a40 100644
--- a/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
+++ b/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
@@ -1,5 +1,6 @@
 using Ryujinx.Graphics.Shader.IntermediateRepresentation;
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.Linq;
 
 namespace Ryujinx.Graphics.Shader.Translation.Optimizations
@@ -59,7 +60,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
 
                                 modified = true;
                             }
-                            else if (operation.Inst == Instruction.PackHalf2x16 && PropagatePack(operation))
+                            else if ((operation.Inst == Instruction.PackHalf2x16 && PropagatePack(operation)) ||
+                                     (operation.Inst == Instruction.ShuffleXor && MatchDdxOrDdy(operation)))
                             {
                                 if (operation.Dest.UseOps.Count == 0)
                                 {
@@ -135,6 +137,82 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
             return modified;
         }
 
+        // Rewrites SwizzleAdd (FSWZADD) uses of a ShuffleXor result into Ddx/Ddy
+        // operations when they match the shuffle + swizzle-add pattern checked below.
+        // Returns true if at least one use was rewritten.
+        public static bool MatchDdxOrDdy(Operation operation)
+        {
+            // It's assumed that "operation.Inst" is ShuffleXor,
+            // that should be checked before calling this method.
+            Debug.Assert(operation.Inst == Instruction.ShuffleXor);
+
+            bool modified = false;
+
+            Operand src2 = operation.GetSource(1);
+            Operand src3 = operation.GetSource(2);
+
+            if (src2.Type != OperandType.Constant || (src2.Value != 1 && src2.Value != 2))
+            {
+                return false;
+            }
+
+            if (src3.Type != OperandType.Constant || src3.Value != 0x1c03)
+            {
+                return false;
+            }
+
+            bool isDdy = src2.Value == 2;
+            bool isDdx = !isDdy;
+
+            // We can replace any use by a FSWZADD with DDX/DDY, when
+            // the following conditions are true:
+            // - The mask should be 0b10100101 for DDY, or 0b10011001 for DDX.
+            // - The first source operand must be the shuffle output.
+            // - The second source operand must be the shuffle first source operand.
+            INode[] uses = operation.Dest.UseOps.ToArray();
+
+            foreach (INode use in uses)
+            {
+                if (!(use is Operation useOp) || useOp.Inst != Instruction.SwizzleAdd)
+                {
+                    continue;
+                }
+
+                Operand fswzaddSrc1 = useOp.GetSource(0);
+                Operand fswzaddSrc2 = useOp.GetSource(1);
+                Operand fswzaddSrc3 = useOp.GetSource(2);
+
+                if (fswzaddSrc1 != operation.Dest)
+                {
+                    continue;
+                }
+
+                if (fswzaddSrc2 != operation.GetSource(0))
+                {
+                    continue;
+                }
+
+                if (fswzaddSrc3.Type != OperandType.Constant)
+                {
+                    continue;
+                }
+
+                int mask = fswzaddSrc3.Value;
+
+                if ((isDdx && mask != 0b10011001) ||
+                    (isDdy && mask != 0b10100101))
+                {
+                    continue;
+                }
+
+                useOp.TurnInto(isDdx ? Instruction.Ddx : Instruction.Ddy, fswzaddSrc2);
+
+                modified = true;
+            }
+
+            return modified;
+        }
+
         private static void RemoveNode(BasicBlock block, LinkedListNode llNode)
         {
             // Remove a node from the nodes list, and also remove itself