1
0
Fork 0
mirror of https://github.com/Ryujinx/Ryujinx.git synced 2024-11-10 20:46:43 +00:00

Initial support for shader half float instructions (#507)

This commit is contained in:
gdkchan 2019-01-31 09:43:24 -03:00 committed by GitHub
parent c81abdde4c
commit e10ff17e2d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 244 additions and 65 deletions

View file

@ -63,6 +63,7 @@ namespace Ryujinx.Graphics.Gal.Shader
private Dictionary<int, ShaderDeclInfo> m_OutAttributes;
private Dictionary<int, ShaderDeclInfo> m_Gprs;
private Dictionary<int, ShaderDeclInfo> m_GprsHalf;
private Dictionary<int, ShaderDeclInfo> m_Preds;
public IReadOnlyDictionary<ShaderIrOp, ShaderDeclInfo> CbTextures => m_CbTextures;
@ -74,8 +75,9 @@ namespace Ryujinx.Graphics.Gal.Shader
public IReadOnlyDictionary<int, ShaderDeclInfo> InAttributes => m_InAttributes;
public IReadOnlyDictionary<int, ShaderDeclInfo> OutAttributes => m_OutAttributes;
public IReadOnlyDictionary<int, ShaderDeclInfo> Gprs => m_Gprs;
public IReadOnlyDictionary<int, ShaderDeclInfo> Preds => m_Preds;
public IReadOnlyDictionary<int, ShaderDeclInfo> Gprs => m_Gprs;
public IReadOnlyDictionary<int, ShaderDeclInfo> GprsHalf => m_GprsHalf;
public IReadOnlyDictionary<int, ShaderDeclInfo> Preds => m_Preds;
public GalShaderType ShaderType { get; private set; }
@ -92,8 +94,9 @@ namespace Ryujinx.Graphics.Gal.Shader
m_InAttributes = new Dictionary<int, ShaderDeclInfo>();
m_OutAttributes = new Dictionary<int, ShaderDeclInfo>();
m_Gprs = new Dictionary<int, ShaderDeclInfo>();
m_Preds = new Dictionary<int, ShaderDeclInfo>();
m_Gprs = new Dictionary<int, ShaderDeclInfo>();
m_GprsHalf = new Dictionary<int, ShaderDeclInfo>();
m_Preds = new Dictionary<int, ShaderDeclInfo>();
}
public GlslDecl(ShaderIrBlock[] Blocks, GalShaderType ShaderType, ShaderHeader Header) : this(ShaderType)
@ -146,8 +149,9 @@ namespace Ryujinx.Graphics.Gal.Shader
Merge(Combined.m_Attributes, VpA.m_Attributes, VpB.m_Attributes);
Merge(Combined.m_OutAttributes, VpA.m_OutAttributes, VpB.m_OutAttributes);
Merge(Combined.m_Gprs, VpA.m_Gprs, VpB.m_Gprs);
Merge(Combined.m_Preds, VpA.m_Preds, VpB.m_Preds);
Merge(Combined.m_Gprs, VpA.m_Gprs, VpB.m_Gprs);
Merge(Combined.m_GprsHalf, VpA.m_GprsHalf, VpB.m_GprsHalf);
Merge(Combined.m_Preds, VpA.m_Preds, VpB.m_Preds);
//Merge input attributes.
foreach (KeyValuePair<int, ShaderDeclInfo> KV in VpA.m_InAttributes)
@ -343,7 +347,20 @@ namespace Ryujinx.Graphics.Gal.Shader
{
string Name = GetGprName(Gpr.Index);
m_Gprs.TryAdd(Gpr.Index, new ShaderDeclInfo(Name, Gpr.Index));
if (Gpr.RegisterSize == ShaderRegisterSize.Single)
{
m_Gprs.TryAdd(Gpr.Index, new ShaderDeclInfo(Name, Gpr.Index));
}
else if (Gpr.RegisterSize == ShaderRegisterSize.Half)
{
Name += "_h" + Gpr.HalfPart;
m_GprsHalf.TryAdd((Gpr.Index << 1) | Gpr.HalfPart, new ShaderDeclInfo(Name, Gpr.Index));
}
else /* if (Gpr.RegisterSize == ShaderRegisterSize.Double) */
{
throw new NotImplementedException("Double types are not supported.");
}
}
break;
}

View file

@ -364,6 +364,7 @@ namespace Ryujinx.Graphics.Gal.Shader
//Prints the general purpose register declarations by passing both
//register tables exposed by Decl to PrintDecls: first the regular
//table (Decl.Gprs), then the half float table (Decl.GprsHalf).
private void PrintDeclGprs()
{
PrintDecls(Decl.Gprs);
PrintDecls(Decl.GprsHalf);
}
private void PrintDeclPreds()
@ -897,7 +898,23 @@ namespace Ryujinx.Graphics.Gal.Shader
private string GetName(ShaderIrOperGpr Gpr)
{
return Gpr.IsConst ? "0" : GetNameWithSwizzle(Decl.Gprs, Gpr.Index);
if (Gpr.IsConst)
{
return "0";
}
if (Gpr.RegisterSize == ShaderRegisterSize.Single)
{
return GetNameWithSwizzle(Decl.Gprs, Gpr.Index);
}
else if (Gpr.RegisterSize == ShaderRegisterSize.Half)
{
return GetNameWithSwizzle(Decl.GprsHalf, (Gpr.Index << 1) | Gpr.HalfPart);
}
else /* if (Gpr.RegisterSize == ShaderRegisterSize.Double) */
{
throw new NotImplementedException("Double types are not supported.");
}
}
private string GetValue(ShaderIrOperImm Imm)

View file

@ -6,6 +6,14 @@ namespace Ryujinx.Graphics.Gal.Shader
{
static partial class ShaderDecode
{
//Output mode of a half float instruction.
//The members have no explicit values, so their declaration order defines
//the numeric values (0 to 3). EmitBinaryHalfOp casts the result of
//OpCode.Read(49, 3) directly to this enum, so the order must not change.
private enum HalfOutputType
{
//Two results are written, one to each half of the destination
//(GetHalfDst returns GprHalf0(Index) for this mode).
PackedFp16,
//The result is written to the whole destination register (Gpr0).
Fp32,
//The result is written to half 0 of the destination register.
MergeH0,
//The result is written to half 1 of the destination register.
MergeH1
}
public static void Bfe_C(ShaderIrBlock Block, long OpCode, int Position)
{
EmitBfe(Block, OpCode, ShaderOper.CR);
@ -144,6 +152,16 @@ namespace Ryujinx.Graphics.Gal.Shader
EmitFsetp(Block, OpCode, ShaderOper.RR);
}
//Decoder entry for Hadd2_R: emits a binary half float operation
//that uses ShaderIrInst.Fadd. Position is not used here.
public static void Hadd2_R(ShaderIrBlock Block, long OpCode, int Position)
{
EmitBinaryHalfOp(Block, OpCode, ShaderIrInst.Fadd);
}
//Decoder entry for Hmul2_R: emits a binary half float operation
//that uses ShaderIrInst.Fmul. Position is not used here.
public static void Hmul2_R(ShaderIrBlock Block, long OpCode, int Position)
{
EmitBinaryHalfOp(Block, OpCode, ShaderIrInst.Fmul);
}
public static void Iadd_C(ShaderIrBlock Block, long OpCode, int Position)
{
EmitIadd(Block, OpCode, ShaderOper.CR);
@ -1041,6 +1059,47 @@ namespace Ryujinx.Graphics.Gal.Shader
Block.AddNode(OpCode.PredNode(new ShaderIrAsg(P0Node, Op)));
}
//Emits the IR for a two operand half float instruction.
//Block is the block that receives the generated nodes, OpCode is the
//raw instruction and Inst is the operation applied to each element
//(the callers in this file pass Fadd or Fmul).
//Operand A may have an absolute value modifier, operand B may have
//absolute value and negate modifiers, and the result may be saturated.
//Every assignment is wrapped with OpCode.PredNode.
private static void EmitBinaryHalfOp(ShaderIrBlock Block, long OpCode, ShaderIrInst Inst)
{
    bool AbsB = OpCode.Read(30);
    bool NegB = OpCode.Read(31);
    bool Sat  = OpCode.Read(32);
    bool AbsA = OpCode.Read(44);

    ShaderIrOperGpr[] VecA = OpCode.GprHalfVec8();
    ShaderIrOperGpr[] VecB = OpCode.GprHalfVec20();

    HalfOutputType OutputType = (HalfOutputType)OpCode.Read(49, 3);

    //The packed output writes both elements, every other output type
    //writes a single one: element 1 for MergeH1, element 0 otherwise.
    int First = OutputType == HalfOutputType.MergeH1    ? 1 : 0;
    int Count = OutputType == HalfOutputType.PackedFp16 ? 2 : 1;

    //The upper bound has to be relative to First. Using the element
    //count alone as the bound gives an empty range for MergeH1
    //(First == 1, Count == 1), which means no code is emitted for it.
    int End = First + Count;

    for (int Index = First; Index < End; Index++)
    {
        ShaderIrNode OperA = GetAluFabs    (VecA[Index], AbsA);
        ShaderIrNode OperB = GetAluFabsFneg(VecB[Index], AbsB, NegB);

        ShaderIrNode Op = new ShaderIrOp(Inst, OperA, OperB);

        ShaderIrOperGpr Dst = GetHalfDst(OpCode, OutputType, Index);

        Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, GetAluFsat(Op, Sat))));
    }
}
//Gets the destination operand of a half float instruction.
//OpCode is the raw instruction, OutputType selects how the result is
//stored, and Index is the element being written (only used by the
//packed output, where element N goes to half N of the destination).
//Throws ArgumentException when OutputType is not a known value.
private static ShaderIrOperGpr GetHalfDst(long OpCode, HalfOutputType OutputType, int Index)
{
    switch (OutputType)
    {
        case HalfOutputType.PackedFp16: return OpCode.GprHalf0(Index);
        case HalfOutputType.Fp32:       return OpCode.Gpr0();
        case HalfOutputType.MergeH0:    return OpCode.GprHalf0(0);
        case HalfOutputType.MergeH1:    return OpCode.GprHalf0(1);
    }

    //The single argument ArgumentException constructor takes the message,
    //not the parameter name, so pass both explicitly.
    throw new ArgumentException("Invalid half output type \"" + OutputType + "\".", nameof(OutputType));
}
private static void EmitLop(ShaderIrBlock Block, long OpCode, ShaderOper Oper)
{
int SubOp = OpCode.Read(41, 3);

View file

@ -6,8 +6,6 @@ namespace Ryujinx.Graphics.Gal.Shader
{
static partial class ShaderDecode
{
private const int TempRegStart = 0x100;
private const int ____ = 0x0;
private const int R___ = 0x1;
private const int _G__ = 0x2;
@ -149,14 +147,18 @@ namespace Ryujinx.Graphics.Gal.Shader
for (int Index = 0; Index < Coords.Length; Index++)
{
Coords[Index] = OpCode.Gpr8();
ShaderIrOperGpr CoordReg = OpCode.Gpr8();
Coords[Index].Index += Index;
CoordReg.Index += Index;
if (Coords[Index].Index > ShaderIrOperGpr.ZRIndex)
if (!CoordReg.IsValidRegister)
{
Coords[Index].Index = ShaderIrOperGpr.ZRIndex;
CoordReg.Index = ShaderIrOperGpr.ZRIndex;
}
Coords[Index] = ShaderIrOperGpr.MakeTemporary(Index);
Block.AddNode(new ShaderIrAsg(Coords[Index], CoordReg));
}
int ChMask = OpCode.Read(31, 0xf);
@ -167,17 +169,6 @@ namespace Ryujinx.Graphics.Gal.Shader
ShaderIrInst Inst = GprHandle ? ShaderIrInst.Texb : ShaderIrInst.Texs;
for (int Ch = 0; Ch < 4; Ch++)
{
ShaderIrOperGpr Dst = new ShaderIrOperGpr(TempRegStart + Ch);
ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch);
ShaderIrOp Op = new ShaderIrOp(Inst, Coords[0], Coords[1], OperC, Meta);
Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Op)));
}
int RegInc = 0;
for (int Ch = 0; Ch < 4; Ch++)
@ -187,18 +178,20 @@ namespace Ryujinx.Graphics.Gal.Shader
continue;
}
ShaderIrOperGpr Src = new ShaderIrOperGpr(TempRegStart + Ch);
ShaderIrOperGpr Dst = OpCode.Gpr0();
Dst.Index += RegInc++;
if (Dst.Index >= ShaderIrOperGpr.ZRIndex)
if (!Dst.IsValidRegister || Dst.IsConst)
{
continue;
}
Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Src)));
ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch);
ShaderIrOp Op = new ShaderIrOp(Inst, Coords[0], Coords[1], OperC, Meta);
Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Op)));
}
}
@ -215,57 +208,81 @@ namespace Ryujinx.Graphics.Gal.Shader
private static void EmitTexs(ShaderIrBlock Block, long OpCode, ShaderIrInst Inst)
{
//TODO: Support other formats.
ShaderIrNode OperA = OpCode.Gpr8();
ShaderIrNode OperB = OpCode.Gpr20();
ShaderIrNode OperC = OpCode.Imm13_36();
int LutIndex;
LutIndex = OpCode.Gpr0 ().Index != ShaderIrOperGpr.ZRIndex ? 1 : 0;
LutIndex |= OpCode.Gpr28().Index != ShaderIrOperGpr.ZRIndex ? 2 : 0;
LutIndex = !OpCode.Gpr0().IsConst ? 1 : 0;
LutIndex |= !OpCode.Gpr28().IsConst ? 2 : 0;
if (LutIndex == 0)
{
//Both registers are RZ, color is not written anywhere.
//So, the intruction is basically a no-op.
//Both destination registers are RZ, do nothing.
return;
}
int ChMask = MaskLut[LutIndex, OpCode.Read(50, 7)];
bool Fp16 = !OpCode.Read(59);
for (int Ch = 0; Ch < 4; Ch++)
{
ShaderIrOperGpr Dst = new ShaderIrOperGpr(TempRegStart + Ch);
ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch);
ShaderIrOp Op = new ShaderIrOp(Inst, OperA, OperB, OperC, Meta);
Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Op)));
}
int RegInc = 0;
int DstIncrement = 0;
ShaderIrOperGpr GetDst()
{
ShaderIrOperGpr Dst;
switch (LutIndex)
if (Fp16)
{
case 1: Dst = OpCode.Gpr0(); break;
case 2: Dst = OpCode.Gpr28(); break;
case 3: Dst = (RegInc >> 1) != 0
? OpCode.Gpr28()
: OpCode.Gpr0 (); break;
//FP16 mode, two components are packed on the two
//halfs of a 32-bits register, as two half-float values.
int HalfPart = DstIncrement & 1;
default: throw new InvalidOperationException();
switch (LutIndex)
{
case 1: Dst = OpCode.GprHalf0(HalfPart); break;
case 2: Dst = OpCode.GprHalf28(HalfPart); break;
case 3: Dst = (DstIncrement >> 1) != 0
? OpCode.GprHalf28(HalfPart)
: OpCode.GprHalf0(HalfPart); break;
default: throw new InvalidOperationException();
}
}
else
{
//32-bits mode, each component uses one register.
//Two components uses two consecutive registers.
switch (LutIndex)
{
case 1: Dst = OpCode.Gpr0(); break;
case 2: Dst = OpCode.Gpr28(); break;
case 3: Dst = (DstIncrement >> 1) != 0
? OpCode.Gpr28()
: OpCode.Gpr0(); break;
default: throw new InvalidOperationException();
}
Dst.Index += DstIncrement & 1;
}
Dst.Index += RegInc++ & 1;
DstIncrement++;
return Dst;
}
int ChMask = MaskLut[LutIndex, OpCode.Read(50, 7)];
if (ChMask == 0)
{
//All channels are disabled, do nothing.
return;
}
ShaderIrNode OperC = OpCode.Imm13_36();
ShaderIrOperGpr Coord0 = ShaderIrOperGpr.MakeTemporary(0);
ShaderIrOperGpr Coord1 = ShaderIrOperGpr.MakeTemporary(1);
Block.AddNode(new ShaderIrAsg(Coord0, OpCode.Gpr8()));
Block.AddNode(new ShaderIrAsg(Coord1, OpCode.Gpr20()));
for (int Ch = 0; Ch < 4; Ch++)
{
if (!IsChannelUsed(ChMask, Ch))
@ -273,13 +290,15 @@ namespace Ryujinx.Graphics.Gal.Shader
continue;
}
ShaderIrOperGpr Src = new ShaderIrOperGpr(TempRegStart + Ch);
ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch);
ShaderIrOp Op = new ShaderIrOp(Inst, Coord0, Coord1, OperC, Meta);
ShaderIrOperGpr Dst = GetDst();
if (Dst.Index != ShaderIrOperGpr.ZRIndex)
if (Dst.IsValidRegister && !Dst.IsConst)
{
Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Src)));
Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Op)));
}
}
}

View file

@ -75,6 +75,49 @@ namespace Ryujinx.Graphics.Gal.Shader
return new ShaderIrOperGpr(OpCode.Read(28, 0xff));
}
//Builds the two element half float source vector from the register
//value read with OpCode.Read(8, 0xff) and the element selector read
//with OpCode.Read(47, 3).
private static ShaderIrOperGpr[] GprHalfVec8(this long OpCode)
{
    int Register = OpCode.Read(8, 0xff);
    int Selector = OpCode.Read(47, 3);

    return GetGprHalfVec2(Register, Selector);
}
//Builds the two element half float source vector from the register
//value read with OpCode.Read(20, 0xff) and the element selector read
//with OpCode.Read(28, 3).
private static ShaderIrOperGpr[] GprHalfVec20(this long OpCode)
{
    int Register = OpCode.Read(20, 0xff);
    int Selector = OpCode.Read(28, 3);

    return GetGprHalfVec2(Register, Selector);
}
//Creates the two source operands of a half float vector.
//Gpr is the register index and Mask selects what each element reads:
//  Mask == 1: both elements are the whole 32-bits register (FP32).
//  Otherwise: bit 0 picks the half used by element 0 and bit 1 picks
//  the half used by element 1 (bit set = half 1, bit clear = half 0).
//NOTE(review): the mapping above is what the code does, it was not
//checked here against the hardware encoding - confirm if in doubt.
private static ShaderIrOperGpr[] GetGprHalfVec2(int Gpr, int Mask)
{
    bool IsFp32 = Mask == 1;

    if (IsFp32)
    {
        //Each element gets its own operand for the full register.
        ShaderIrOperGpr Full0 = new ShaderIrOperGpr(Gpr);
        ShaderIrOperGpr Full1 = new ShaderIrOperGpr(Gpr);

        return new[] { Full0, Full1 };
    }

    ShaderIrOperGpr LowHalf  = new ShaderIrOperGpr(Gpr, 0);
    ShaderIrOperGpr HighHalf = new ShaderIrOperGpr(Gpr, 1);

    //When both elements select the same half, they share one operand.
    ShaderIrOperGpr Elem0 = (Mask & 1) == 0 ? LowHalf : HighHalf;
    ShaderIrOperGpr Elem1 = (Mask & 2) == 0 ? LowHalf : HighHalf;

    return new[] { Elem0, Elem1 };
}
//Creates a half register operand for the register value read with
//OpCode.Read(0, 0xff), using the given half of that register.
private static ShaderIrOperGpr GprHalf0(this long OpCode, int HalfPart)
{
    int Register = OpCode.Read(0, 0xff);

    ShaderIrOperGpr Operand = new ShaderIrOperGpr(Register, HalfPart);

    return Operand;
}
//Creates a half register operand for the register value read with
//OpCode.Read(28, 0xff), using the given half of that register.
private static ShaderIrOperGpr GprHalf28(this long OpCode, int HalfPart)
{
    int Register = OpCode.Read(28, 0xff);

    ShaderIrOperGpr Operand = new ShaderIrOperGpr(Register, HalfPart);

    return Operand;
}
private static ShaderIrOperImm Imm5_39(this long OpCode)
{
return new ShaderIrOperImm(OpCode.Read(39, 0x1f));

View file

@ -6,13 +6,26 @@ namespace Ryujinx.Graphics.Gal.Shader
public bool IsConst => Index == ZRIndex;
public bool IsValidRegister => (Index <= ZRIndex);
public bool IsValidRegister => (uint)Index <= ZRIndex;
public int Index { get; set; }
public int Index { get; set; }
public int HalfPart { get; set; }
public ShaderRegisterSize RegisterSize { get; private set; }
//Creates an operand for a whole register: stores Index and sets
//RegisterSize to Single. HalfPart is not assigned by this constructor.
public ShaderIrOperGpr(int Index)
{
this.Index = Index;
RegisterSize = ShaderRegisterSize.Single;
}
//Creates an operand for one half of a register: stores Index and
//HalfPart and sets RegisterSize to Half.
//HalfPart is stored as given, without validation.
//NOTE(review): presumably only 0 and 1 are meaningful - confirm against callers.
public ShaderIrOperGpr(int Index, int HalfPart)
{
this.Index = Index;
this.HalfPart = HalfPart;
RegisterSize = ShaderRegisterSize.Half;
}
public static ShaderIrOperGpr MakeTemporary(int Index = 0)

View file

@ -58,6 +58,8 @@ namespace Ryujinx.Graphics.Gal.Shader
Set("010010111011xx", ShaderDecode.Fsetp_C);
Set("0011011x1011xx", ShaderDecode.Fsetp_I);
Set("010110111011xx", ShaderDecode.Fsetp_R);
Set("0101110100010x", ShaderDecode.Hadd2_R);
Set("0101110100001x", ShaderDecode.Hmul2_R);
Set("0100110010111x", ShaderDecode.I2f_C);
Set("0011100x10111x", ShaderDecode.I2f_I);
Set("0101110010111x", ShaderDecode.I2f_R);
@ -118,7 +120,7 @@ namespace Ryujinx.Graphics.Gal.Shader
Set("110000xxxx111x", ShaderDecode.Tex);
Set("1101111010111x", ShaderDecode.Tex_B);
Set("1101111101001x", ShaderDecode.Texq);
Set("1101100xxxxxxx", ShaderDecode.Texs);
Set("1101x00xxxxxxx", ShaderDecode.Texs);
Set("1101101xxxxxxx", ShaderDecode.Tlds);
Set("01011111xxxxxx", ShaderDecode.Vmad);
Set("0100111xxxxxxx", ShaderDecode.Xmad_CR);

View file

@ -0,0 +1,9 @@
namespace Ryujinx.Graphics.Gal.Shader
{
//Size of the value accessed through a register operand.
enum ShaderRegisterSize
{
//One half of a register.
Half,
//A whole register.
Single,
//Double size value.
Double
}
}

View file

@ -39,7 +39,7 @@ namespace Ryujinx.Graphics.Gal
ulong Instruction = 0;
//Dump until a NOP instruction is found
while ((Instruction >> 52 & 0xfff8) != 0x50b0)
while ((Instruction >> 48 & 0xfff8) != 0x50b0)
{
uint Word0 = (uint)Memory.ReadInt32(Position + 0x50 + Offset + 0);
uint Word1 = (uint)Memory.ReadInt32(Position + 0x50 + Offset + 4);