1
0
Fork 0
mirror of https://github.com/Ryujinx/Ryujinx.git synced 2024-11-10 09:51:43 +00:00

Add Smlal_Ve, Smlsl_Ve, Smull_Ve, Umlal_Ve, Umlsl_Ve, Umull_Ve Inst.; add Tests. Add Sse Opt. for Trn1/2_V and Uzp1/2_V Inst. Nits. (#566)

* Update OpCodeTable.cs

* Update InstEmitSimdArithmetic.cs

* Update InstEmitSimdHelper.cs

* Update CpuTestSimdRegElem.cs

* Update InstEmitSimdMove.cs

* Update InstEmitSimdCvt.cs

* Update SoftFallback.cs

* Update InstEmitSimdHelper.cs

* Update SoftFloat.cs

* Update CryptoHelper.cs

* Update InstEmitSimdArithmetic.cs

* Update InstEmitSimdCmp.cs

* Address PR feedback.

* Address PR feedback.
This commit is contained in:
LDj3SNuD 2019-01-29 14:54:39 +01:00 committed by gdkchan
parent 36b9ab0e48
commit 8f7fcede7f
10 changed files with 453 additions and 175 deletions

View file

@ -9,7 +9,7 @@ namespace ChocolArm64.Instructions
static class CryptoHelper
{
#region "LookUp Tables"
private static byte[] _sBox =
private static readonly byte[] _sBox = new byte[]
{
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
@ -29,7 +29,7 @@ namespace ChocolArm64.Instructions
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
};
private static byte[] _invSBox =
private static readonly byte[] _invSBox = new byte[]
{
0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
@ -49,7 +49,7 @@ namespace ChocolArm64.Instructions
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
};
private static byte[] _gfMul02 =
private static readonly byte[] _gfMul02 = new byte[]
{
0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
@ -69,7 +69,7 @@ namespace ChocolArm64.Instructions
0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5
};
private static byte[] _gfMul03 =
private static readonly byte[] _gfMul03 = new byte[]
{
0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11,
0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21,
@ -89,7 +89,7 @@ namespace ChocolArm64.Instructions
0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a
};
private static byte[] _gfMul09 =
private static readonly byte[] _gfMul09 = new byte[]
{
0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7,
@ -109,7 +109,7 @@ namespace ChocolArm64.Instructions
0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46
};
private static byte[] _gfMul0B =
private static readonly byte[] _gfMul0B = new byte[]
{
0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69,
0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9,
@ -129,7 +129,7 @@ namespace ChocolArm64.Instructions
0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3
};
private static byte[] _gfMul0D =
private static readonly byte[] _gfMul0D = new byte[]
{
0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b,
0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b,
@ -149,7 +149,7 @@ namespace ChocolArm64.Instructions
0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97
};
private static byte[] _gfMul0E =
private static readonly byte[] _gfMul0E = new byte[]
{
0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a,
0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba,
@ -169,9 +169,15 @@ namespace ChocolArm64.Instructions
0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d
};
private static byte[] _srPerm = { 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 };
private static readonly byte[] _srPerm = new byte[]
{
0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3
};
private static byte[] _isrPerm = { 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11 };
private static readonly byte[] _isrPerm = new byte[]
{
0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11
};
#endregion
public static Vector128<float> AesInvMixColumns(Vector128<float> op)
@ -179,7 +185,7 @@ namespace ChocolArm64.Instructions
byte[] inState = new byte[16];
byte[] outState = new byte[16];
FromVectorToByteArray(inState, ref op);
FromVectorToByteArray(op, inState);
for (int columns = 0; columns <= 3; columns++)
{
@ -206,7 +212,7 @@ namespace ChocolArm64.Instructions
byte[] inState = new byte[16];
byte[] outState = new byte[16];
FromVectorToByteArray(inState, ref op);
FromVectorToByteArray(op, inState);
for (int idx = 0; idx <= 15; idx++)
{
@ -223,7 +229,7 @@ namespace ChocolArm64.Instructions
byte[] inState = new byte[16];
byte[] outState = new byte[16];
FromVectorToByteArray(inState, ref op);
FromVectorToByteArray(op, inState);
for (int idx = 0; idx <= 15; idx++)
{
@ -240,7 +246,7 @@ namespace ChocolArm64.Instructions
byte[] inState = new byte[16];
byte[] outState = new byte[16];
FromVectorToByteArray(inState, ref op);
FromVectorToByteArray(op, inState);
for (int columns = 0; columns <= 3; columns++)
{
@ -267,7 +273,7 @@ namespace ChocolArm64.Instructions
byte[] inState = new byte[16];
byte[] outState = new byte[16];
FromVectorToByteArray(inState, ref op);
FromVectorToByteArray(op, inState);
for (int idx = 0; idx <= 15; idx++)
{
@ -284,7 +290,7 @@ namespace ChocolArm64.Instructions
byte[] inState = new byte[16];
byte[] outState = new byte[16];
FromVectorToByteArray(inState, ref op);
FromVectorToByteArray(op, inState);
for (int idx = 0; idx <= 15; idx++)
{
@ -296,33 +302,30 @@ namespace ChocolArm64.Instructions
return op;
}
private static void FromVectorToByteArray(byte[] state, ref Vector128<float> op)
{
ulong uLongLow = VectorHelper.VectorExtractIntZx((op), (byte)0, 3);
ulong uLongHigh = VectorHelper.VectorExtractIntZx((op), (byte)1, 3);
for (int idx = 0; idx <= 7; idx++)
{
state[idx + 0] = (byte)(uLongLow & 0xFFUL);
state[idx + 8] = (byte)(uLongHigh & 0xFFUL);
uLongLow >>= 8;
uLongHigh >>= 8;
}
}
private static void FromByteArrayToVector(byte[] state, ref Vector128<float> op)
private unsafe static void FromVectorToByteArray(Vector128<float> op, byte[] state)
{
if (!Sse2.IsSupported)
{
throw new PlatformNotSupportedException();
}
op = Sse.StaticCast<byte, float>(Sse2.SetVector128(
state[15], state[14], state[13], state[12],
state[11], state[10], state[9], state[8],
state[7], state[6], state[5], state[4],
state[3], state[2], state[1], state[0]));
fixed (byte* ptr = &state[0])
{
Sse2.Store(ptr, Sse.StaticCast<float, byte>(op));
}
}
// Loads the first 16 bytes of "state" into "op" as a single 128-bit vector.
//
// state: source buffer; must be non-null and hold at least 16 bytes.
// op:    receives the loaded bytes, reinterpreted as Vector128<float>.
//
// Throws PlatformNotSupportedException when SSE2 is unavailable,
// ArgumentNullException when "state" is null and ArgumentException when
// "state" is shorter than 16 bytes.
private unsafe static void FromByteArrayToVector(byte[] state, ref Vector128<float> op)
{
    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    if (state == null)
    {
        throw new ArgumentNullException(nameof(state));
    }

    // The vector load below always reads 16 bytes through a raw pointer, so a
    // shorter array would be read out of bounds without any runtime check.
    if (state.Length < 16)
    {
        throw new ArgumentException("The state buffer must hold at least 16 bytes.", nameof(state));
    }

    fixed (byte* ptr = &state[0])
    {
        op = Sse.StaticCast<byte, float>(Sse2.LoadVector128(ptr));
    }
}
}
}

View file

@ -392,8 +392,7 @@ namespace ChocolArm64.Instructions
public static void Fadd_S(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarSseOrSse2OpF(context, nameof(Sse.AddScalar));
}
@ -408,8 +407,7 @@ namespace ChocolArm64.Instructions
public static void Fadd_V(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorSseOrSse2OpF(context, nameof(Sse.Add));
}
@ -470,8 +468,7 @@ namespace ChocolArm64.Instructions
public static void Faddp_V(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorPairwiseSseOrSse2OpF(context, nameof(Sse.Add));
}
@ -486,8 +483,7 @@ namespace ChocolArm64.Instructions
public static void Fdiv_S(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarSseOrSse2OpF(context, nameof(Sse.DivideScalar));
}
@ -502,8 +498,7 @@ namespace ChocolArm64.Instructions
public static void Fdiv_V(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorSseOrSse2OpF(context, nameof(Sse.Divide));
}
@ -564,8 +559,7 @@ namespace ChocolArm64.Instructions
public static void Fmax_S(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarSseOrSse2OpF(context, nameof(Sse.MaxScalar));
}
@ -580,8 +574,7 @@ namespace ChocolArm64.Instructions
public static void Fmax_V(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorSseOrSse2OpF(context, nameof(Sse.Max));
}
@ -612,8 +605,7 @@ namespace ChocolArm64.Instructions
public static void Fmaxp_V(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorPairwiseSseOrSse2OpF(context, nameof(Sse.Max));
}
@ -628,8 +620,7 @@ namespace ChocolArm64.Instructions
public static void Fmin_S(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarSseOrSse2OpF(context, nameof(Sse.MinScalar));
}
@ -644,8 +635,7 @@ namespace ChocolArm64.Instructions
public static void Fmin_V(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorSseOrSse2OpF(context, nameof(Sse.Min));
}
@ -676,8 +666,7 @@ namespace ChocolArm64.Instructions
public static void Fminp_V(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorPairwiseSseOrSse2OpF(context, nameof(Sse.Min));
}
@ -984,8 +973,7 @@ namespace ChocolArm64.Instructions
public static void Fmul_S(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarSseOrSse2OpF(context, nameof(Sse.MultiplyScalar));
}
@ -1005,8 +993,7 @@ namespace ChocolArm64.Instructions
public static void Fmul_V(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorSseOrSse2OpF(context, nameof(Sse.Multiply));
}
@ -1753,8 +1740,7 @@ namespace ChocolArm64.Instructions
public static void Fsqrt_S(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarSseOrSse2OpF(context, nameof(Sse.SqrtScalar));
}
@ -1769,8 +1755,7 @@ namespace ChocolArm64.Instructions
public static void Fsqrt_V(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorSseOrSse2OpF(context, nameof(Sse.Sqrt));
}
@ -1785,8 +1770,7 @@ namespace ChocolArm64.Instructions
public static void Fsub_S(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarSseOrSse2OpF(context, nameof(Sse.SubtractScalar));
}
@ -1801,8 +1785,7 @@ namespace ChocolArm64.Instructions
public static void Fsub_V(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorSseOrSse2OpF(context, nameof(Sse.Subtract));
}
@ -2268,6 +2251,15 @@ namespace ChocolArm64.Instructions
}
}
public static void Smlal_Ve(ILEmitterCtx context)
{
    // Per-lane operation handed to the signed widening "by element" ternary
    // helper: multiply the two values on top of the IL stack, then add the
    // product to the value beneath them.
    void EmitMulAdd()
    {
        context.Emit(OpCodes.Mul);
        context.Emit(OpCodes.Add);
    }

    EmitVectorWidenTernaryOpByElemSx(context, EmitMulAdd);
}
public static void Smlsl_V(ILEmitterCtx context)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
@ -2319,11 +2311,25 @@ namespace ChocolArm64.Instructions
}
}
public static void Smlsl_Ve(ILEmitterCtx context)
{
    // Per-lane operation handed to the signed widening "by element" ternary
    // helper: multiply the two values on top of the IL stack, then subtract
    // the product from the value beneath them.
    void EmitMulSub()
    {
        context.Emit(OpCodes.Mul);
        context.Emit(OpCodes.Sub);
    }

    EmitVectorWidenTernaryOpByElemSx(context, EmitMulSub);
}
// Emits a signed widening multiply by delegating to the Rn/Rm widening binary
// helper with a single IL "mul" as the per-lane operation.
public static void Smull_V(ILEmitterCtx context)
    => EmitVectorWidenRnRmBinaryOpSx(context, () => context.Emit(OpCodes.Mul));
// Emits a signed widening multiply "by element": each lane is multiplied by
// one selected element, using a single IL "mul" as the per-lane operation.
public static void Smull_Ve(ILEmitterCtx context)
    => EmitVectorWidenBinaryOpByElemSx(context, () => context.Emit(OpCodes.Mul));
public static void Sqabs_S(ILEmitterCtx context)
{
EmitScalarSaturatingUnaryOpSx(context, () => EmitAbs(context));
@ -2929,6 +2935,15 @@ namespace ChocolArm64.Instructions
}
}
public static void Umlal_Ve(ILEmitterCtx context)
{
    // Per-lane operation handed to the unsigned widening "by element" ternary
    // helper: multiply the two values on top of the IL stack, then add the
    // product to the value beneath them.
    void EmitMulAdd()
    {
        context.Emit(OpCodes.Mul);
        context.Emit(OpCodes.Add);
    }

    EmitVectorWidenTernaryOpByElemZx(context, EmitMulAdd);
}
public static void Umlsl_V(ILEmitterCtx context)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
@ -2980,11 +2995,25 @@ namespace ChocolArm64.Instructions
}
}
public static void Umlsl_Ve(ILEmitterCtx context)
{
    // Per-lane operation handed to the unsigned widening "by element" ternary
    // helper: multiply the two values on top of the IL stack, then subtract
    // the product from the value beneath them.
    void EmitMulSub()
    {
        context.Emit(OpCodes.Mul);
        context.Emit(OpCodes.Sub);
    }

    EmitVectorWidenTernaryOpByElemZx(context, EmitMulSub);
}
// Emits an unsigned widening multiply by delegating to the Rn/Rm widening
// binary helper with a single IL "mul" as the per-lane operation.
public static void Umull_V(ILEmitterCtx context)
    => EmitVectorWidenRnRmBinaryOpZx(context, () => context.Emit(OpCodes.Mul));
// Emits an unsigned widening multiply "by element": each lane is multiplied by
// one selected element, using a single IL "mul" as the per-lane operation.
public static void Umull_Ve(ILEmitterCtx context)
    => EmitVectorWidenBinaryOpByElemZx(context, () => context.Emit(OpCodes.Mul));
public static void Uqadd_S(ILEmitterCtx context)
{
EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add);

View file

@ -173,8 +173,7 @@ namespace ChocolArm64.Instructions
public static void Fcmeq_S(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareEqualScalar), scalar: true);
}
@ -186,8 +185,7 @@ namespace ChocolArm64.Instructions
public static void Fcmeq_V(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareEqual), scalar: false);
}
@ -199,8 +197,7 @@ namespace ChocolArm64.Instructions
public static void Fcmge_S(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqualScalar), scalar: true);
}
@ -212,8 +209,7 @@ namespace ChocolArm64.Instructions
public static void Fcmge_V(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqual), scalar: false);
}
@ -225,8 +221,7 @@ namespace ChocolArm64.Instructions
public static void Fcmgt_S(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanScalar), scalar: true);
}
@ -238,8 +233,7 @@ namespace ChocolArm64.Instructions
public static void Fcmgt_V(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThan), scalar: false);
}
@ -251,8 +245,7 @@ namespace ChocolArm64.Instructions
public static void Fcmle_S(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqualScalar), scalar: true, isLeOrLt: true);
}
@ -264,8 +257,7 @@ namespace ChocolArm64.Instructions
public static void Fcmle_V(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqual), scalar: false, isLeOrLt: true);
}
@ -277,8 +269,7 @@ namespace ChocolArm64.Instructions
public static void Fcmlt_S(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanScalar), scalar: true, isLeOrLt: true);
}
@ -290,8 +281,7 @@ namespace ChocolArm64.Instructions
public static void Fcmlt_V(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThan), scalar: false, isLeOrLt: true);
}

View file

@ -78,7 +78,6 @@ namespace ChocolArm64.Instructions
if (Optimizations.UseSse2 && sizeF == 1)
{
Type[] typesMov = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
Type[] typesCvt = new Type[] { typeof(Vector128<float>) };
string nameMov = op.RegisterSize == RegisterSize.Simd128
@ -88,7 +87,7 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitCall(typeof(Sse).GetMethod(nameMov, typesMov));
context.EmitCall(typeof(Sse).GetMethod(nameMov));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Double), typesCvt));
@ -144,7 +143,6 @@ namespace ChocolArm64.Instructions
if (Optimizations.UseSse2 && sizeF == 1)
{
Type[] typesMov = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
Type[] typesCvt = new Type[] { typeof(Vector128<double>) };
string nameMov = op.RegisterSize == RegisterSize.Simd128
@ -154,15 +152,15 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rd);
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), typesMov));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
EmitLdvecWithCastToDouble(context, op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt));
context.Emit(OpCodes.Dup);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), typesMov));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
context.EmitCall(typeof(Sse).GetMethod(nameMov, typesMov));
context.EmitCall(typeof(Sse).GetMethod(nameMov));
context.EmitStvec(op.Rd);
}

View file

@ -642,21 +642,21 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdRegElem64 op = (OpCodeSimdRegElem64)context.CurrOp;
EmitVectorOpByElem(context, emit, op.Index, false, true);
EmitVectorOpByElem(context, emit, op.Index, ternary: false, signed: true);
}
public static void EmitVectorBinaryOpByElemZx(ILEmitterCtx context, Action emit)
{
OpCodeSimdRegElem64 op = (OpCodeSimdRegElem64)context.CurrOp;
EmitVectorOpByElem(context, emit, op.Index, false, false);
EmitVectorOpByElem(context, emit, op.Index, ternary: false, signed: false);
}
public static void EmitVectorTernaryOpByElemZx(ILEmitterCtx context, Action emit)
{
OpCodeSimdRegElem64 op = (OpCodeSimdRegElem64)context.CurrOp;
EmitVectorOpByElem(context, emit, op.Index, true, false);
EmitVectorOpByElem(context, emit, op.Index, ternary: true, signed: false);
}
public static void EmitVectorOpByElem(ILEmitterCtx context, Action emit, int elem, bool ternary, bool signed)
@ -809,6 +809,64 @@ namespace ChocolArm64.Instructions
context.EmitStvec(op.Rd);
}
// Signed, two-operand flavour of EmitVectorWidenOpByElem. The element index
// is taken from the current op code, which must be an OpCodeSimdRegElem64.
public static void EmitVectorWidenBinaryOpByElemSx(ILEmitterCtx context, Action emit)
{
    int elemIndex = ((OpCodeSimdRegElem64)context.CurrOp).Index;

    EmitVectorWidenOpByElem(context, emit, elemIndex, ternary: false, signed: true);
}
// Unsigned, two-operand flavour of EmitVectorWidenOpByElem. The element index
// is taken from the current op code, which must be an OpCodeSimdRegElem64.
public static void EmitVectorWidenBinaryOpByElemZx(ILEmitterCtx context, Action emit)
{
    int elemIndex = ((OpCodeSimdRegElem64)context.CurrOp).Index;

    EmitVectorWidenOpByElem(context, emit, elemIndex, ternary: false, signed: false);
}
// Signed, three-operand flavour of EmitVectorWidenOpByElem (the destination
// lane is also supplied to "emit"). The element index is taken from the
// current op code, which must be an OpCodeSimdRegElem64.
public static void EmitVectorWidenTernaryOpByElemSx(ILEmitterCtx context, Action emit)
{
    int elemIndex = ((OpCodeSimdRegElem64)context.CurrOp).Index;

    EmitVectorWidenOpByElem(context, emit, elemIndex, ternary: true, signed: true);
}
// Unsigned, three-operand flavour of EmitVectorWidenOpByElem (the destination
// lane is also supplied to "emit"). The element index is taken from the
// current op code, which must be an OpCodeSimdRegElem64.
public static void EmitVectorWidenTernaryOpByElemZx(ILEmitterCtx context, Action emit)
{
    int elemIndex = ((OpCodeSimdRegElem64)context.CurrOp).Index;

    EmitVectorWidenOpByElem(context, emit, elemIndex, ternary: true, signed: false);
}
// Emits a widening "by element" vector operation: every source lane of Rn is
// combined with one fixed element of Rm and the result is stored in a lane of
// size (op.Size + 1) in Rd.
//
// context: emitter context; context.CurrOp must be an OpCodeSimdReg64.
// emit:    emits the per-lane operation. When it runs, the values extracted
//          below have been emitted in this order: the Rd lane (only when
//          "ternary"), the Rn lane, then the saved Rm element.
// elem:    index of the Rm element used for every lane.
// ternary: when true, the current Rd lane is extracted as an extra operand.
// signed:  forwarded to EmitVectorExtract for every extraction.
public static void EmitVectorWidenOpByElem(ILEmitterCtx context, Action emit, int elem, bool ternary, bool signed)
{
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    int srcSize   = op.Size;
    int dstSize   = srcSize + 1;
    int laneCount = 8 >> srcSize;

    // For a 128-bit operation the Rn lanes are read starting at "laneCount"
    // instead of at 0.
    int firstSrcLane = 0;

    if (op.RegisterSize == RegisterSize.Simd128)
    {
        firstSrcLane = laneCount;
    }

    // Extract the Rm element once and keep it in the temporary so that it can
    // be reloaded for every lane.
    EmitVectorExtract(context, op.Rm, elem, srcSize, signed);
    context.EmitSttmp();

    for (int lane = 0; lane < laneCount; lane++)
    {
        if (ternary)
        {
            EmitVectorExtract(context, op.Rd, lane, dstSize, signed);
        }

        EmitVectorExtract(context, op.Rn, firstSrcLane + lane, srcSize, signed);
        context.EmitLdtmp();

        emit();

        EmitVectorInsertTmp(context, lane, dstSize);
    }

    // Results are gathered in the temporary vector and only then stored to Rd.
    context.EmitLdvectmp();
    context.EmitStvec(op.Rd);
}
public static void EmitVectorPairwiseOpSx(ILEmitterCtx context, Action emit)
{
EmitVectorPairwiseOp(context, emit, true);
@ -1416,7 +1474,7 @@ namespace ChocolArm64.Instructions
if (Optimizations.UseSse)
{
//TODO: Use Sse2.MoveScalar once it is fixed,
//as of the time of writing it just crashes the JIT (SDK 2.1.500).
//as of the time of writing it just crashes the JIT (SDK 2.1.503).
/*Type[] typesMov = new Type[] { typeof(Vector128<ulong>) };

View file

@ -12,6 +12,34 @@ namespace ChocolArm64.Instructions
{
static partial class InstEmit
{
#region "Masks"
// Byte-shuffle control masks. Every long packs eight source-byte indices, one
// per byte, with the lowest byte holding the first index. In this file they
// are loaded with EmitLdc_I8, combined in pairs by Sse2.SetVector128(long, long)
// and passed to Ssse3.Shuffle. The "E1" mask is pushed first and the "E0" mask
// second. Each table is indexed by op.Size.

// Indices of the even-numbered elements: every other byte (entry 0), every
// other 2-byte group (entry 1) and every other 4-byte group (entry 2).
// Used by the Xtn, Trn and Uzp fast paths.
private static readonly long[] _masksE0_TrnUzpXtn = new long[]
{
14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0,
13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0,
11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0
};
// Counterpart of the table above that selects the odd-numbered elements.
private static readonly long[] _masksE1_TrnUzp = new long[]
{
15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0,
15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0,
15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0
};
// Masks used only by the non-Simd128 Uzp path, where they are applied to the
// result of Sse2.UnpackLow(Rn, Rm) and only when op.Size < 2 (two entries).
private static readonly long[] _masksE0_Uzp = new long[]
{
13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0,
11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0
};
// "E1" half paired with _masksE0_Uzp in the same non-Simd128 Uzp path.
private static readonly long[] _masksE1_Uzp = new long[]
{
15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0,
15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0
};
#endregion
public static void Dup_Gp(ILEmitterCtx context)
{
OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp;
@ -379,15 +407,6 @@ namespace ChocolArm64.Instructions
if (Optimizations.UseSsse3)
{
long[] masks = new long[]
{
14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0,
13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0,
11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0
};
Type[] typesMov = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
Type[] typesSfl = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
Type[] typesSve = new Type[] { typeof(long), typeof(long) };
string nameMov = op.RegisterSize == RegisterSize.Simd128
@ -397,18 +416,18 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rd);
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), typesMov));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
EmitLdvecWithSignedCast(context, op.Rn, 0);
EmitLdvecWithSignedCast(context, op.Rn, 0); // value
context.EmitLdc_I8(masks[op.Size]);
context.Emit(OpCodes.Dup);
context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]); // mask
context.Emit(OpCodes.Dup); // mask
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve));
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0)));
context.EmitCall(typeof(Sse).GetMethod(nameMov, typesMov));
context.EmitCall(typeof(Sse).GetMethod(nameMov));
context.EmitStvec(op.Rd);
}
@ -465,22 +484,61 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
int words = op.GetBitsCount() >> 4;
int pairs = words >> op.Size;
for (int index = 0; index < pairs; index++)
if (Optimizations.UseSsse3)
{
int idx = index << 1;
Type[] typesSve = new Type[] { typeof(long), typeof(long) };
EmitVectorExtractZx(context, op.Rn, idx + part, op.Size);
EmitVectorExtractZx(context, op.Rm, idx + part, op.Size);
string nameUpk = part == 0
? nameof(Sse2.UnpackLow)
: nameof(Sse2.UnpackHigh);
EmitVectorInsertTmp(context, idx + 1, op.Size);
EmitVectorInsertTmp(context, idx, op.Size);
EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value
if (op.Size < 3)
{
context.EmitLdc_I8(_masksE1_TrnUzp [op.Size]); // maskE1
context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]); // maskE0
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve));
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0)));
}
EmitLdvecWithSignedCast(context, op.Rm, op.Size); // value
if (op.Size < 3)
{
context.EmitLdc_I8(_masksE1_TrnUzp [op.Size]); // maskE1
context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]); // maskE0
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve));
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0)));
}
context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(op.Size)));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
}
else
{
int words = op.GetBitsCount() >> 4;
int pairs = words >> op.Size;
context.EmitLdvectmp();
context.EmitStvec(op.Rd);
for (int index = 0; index < pairs; index++)
{
int idx = index << 1;
EmitVectorExtractZx(context, op.Rn, idx + part, op.Size);
EmitVectorExtractZx(context, op.Rm, idx + part, op.Size);
EmitVectorInsertTmp(context, idx + 1, op.Size);
EmitVectorInsertTmp(context, idx, op.Size);
}
context.EmitLdvectmp();
context.EmitStvec(op.Rd);
}
if (op.RegisterSize == RegisterSize.Simd64)
{
@ -492,26 +550,91 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
int words = op.GetBitsCount() >> 4;
int pairs = words >> op.Size;
for (int index = 0; index < pairs; index++)
if (Optimizations.UseSsse3)
{
int idx = index << 1;
Type[] typesSve = new Type[] { typeof(long), typeof(long) };
EmitVectorExtractZx(context, op.Rn, idx + part, op.Size);
EmitVectorExtractZx(context, op.Rm, idx + part, op.Size);
string nameUpk = part == 0
? nameof(Sse2.UnpackLow)
: nameof(Sse2.UnpackHigh);
EmitVectorInsertTmp(context, pairs + index, op.Size);
EmitVectorInsertTmp(context, index, op.Size);
if (op.RegisterSize == RegisterSize.Simd128)
{
EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value
if (op.Size < 3)
{
context.EmitLdc_I8(_masksE1_TrnUzp [op.Size]); // maskE1
context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]); // maskE0
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve));
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0)));
}
EmitLdvecWithSignedCast(context, op.Rm, op.Size); // value
if (op.Size < 3)
{
context.EmitLdc_I8(_masksE1_TrnUzp [op.Size]); // maskE1
context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]); // maskE0
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve));
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0)));
}
context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3)));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
}
else
{
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), GetTypesSflUpk(op.Size))); // value
if (op.Size < 2)
{
context.EmitLdc_I8(_masksE1_Uzp[op.Size]); // maskE1
context.EmitLdc_I8(_masksE0_Uzp[op.Size]); // maskE0
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve));
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0)));
}
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt64Zero));
context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3)));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
}
}
context.EmitLdvectmp();
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
else
{
EmitVectorZeroUpper(context, op.Rd);
int words = op.GetBitsCount() >> 4;
int pairs = words >> op.Size;
for (int index = 0; index < pairs; index++)
{
int idx = index << 1;
EmitVectorExtractZx(context, op.Rn, idx + part, op.Size);
EmitVectorExtractZx(context, op.Rm, idx + part, op.Size);
EmitVectorInsertTmp(context, pairs + index, op.Size);
EmitVectorInsertTmp(context, index, op.Size);
}
context.EmitLdvectmp();
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
}
@ -521,36 +644,26 @@ namespace ChocolArm64.Instructions
if (Optimizations.UseSse2)
{
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
Type[] types = new Type[]
{
VectorUIntTypesPerSizeLog2[op.Size],
VectorUIntTypesPerSizeLog2[op.Size]
};
string name = part == 0 || (part != 0 && op.RegisterSize == RegisterSize.Simd64)
string nameUpk = part == 0
? nameof(Sse2.UnpackLow)
: nameof(Sse2.UnpackHigh);
context.EmitCall(typeof(Sse2).GetMethod(name, types));
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
if (op.RegisterSize == RegisterSize.Simd64 && part != 0)
if (op.RegisterSize == RegisterSize.Simd128)
{
context.EmitLdc_I4(8);
context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(op.Size)));
}
else
{
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), GetTypesSflUpk(op.Size)));
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt64Zero));
Type[] shTypes = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), shTypes));
context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3)));
}
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
if (op.RegisterSize == RegisterSize.Simd64 && part == 0)
{
EmitVectorZeroUpper(context, op.Rd);
}
EmitStvecWithSignedCast(context, op.Rd, op.Size);
}
else
{
@ -579,5 +692,10 @@ namespace ChocolArm64.Instructions
}
}
}
// Builds the two-parameter type list used to look up the shuffle/unpack
// intrinsic overloads through reflection: both parameters share the vector
// type that VectorIntTypesPerSizeLog2 holds at index "size".
private static Type[] GetTypesSflUpk(int size)
{
    Type vectorType = VectorIntTypesPerSizeLog2[size];

    return new Type[] { vectorType, vectorType };
}
}
}

View file

@ -664,7 +664,7 @@ namespace ChocolArm64.Instructions
for (int bit = highBit; bit >= 0; bit--)
{
if (((value >> bit) & 0b1) != 0)
if (((int)(value >> bit) & 0b1) != 0)
{
return (ulong)(highBit - bit);
}
@ -688,7 +688,7 @@ namespace ChocolArm64.Instructions
do
{
nibbleIdx -= 4;
preCount = ClzNibbleTbl[(value >> nibbleIdx) & 0b1111];
preCount = ClzNibbleTbl[(int)(value >> nibbleIdx) & 0b1111];
count += preCount;
}
while (preCount == 4);
@ -698,11 +698,6 @@ namespace ChocolArm64.Instructions
public static ulong CountSetBits8(ulong value) // "size" is 8 (SIMD&FP Inst.).
{
if (value == 0xfful)
{
return 8ul;
}
value = ((value >> 1) & 0x55ul) + (value & 0x55ul);
value = ((value >> 2) & 0x33ul) + (value & 0x33ul);

View file

@ -1545,9 +1545,9 @@ namespace ChocolArm64.Instructions
return -value;
}
// Produces a float whose raw 32-bit pattern is either all ones (integer -1) or all zeros
// (integer 0), reinterpreted through BitConverter.Int32BitsToSingle. A true argument selects
// the all-ones pattern; a false argument selects the all-zeros pattern.
// NOTE(review): the signature and the return statement each appear twice below (a "zeros"-named
// form followed by a "ones"-named form). This looks like the before/after pair of a parameter
// rename shown without diff markers - confirm which pair the real file keeps. Both return
// expressions pick -1 for a true argument and 0 for a false one.
private static float ZerosOrOnes(bool zeros)
private static float ZerosOrOnes(bool ones)
{
return BitConverter.Int32BitsToSingle(!zeros ? 0 : -1);
return BitConverter.Int32BitsToSingle(ones ? -1 : 0);
}
private static float FPUnpack(
@ -2629,9 +2629,9 @@ namespace ChocolArm64.Instructions
return -value;
}
// Produces a double whose raw 64-bit pattern is either all ones (integer -1L) or all zeros
// (integer 0L), reinterpreted through BitConverter.Int64BitsToDouble. A true argument selects
// the all-ones pattern; a false argument selects the all-zeros pattern.
// NOTE(review): the signature and the return statement each appear twice below (a "zeros"-named
// form followed by a "ones"-named form). This looks like the before/after pair of a parameter
// rename shown without diff markers - confirm which pair the real file keeps. Both return
// expressions pick -1L for a true argument and 0L for a false one.
private static double ZerosOrOnes(bool zeros)
private static double ZerosOrOnes(bool ones)
{
return BitConverter.Int64BitsToDouble(!zeros ? 0L : -1L);
return BitConverter.Int64BitsToDouble(ones ? -1L : 0L);
}
private static double FPUnpack(

View file

@ -445,9 +445,12 @@ namespace ChocolArm64
SetA64("0x001110<<1xxxxx011011xxxxxxxxxx", InstEmit.Smin_V, typeof(OpCodeSimdReg64));
SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", InstEmit.Sminp_V, typeof(OpCodeSimdReg64));
SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", InstEmit.Smlal_V, typeof(OpCodeSimdReg64));
SetA64("0x001111xxxxxxxx0010x0xxxxxxxxxx", InstEmit.Smlal_Ve, typeof(OpCodeSimdRegElem64));
SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", InstEmit.Smlsl_V, typeof(OpCodeSimdReg64));
SetA64("0x001111xxxxxxxx0110x0xxxxxxxxxx", InstEmit.Smlsl_Ve, typeof(OpCodeSimdRegElem64));
SetA64("0x001110000xxxxx001011xxxxxxxxxx", InstEmit.Smov_S, typeof(OpCodeSimdIns64));
SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", InstEmit.Smull_V, typeof(OpCodeSimdReg64));
SetA64("0x001111xxxxxxxx1010x0xxxxxxxxxx", InstEmit.Smull_Ve, typeof(OpCodeSimdRegElem64));
SetA64("01011110xx100000011110xxxxxxxxxx", InstEmit.Sqabs_S, typeof(OpCodeSimd64));
SetA64("0>001110<<100000011110xxxxxxxxxx", InstEmit.Sqabs_V, typeof(OpCodeSimd64));
SetA64("01011110xx1xxxxx000011xxxxxxxxxx", InstEmit.Sqadd_S, typeof(OpCodeSimdReg64));
@ -534,9 +537,12 @@ namespace ChocolArm64
SetA64("0x101110<<1xxxxx011011xxxxxxxxxx", InstEmit.Umin_V, typeof(OpCodeSimdReg64));
SetA64("0x101110<<1xxxxx101011xxxxxxxxxx", InstEmit.Uminp_V, typeof(OpCodeSimdReg64));
SetA64("0x101110<<1xxxxx100000xxxxxxxxxx", InstEmit.Umlal_V, typeof(OpCodeSimdReg64));
SetA64("0x101111xxxxxxxx0010x0xxxxxxxxxx", InstEmit.Umlal_Ve, typeof(OpCodeSimdRegElem64));
SetA64("0x101110<<1xxxxx101000xxxxxxxxxx", InstEmit.Umlsl_V, typeof(OpCodeSimdReg64));
SetA64("0x101111xxxxxxxx0110x0xxxxxxxxxx", InstEmit.Umlsl_Ve, typeof(OpCodeSimdRegElem64));
SetA64("0x001110000xxxxx001111xxxxxxxxxx", InstEmit.Umov_S, typeof(OpCodeSimdIns64));
SetA64("0x101110<<1xxxxx110000xxxxxxxxxx", InstEmit.Umull_V, typeof(OpCodeSimdReg64));
SetA64("0x101111xxxxxxxx1010x0xxxxxxxxxx", InstEmit.Umull_Ve, typeof(OpCodeSimdRegElem64));
SetA64("01111110xx1xxxxx000011xxxxxxxxxx", InstEmit.Uqadd_S, typeof(OpCodeSimdReg64));
SetA64("0>101110<<1xxxxx000011xxxxxxxxxx", InstEmit.Uqadd_V, typeof(OpCodeSimdReg64));
SetA64("0>101110<<1xxxxx010111xxxxxxxxxx", InstEmit.Uqrshl_V, typeof(OpCodeSimdReg64));

View file

@ -45,6 +45,32 @@ namespace Ryujinx.Tests.Cpu
0x0F808000u // MUL V0.2S, V0.2S, V0.S[0]
};
}
/// <summary>
/// Base encodings for the signed/unsigned widening multiply (by element) tests that read
/// 16-bit elements and write 32-bit results. All register and index fields are zero here;
/// the test fills them in before execution.
/// </summary>
private static uint[] _SU_Mlal_Mlsl_Mull_Ve_4H4S_8H4S_()
{
    uint[] baseOpcodes =
    {
        0x0F402000u, // SMLAL V0.4S, V0.4H, V0.H[0]
        0x0F406000u, // SMLSL V0.4S, V0.4H, V0.H[0]
        0x0F40A000u, // SMULL V0.4S, V0.4H, V0.H[0]
        0x2F402000u, // UMLAL V0.4S, V0.4H, V0.H[0]
        0x2F406000u, // UMLSL V0.4S, V0.4H, V0.H[0]
        0x2F40A000u  // UMULL V0.4S, V0.4H, V0.H[0]
    };

    return baseOpcodes;
}
/// <summary>
/// Base encodings for the signed/unsigned widening multiply (by element) tests that read
/// 32-bit elements and write 64-bit results. All register and index fields are zero here;
/// the test fills them in before execution.
/// </summary>
private static uint[] _SU_Mlal_Mlsl_Mull_Ve_2S2D_4S2D_()
{
    uint[] baseOpcodes =
    {
        0x0F802000u, // SMLAL V0.2D, V0.2S, V0.S[0]
        0x0F806000u, // SMLSL V0.2D, V0.2S, V0.S[0]
        0x0F80A000u, // SMULL V0.2D, V0.2S, V0.S[0]
        0x2F802000u, // UMLAL V0.2D, V0.2S, V0.S[0]
        0x2F806000u, // UMLSL V0.2D, V0.2S, V0.S[0]
        0x2F80A000u  // UMULL V0.2D, V0.2S, V0.S[0]
    };

    return baseOpcodes;
}
#endregion
private const int RndCnt = 2;
@ -103,6 +129,61 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn();
}
// Runs each 16-bit-element widening multiply (by element) encoding with every element index
// 0..7 and both values of the Q bit, then compares the resulting state against Unicorn.
[Test, Pairwise]
public void SU_Mlal_Mlsl_Mull_Ve_4H4S_8H4S([ValueSource("_SU_Mlal_Mlsl_Mull_Ve_4H4S_8H4S_")] uint opcodes,
                                           [Values(0u)]     uint rd,
                                           [Values(1u, 0u)] uint rn,
                                           [Values(2u, 0u)] uint rm,
                                           [ValueSource("_4H_")] [Random(RndCnt)] ulong z,
                                           [ValueSource("_4H_")] [Random(RndCnt)] ulong a,
                                           [ValueSource("_4H_")] [Random(RndCnt)] ulong b,
                                           [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint index,
                                           [Values(0b0u, 0b1u)] uint q) // <4H4S, 8H4S>
{
    // The 3-bit element index is split into three single-bit fields: H (bit 11), L (bit 21), M (bit 20).
    uint hBit = (index >> 2) & 1;
    uint lBit = (index >> 1) & 1;
    uint mBit = index & 1;

    uint registerFields = ((rm & 15) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
    uint indexFields    = (lBit << 21) | (mBit << 20) | (hBit << 11);
    uint qField         = (q & 1) << 30;

    uint encoded = opcodes | registerFields | indexFields | qField;

    // "a" goes into the first MakeVectorE0E1 slot when q is 0 and into the second when q is 1;
    // the other slot is left as zero.
    ulong rnFirst  = 0ul;
    ulong rnSecond = 0ul;

    if (q == 0u)
    {
        rnFirst = a;
    }
    else if (q == 1u)
    {
        rnSecond = a;
    }

    // The second slot of the element register only carries "b" when the H bit of the index is set.
    ulong rmSecond = hBit == 1u ? b : 0ul;

    Vector128<float> destination = MakeVectorE0E1(z, z);
    Vector128<float> multiplicand = MakeVectorE0E1(rnFirst, rnSecond);
    Vector128<float> elementSource = MakeVectorE0E1(b, rmSecond);

    SingleOpcode(encoded, v0: destination, v1: multiplicand, v2: elementSource);

    CompareAgainstUnicorn();
}
// Runs each 32-bit-element widening multiply (by element) encoding with every element index
// 0..3 and both values of the Q bit, then compares the resulting state against Unicorn.
[Test, Pairwise]
public void SU_Mlal_Mlsl_Mull_Ve_2S2D_4S2D([ValueSource("_SU_Mlal_Mlsl_Mull_Ve_2S2D_4S2D_")] uint opcodes,
                                           [Values(0u)]     uint rd,
                                           [Values(1u, 0u)] uint rn,
                                           [Values(2u, 0u)] uint rm,
                                           [ValueSource("_2S_")] [Random(RndCnt)] ulong z,
                                           [ValueSource("_2S_")] [Random(RndCnt)] ulong a,
                                           [ValueSource("_2S_")] [Random(RndCnt)] ulong b,
                                           [Values(0u, 1u, 2u, 3u)] uint index,
                                           [Values(0b0u, 0b1u)] uint q) // <2S2D, 4S2D>
{
    // The 2-bit element index is split into two single-bit fields: H (bit 11) and L (bit 21).
    uint hBit = (index >> 1) & 1;
    uint lBit = index & 1;

    uint registerFields = ((rm & 15) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
    uint indexFields    = (lBit << 21) | (hBit << 11);
    uint qField         = (q & 1) << 30;

    uint encoded = opcodes | registerFields | indexFields | qField;

    // "a" goes into the first MakeVectorE0E1 slot when q is 0 and into the second when q is 1;
    // the other slot is left as zero.
    ulong rnFirst  = 0ul;
    ulong rnSecond = 0ul;

    if (q == 0u)
    {
        rnFirst = a;
    }
    else if (q == 1u)
    {
        rnSecond = a;
    }

    // The second slot of the element register only carries "b" when the H bit of the index is set.
    ulong rmSecond = hBit == 1u ? b : 0ul;

    Vector128<float> destination = MakeVectorE0E1(z, z);
    Vector128<float> multiplicand = MakeVectorE0E1(rnFirst, rnSecond);
    Vector128<float> elementSource = MakeVectorE0E1(b, rmSecond);

    SingleOpcode(encoded, v0: destination, v1: multiplicand, v2: elementSource);

    CompareAgainstUnicorn();
}
#endif
}
}