1
0
Fork 0
mirror of https://github.com/Ryujinx/Ryujinx.git synced 2025-01-11 01:36:01 +00:00

Zero out bits 63:32 of scalar float operations with SSE intrinsics (#273)

This commit is contained in:
gdkchan 2018-08-14 23:54:12 -03:00 committed by GitHub
parent 0673dc183a
commit 55374ebba0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 69 additions and 16 deletions

View file

@ -305,7 +305,7 @@ namespace ChocolArm64.Instruction
{ {
if (AOptimizations.UseSse && AOptimizations.UseSse2) if (AOptimizations.UseSse && AOptimizations.UseSse2)
{ {
EmitSseOrSse2CallF(Context, nameof(Sse.AddScalar)); EmitScalarSseOrSse2CallF(Context, nameof(Sse.AddScalar));
} }
else else
{ {
@ -317,7 +317,7 @@ namespace ChocolArm64.Instruction
{ {
if (AOptimizations.UseSse && AOptimizations.UseSse2) if (AOptimizations.UseSse && AOptimizations.UseSse2)
{ {
EmitSseOrSse2CallF(Context, nameof(Sse.Add)); EmitVectorSseOrSse2CallF(Context, nameof(Sse.Add));
} }
else else
{ {
@ -375,7 +375,7 @@ namespace ChocolArm64.Instruction
{ {
if (AOptimizations.UseSse && AOptimizations.UseSse2) if (AOptimizations.UseSse && AOptimizations.UseSse2)
{ {
EmitSseOrSse2CallF(Context, nameof(Sse.DivideScalar)); EmitScalarSseOrSse2CallF(Context, nameof(Sse.DivideScalar));
} }
else else
{ {
@ -387,7 +387,7 @@ namespace ChocolArm64.Instruction
{ {
if (AOptimizations.UseSse && AOptimizations.UseSse2) if (AOptimizations.UseSse && AOptimizations.UseSse2)
{ {
EmitSseOrSse2CallF(Context, nameof(Sse.Divide)); EmitVectorSseOrSse2CallF(Context, nameof(Sse.Divide));
} }
else else
{ {
@ -526,7 +526,7 @@ namespace ChocolArm64.Instruction
{ {
if (AOptimizations.UseSse && AOptimizations.UseSse2) if (AOptimizations.UseSse && AOptimizations.UseSse2)
{ {
EmitSseOrSse2CallF(Context, nameof(Sse.MultiplyScalar)); EmitScalarSseOrSse2CallF(Context, nameof(Sse.MultiplyScalar));
} }
else else
{ {
@ -543,7 +543,7 @@ namespace ChocolArm64.Instruction
{ {
if (AOptimizations.UseSse && AOptimizations.UseSse2) if (AOptimizations.UseSse && AOptimizations.UseSse2)
{ {
EmitSseOrSse2CallF(Context, nameof(Sse.Multiply)); EmitVectorSseOrSse2CallF(Context, nameof(Sse.Multiply));
} }
else else
{ {
@ -910,7 +910,7 @@ namespace ChocolArm64.Instruction
{ {
if (AOptimizations.UseSse && AOptimizations.UseSse2) if (AOptimizations.UseSse && AOptimizations.UseSse2)
{ {
EmitSseOrSse2CallF(Context, nameof(Sse.SubtractScalar)); EmitScalarSseOrSse2CallF(Context, nameof(Sse.SubtractScalar));
} }
else else
{ {
@ -922,7 +922,7 @@ namespace ChocolArm64.Instruction
{ {
if (AOptimizations.UseSse && AOptimizations.UseSse2) if (AOptimizations.UseSse && AOptimizations.UseSse2)
{ {
EmitSseOrSse2CallF(Context, nameof(Sse.Subtract)); EmitVectorSseOrSse2CallF(Context, nameof(Sse.Subtract));
} }
else else
{ {

View file

@ -158,7 +158,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2) && AOptimizations.UseSse2)
{ {
EmitSseOrSse2CallF(Context, nameof(Sse.CompareEqualScalar)); EmitScalarSseOrSse2CallF(Context, nameof(Sse.CompareEqualScalar));
} }
else else
{ {
@ -171,7 +171,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2) && AOptimizations.UseSse2)
{ {
EmitSseOrSse2CallF(Context, nameof(Sse.CompareEqual)); EmitVectorSseOrSse2CallF(Context, nameof(Sse.CompareEqual));
} }
else else
{ {
@ -184,7 +184,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2) && AOptimizations.UseSse2)
{ {
EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqualScalar)); EmitScalarSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqualScalar));
} }
else else
{ {
@ -197,7 +197,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2) && AOptimizations.UseSse2)
{ {
EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqual)); EmitVectorSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqual));
} }
else else
{ {
@ -210,7 +210,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2) && AOptimizations.UseSse2)
{ {
EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanScalar)); EmitScalarSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanScalar));
} }
else else
{ {
@ -223,7 +223,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2) && AOptimizations.UseSse2)
{ {
EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThan)); EmitVectorSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThan));
} }
else else
{ {

View file

@ -110,7 +110,17 @@ namespace ChocolArm64.Instruction
} }
} }
public static void EmitSseOrSse2CallF(AILEmitterCtx Context, string Name) public static void EmitScalarSseOrSse2CallF(AILEmitterCtx Context, string Name)
{
EmitSseOrSse2CallF(Context, Name, true);
}
// Vector (non-scalar) entry point for emitting an SSE/SSE2 float call:
// forwards to the three-argument EmitSseOrSse2CallF with Scalar = false.
public static void EmitVectorSseOrSse2CallF(AILEmitterCtx Context, string Name)
    => EmitSseOrSse2CallF(Context, Name, Scalar: false);
public static void EmitSseOrSse2CallF(AILEmitterCtx Context, string Name, bool Scalar)
{ {
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
@ -160,7 +170,18 @@ namespace ChocolArm64.Instruction
Context.EmitStvec(Op.Rd); Context.EmitStvec(Op.Rd);
if (Op.RegisterSize == ARegisterSize.SIMD64) if (Scalar)
{
if (SizeF == 0)
{
EmitVectorZero32_128(Context, Op.Rd);
}
else /* if (SizeF == 1) */
{
EmitVectorZeroUpper(Context, Op.Rd);
}
}
else if (Op.RegisterSize == ARegisterSize.SIMD64)
{ {
EmitVectorZeroUpper(Context, Op.Rd); EmitVectorZeroUpper(Context, Op.Rd);
} }
@ -1238,6 +1259,15 @@ namespace ChocolArm64.Instruction
EmitVectorInsert(Context, Rd, 1, 3, 0); EmitVectorInsert(Context, Rd, 1, 3, 0);
} }
// Emits, in order: a load of vector register Reg, a call to
// AVectorHelper.VectorZero32_128, and a store back into the same register Reg.
// NOTE(review): load/store semantics are inferred from the EmitLdvec/EmitStvec
// names; confirm against AILEmitterCtx.
public static void EmitVectorZero32_128(AILEmitterCtx Context, int Reg)
{
Context.EmitLdvec(Reg);
// Single-name EmitCall overload is not visible in this view; presumably it
// emits a call to the named AVectorHelper method - TODO confirm.
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorZero32_128));
Context.EmitStvec(Reg);
}
public static void EmitVectorInsert(AILEmitterCtx Context, int Reg, int Index, int Size) public static void EmitVectorInsert(AILEmitterCtx Context, int Reg, int Index, int Size)
{ {
ThrowIfInvalid(Index, Size); ThrowIfInvalid(Index, Size);

View file

@ -9,6 +9,18 @@ namespace ChocolArm64.Instruction
{ {
static class AVectorHelper static class AVectorHelper
{ {
// Mask that VectorZero32_128 ANDs with its input. Built from
// Sse2.SetVector128(0, 0, 0, 0xffffffff): one 32-bit element with all bits
// set and three zero elements, reinterpreted as a float vector.
private static readonly Vector128<float> Zero32_128Mask;

// Builds Zero32_128Mask once for the type. Throws PlatformNotSupportedException
// when SSE2 is not available, since the mask is constructed with an SSE2 intrinsic.
static AVectorHelper()
{
    if (Sse2.IsSupported)
    {
        Zero32_128Mask = Sse.StaticCast<uint, float>(Sse2.SetVector128(0, 0, 0, 0xffffffff));

        return;
    }

    throw new PlatformNotSupportedException();
}
public static void EmitCall(AILEmitterCtx Context, string Name64, string Name128) public static void EmitCall(AILEmitterCtx Context, string Name64, string Name128)
{ {
bool IsSimd64 = Context.CurrOp.RegisterSize == ARegisterSize.SIMD64; bool IsSimd64 = Context.CurrOp.RegisterSize == ARegisterSize.SIMD64;
@ -448,6 +460,17 @@ namespace ChocolArm64.Instruction
throw new PlatformNotSupportedException(); throw new PlatformNotSupportedException();
} }
// Returns Vector bitwise-ANDed with Zero32_128Mask, clearing every bit that is
// not set in the mask. Throws PlatformNotSupportedException when SSE is not
// available, as there is no software fallback here.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorZero32_128(Vector128<float> Vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.And(Vector, Zero32_128Mask);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<sbyte> VectorSingleToSByte(Vector128<float> Vector) public static Vector128<sbyte> VectorSingleToSByte(Vector128<float> Vector)
{ {