1
0
mirror of https://github.com/Ryujinx/Ryujinx.git synced 2025-01-12 00:21:55 -08:00

Implement UQADD16, UQADD8, UQSUB16, UQSUB8, VQRDMULH, VSLI and VSWP Arm32 instructions ()

This commit is contained in:
gdkchan 2024-08-08 17:07:24 -03:00 committed by GitHub
parent 7969fb6bba
commit 8d8983049e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 445 additions and 13 deletions

@ -822,6 +822,10 @@ namespace ARMeilleure.Decoders
SetA32("<<<<00000100xxxxxxxxxxxx1001xxxx", InstName.Umaal, InstEmit32.Umaal, OpCode32AluUmull.Create); SetA32("<<<<00000100xxxxxxxxxxxx1001xxxx", InstName.Umaal, InstEmit32.Umaal, OpCode32AluUmull.Create);
SetA32("<<<<0000101xxxxxxxxxxxxx1001xxxx", InstName.Umlal, InstEmit32.Umlal, OpCode32AluUmull.Create); SetA32("<<<<0000101xxxxxxxxxxxxx1001xxxx", InstName.Umlal, InstEmit32.Umlal, OpCode32AluUmull.Create);
SetA32("<<<<0000100xxxxxxxxxxxxx1001xxxx", InstName.Umull, InstEmit32.Umull, OpCode32AluUmull.Create); SetA32("<<<<0000100xxxxxxxxxxxxx1001xxxx", InstName.Umull, InstEmit32.Umull, OpCode32AluUmull.Create);
SetA32("<<<<01100110xxxxxxxx11110001xxxx", InstName.Uqadd16, InstEmit32.Uqadd16, OpCode32AluReg.Create);
SetA32("<<<<01100110xxxxxxxx11111001xxxx", InstName.Uqadd8, InstEmit32.Uqadd8, OpCode32AluReg.Create);
SetA32("<<<<01100110xxxxxxxx11110111xxxx", InstName.Uqsub16, InstEmit32.Uqsub16, OpCode32AluReg.Create);
SetA32("<<<<01100110xxxxxxxx11111111xxxx", InstName.Uqsub8, InstEmit32.Uqsub8, OpCode32AluReg.Create);
SetA32("<<<<0110111xxxxxxxxxxxxxxx01xxxx", InstName.Usat, InstEmit32.Usat, OpCode32Sat.Create); SetA32("<<<<0110111xxxxxxxxxxxxxxx01xxxx", InstName.Usat, InstEmit32.Usat, OpCode32Sat.Create);
SetA32("<<<<01101110xxxxxxxx11110011xxxx", InstName.Usat16, InstEmit32.Usat16, OpCode32Sat16.Create); SetA32("<<<<01101110xxxxxxxx11110011xxxx", InstName.Usat16, InstEmit32.Usat16, OpCode32Sat16.Create);
SetA32("<<<<01100101xxxxxxxx11111111xxxx", InstName.Usub8, InstEmit32.Usub8, OpCode32AluReg.Create); SetA32("<<<<01100101xxxxxxxx11111111xxxx", InstName.Usub8, InstEmit32.Usub8, OpCode32AluReg.Create);
@ -1007,6 +1011,8 @@ namespace ARMeilleure.Decoders
SetAsimd("111100100x10xxxxxxxx1011xxx0xxxx", InstName.Vqdmulh, InstEmit32.Vqdmulh, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); SetAsimd("111100100x10xxxxxxxx1011xxx0xxxx", InstName.Vqdmulh, InstEmit32.Vqdmulh, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
SetAsimd("111100111x11<<10xxxx00101xx0xxx0", InstName.Vqmovn, InstEmit32.Vqmovn, OpCode32SimdMovn.Create, OpCode32SimdMovn.CreateT32); SetAsimd("111100111x11<<10xxxx00101xx0xxx0", InstName.Vqmovn, InstEmit32.Vqmovn, OpCode32SimdMovn.Create, OpCode32SimdMovn.CreateT32);
SetAsimd("111100111x11<<10xxxx001001x0xxx0", InstName.Vqmovun, InstEmit32.Vqmovun, OpCode32SimdMovn.Create, OpCode32SimdMovn.CreateT32); SetAsimd("111100111x11<<10xxxx001001x0xxx0", InstName.Vqmovun, InstEmit32.Vqmovun, OpCode32SimdMovn.Create, OpCode32SimdMovn.CreateT32);
SetAsimd("111100110x01xxxxxxxx1011xxx0xxxx", InstName.Vqrdmulh, InstEmit32.Vqrdmulh, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
SetAsimd("111100110x10xxxxxxxx1011xxx0xxxx", InstName.Vqrdmulh, InstEmit32.Vqrdmulh, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
SetAsimd("1111001x1x>>>xxxxxxx100101x1xxx0", InstName.Vqrshrn, InstEmit32.Vqrshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); SetAsimd("1111001x1x>>>xxxxxxx100101x1xxx0", InstName.Vqrshrn, InstEmit32.Vqrshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
SetAsimd("111100111x>>>xxxxxxx100001x1xxx0", InstName.Vqrshrun, InstEmit32.Vqrshrun, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); SetAsimd("111100111x>>>xxxxxxx100001x1xxx0", InstName.Vqrshrun, InstEmit32.Vqrshrun, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
SetAsimd("1111001x1x>>>xxxxxxx100100x1xxx0", InstName.Vqshrn, InstEmit32.Vqshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); SetAsimd("1111001x1x>>>xxxxxxx100100x1xxx0", InstName.Vqshrn, InstEmit32.Vqshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
@ -1030,6 +1036,7 @@ namespace ARMeilleure.Decoders
SetAsimd("1111001x1x>>>xxxxxxx101000x1xxxx", InstName.Vshll, InstEmit32.Vshll, OpCode32SimdShImmLong.Create, OpCode32SimdShImmLong.CreateT32); // A1 encoding. SetAsimd("1111001x1x>>>xxxxxxx101000x1xxxx", InstName.Vshll, InstEmit32.Vshll, OpCode32SimdShImmLong.Create, OpCode32SimdShImmLong.CreateT32); // A1 encoding.
SetAsimd("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); SetAsimd("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
SetAsimd("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); SetAsimd("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
SetAsimd("111100111x>>>xxxxxxx0101>xx1xxxx", InstName.Vsli, InstEmit32.Vsli_I, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
SetAsimd("1111001x1x>>>xxxxxxx0001>xx1xxxx", InstName.Vsra, InstEmit32.Vsra, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); SetAsimd("1111001x1x>>>xxxxxxx0001>xx1xxxx", InstName.Vsra, InstEmit32.Vsra, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
SetAsimd("111101001x00xxxxxxxx0000xxx0xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); SetAsimd("111101001x00xxxxxxxx0000xxx0xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
SetAsimd("111101001x00xxxxxxxx0100xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); SetAsimd("111101001x00xxxxxxxx0100xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
@ -1054,6 +1061,7 @@ namespace ARMeilleure.Decoders
SetAsimd("111100100x10xxxxxxxx1101xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); SetAsimd("111100100x10xxxxxxxx1101xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
SetAsimd("1111001x1x<<xxxxxxx00010x0x0xxxx", InstName.Vsubl, InstEmit32.Vsubl_I, OpCode32SimdRegLong.Create, OpCode32SimdRegLong.CreateT32); SetAsimd("1111001x1x<<xxxxxxx00010x0x0xxxx", InstName.Vsubl, InstEmit32.Vsubl_I, OpCode32SimdRegLong.Create, OpCode32SimdRegLong.CreateT32);
SetAsimd("1111001x1x<<xxxxxxx00011x0x0xxxx", InstName.Vsubw, InstEmit32.Vsubw_I, OpCode32SimdRegWide.Create, OpCode32SimdRegWide.CreateT32); SetAsimd("1111001x1x<<xxxxxxx00011x0x0xxxx", InstName.Vsubw, InstEmit32.Vsubw_I, OpCode32SimdRegWide.Create, OpCode32SimdRegWide.CreateT32);
SetAsimd("111100111x110010xxxx00000xx0xxxx", InstName.Vswp, InstEmit32.Vswp, OpCode32Simd.Create, OpCode32Simd.CreateT32);
SetAsimd("111100111x11xxxxxxxx10xxxxx0xxxx", InstName.Vtbl, InstEmit32.Vtbl, OpCode32SimdTbl.Create, OpCode32SimdTbl.CreateT32); SetAsimd("111100111x11xxxxxxxx10xxxxx0xxxx", InstName.Vtbl, InstEmit32.Vtbl, OpCode32SimdTbl.Create, OpCode32SimdTbl.CreateT32);
SetAsimd("111100111x11<<10xxxx00001xx0xxxx", InstName.Vtrn, InstEmit32.Vtrn, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); SetAsimd("111100111x11<<10xxxx00001xx0xxxx", InstName.Vtrn, InstEmit32.Vtrn, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32);
SetAsimd("111100100x<<xxxxxxxx1000xxx1xxxx", InstName.Vtst, InstEmit32.Vtst, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); SetAsimd("111100100x<<xxxxxxxx1000xxx1xxxx", InstName.Vtst, InstEmit32.Vtst, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);

@ -2,6 +2,8 @@ using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation; using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State; using ARMeilleure.State;
using ARMeilleure.Translation; using ARMeilleure.Translation;
using System;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis; using System.Diagnostics.CodeAnalysis;
using static ARMeilleure.Instructions.InstEmitAluHelper; using static ARMeilleure.Instructions.InstEmitAluHelper;
using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.Instructions.InstEmitHelper;
@ -558,6 +560,46 @@ namespace ARMeilleure.Instructions
EmitHsub8(context, unsigned: true); EmitHsub8(context, unsigned: true);
} }
public static void Uqadd16(ArmEmitterContext context)
{
OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
SetIntA32(context, op.Rd, EmitUnsigned16BitPair(context, GetIntA32(context, op.Rn), GetIntA32(context, op.Rm), (d, n, m) =>
{
EmitSaturateUqadd(context, d, context.Add(n, m), 16);
}));
}
public static void Uqadd8(ArmEmitterContext context)
{
OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
SetIntA32(context, op.Rd, EmitUnsigned8BitPair(context, GetIntA32(context, op.Rn), GetIntA32(context, op.Rm), (d, n, m) =>
{
EmitSaturateUqadd(context, d, context.Add(n, m), 8);
}));
}
public static void Uqsub16(ArmEmitterContext context)
{
OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
SetIntA32(context, op.Rd, EmitUnsigned16BitPair(context, GetIntA32(context, op.Rn), GetIntA32(context, op.Rm), (d, n, m) =>
{
EmitSaturateUqsub(context, d, context.Subtract(n, m), 16);
}));
}
public static void Uqsub8(ArmEmitterContext context)
{
OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
SetIntA32(context, op.Rd, EmitUnsigned8BitPair(context, GetIntA32(context, op.Rn), GetIntA32(context, op.Rm), (d, n, m) =>
{
EmitSaturateUqsub(context, d, context.Subtract(n, m), 8);
}));
}
public static void Usat(ArmEmitterContext context) public static void Usat(ArmEmitterContext context)
{ {
OpCode32Sat op = (OpCode32Sat)context.CurrOp; OpCode32Sat op = (OpCode32Sat)context.CurrOp;
@ -934,6 +976,148 @@ namespace ARMeilleure.Instructions
} }
} }
private static void EmitSaturateUqadd(ArmEmitterContext context, Operand result, Operand value, uint saturateTo)
{
Debug.Assert(saturateTo <= 32);
if (saturateTo == 32)
{
// No saturation possible for this case.
context.Copy(result, value);
return;
}
else if (saturateTo == 0)
{
// Result is always zero if we saturate 0 bits.
context.Copy(result, Const(0));
return;
}
// If the result is 0, the values are equal and we don't need saturation.
Operand lblNoSat = Label();
context.BranchIfFalse(lblNoSat, context.ShiftRightUI(value, Const((int)saturateTo)));
// Saturate.
context.Copy(result, Const(uint.MaxValue >> (32 - (int)saturateTo)));
Operand lblExit = Label();
context.Branch(lblExit);
context.MarkLabel(lblNoSat);
context.Copy(result, value);
context.MarkLabel(lblExit);
}
private static void EmitSaturateUqsub(ArmEmitterContext context, Operand result, Operand value, uint saturateTo)
{
Debug.Assert(saturateTo <= 32);
if (saturateTo == 32)
{
// No saturation possible for this case.
context.Copy(result, value);
return;
}
else if (saturateTo == 0)
{
// Result is always zero if we saturate 0 bits.
context.Copy(result, Const(0));
return;
}
// If the result is 0, the values are equal and we don't need saturation.
Operand lblNoSat = Label();
context.BranchIf(lblNoSat, value, Const(0), Comparison.GreaterOrEqual);
// Saturate.
// Assumes that the value can only underflow, since this is only used for unsigned subtraction.
context.Copy(result, Const(0));
Operand lblExit = Label();
context.Branch(lblExit);
context.MarkLabel(lblNoSat);
context.Copy(result, value);
context.MarkLabel(lblExit);
}
private static Operand EmitUnsigned16BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action<Operand, Operand, Operand> elementAction)
{
Operand tempD = context.AllocateLocal(OperandType.I32);
Operand tempN = context.ZeroExtend16(OperandType.I32, rn);
Operand tempM = context.ZeroExtend16(OperandType.I32, rm);
elementAction(tempD, tempN, tempM);
Operand tempD2 = context.ZeroExtend16(OperandType.I32, tempD);
tempN = context.ShiftRightUI(rn, Const(16));
tempM = context.ShiftRightUI(rm, Const(16));
elementAction(tempD, tempN, tempM);
return context.BitwiseOr(tempD2, context.ShiftLeft(tempD, Const(16)));
}
private static Operand EmitSigned8BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action<Operand, Operand, Operand> elementAction)
{
return Emit8BitPair(context, rn, rm, elementAction, unsigned: false);
}
private static Operand EmitUnsigned8BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action<Operand, Operand, Operand> elementAction)
{
return Emit8BitPair(context, rn, rm, elementAction, unsigned: true);
}
private static Operand Emit8BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action<Operand, Operand, Operand> elementAction, bool unsigned)
{
Operand tempD = context.AllocateLocal(OperandType.I32);
Operand result = default;
for (int b = 0; b < 4; b++)
{
Operand nByte = b != 0 ? context.ShiftRightUI(rn, Const(b * 8)) : rn;
Operand mByte = b != 0 ? context.ShiftRightUI(rm, Const(b * 8)) : rm;
if (unsigned)
{
nByte = context.ZeroExtend8(OperandType.I32, nByte);
mByte = context.ZeroExtend8(OperandType.I32, mByte);
}
else
{
nByte = context.SignExtend8(OperandType.I32, nByte);
mByte = context.SignExtend8(OperandType.I32, mByte);
}
elementAction(tempD, nByte, mByte);
if (b == 0)
{
result = context.ZeroExtend8(OperandType.I32, tempD);
}
else if (b < 3)
{
result = context.BitwiseOr(result, context.ShiftLeft(context.ZeroExtend8(OperandType.I32, tempD), Const(b * 8)));
}
else
{
result = context.BitwiseOr(result, context.ShiftLeft(tempD, Const(24)));
}
}
return result;
}
private static void EmitAluStore(ArmEmitterContext context, Operand value) private static void EmitAluStore(ArmEmitterContext context, Operand value)
{ {
IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;

@ -1246,6 +1246,33 @@ namespace ARMeilleure.Instructions
EmitVectorUnaryNarrowOp32(context, (op1) => EmitSatQ(context, op1, 8 << op.Size, signedSrc: true, signedDst: false), signed: true); EmitVectorUnaryNarrowOp32(context, (op1) => EmitSatQ(context, op1, 8 << op.Size, signedSrc: true, signedDst: false), signed: true);
} }
public static void Vqrdmulh(ArmEmitterContext context)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
int eSize = 8 << op.Size;
EmitVectorBinaryOpI32(context, (op1, op2) =>
{
if (op.Size == 2)
{
op1 = context.SignExtend32(OperandType.I64, op1);
op2 = context.SignExtend32(OperandType.I64, op2);
}
Operand res = context.Multiply(op1, op2);
res = context.Add(res, Const(res.Type, 1L << (eSize - 2)));
res = context.ShiftRightSI(res, Const(eSize - 1));
res = EmitSatQ(context, res, eSize, signedSrc: true, signedDst: true);
if (op.Size == 2)
{
res = context.ConvertI64ToI32(res);
}
return res;
}, signed: true);
}
public static void Vqsub(ArmEmitterContext context) public static void Vqsub(ArmEmitterContext context)
{ {
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

@ -191,6 +191,26 @@ namespace ARMeilleure.Instructions
context.Copy(GetVecA32(op.Qd), res); context.Copy(GetVecA32(op.Qd), res);
} }
public static void Vswp(ArmEmitterContext context)
{
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
if (op.Q)
{
Operand temp = context.Copy(GetVecA32(op.Qd));
context.Copy(GetVecA32(op.Qd), GetVecA32(op.Qm));
context.Copy(GetVecA32(op.Qm), temp);
}
else
{
Operand temp = ExtractScalar(context, OperandType.I64, op.Vd);
InsertScalar(context, op.Vd, ExtractScalar(context, OperandType.I64, op.Vm));
InsertScalar(context, op.Vm, temp);
}
}
public static void Vtbl(ArmEmitterContext context) public static void Vtbl(ArmEmitterContext context)
{ {
OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp; OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp;

@ -130,6 +130,36 @@ namespace ARMeilleure.Instructions
EmitVectorUnaryNarrowOp32(context, (op1) => context.ShiftRightUI(op1, Const(shift))); EmitVectorUnaryNarrowOp32(context, (op1) => context.ShiftRightUI(op1, Const(shift)));
} }
public static void Vsli_I(ArmEmitterContext context)
{
OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
int shift = op.Shift;
int eSize = 8 << op.Size;
ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0UL;
Operand res = GetVec(op.Qd);
int elems = op.GetBytesCount() >> op.Size;
for (int index = 0; index < elems; index++)
{
Operand me = EmitVectorExtractZx(context, op.Qm, op.Im + index, op.Size);
Operand neShifted = context.ShiftLeft(me, Const(shift));
Operand de = EmitVectorExtractZx(context, op.Qd, op.Id + index, op.Size);
Operand deMasked = context.BitwiseAnd(de, Const(mask));
Operand e = context.BitwiseOr(neShifted, deMasked);
res = EmitVectorInsert(context, res, e, op.Id + index, op.Size);
}
context.Copy(GetVec(op.Qd), res);
}
public static void Vsra(ArmEmitterContext context) public static void Vsra(ArmEmitterContext context)
{ {
OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;

@ -571,6 +571,10 @@ namespace ARMeilleure.Instructions
Umaal, Umaal,
Umlal, Umlal,
Umull, Umull,
Uqadd16,
Uqadd8,
Uqsub16,
Uqsub8,
Usat, Usat,
Usat16, Usat16,
Usub8, Usub8,
@ -645,6 +649,7 @@ namespace ARMeilleure.Instructions
Vqdmulh, Vqdmulh,
Vqmovn, Vqmovn,
Vqmovun, Vqmovun,
Vqrdmulh,
Vqrshrn, Vqrshrn,
Vqrshrun, Vqrshrun,
Vqshrn, Vqshrn,
@ -666,6 +671,7 @@ namespace ARMeilleure.Instructions
Vshll, Vshll,
Vshr, Vshr,
Vshrn, Vshrn,
Vsli,
Vst1, Vst1,
Vst2, Vst2,
Vst3, Vst3,
@ -682,6 +688,7 @@ namespace ARMeilleure.Instructions
Vsub, Vsub,
Vsubl, Vsubl,
Vsubw, Vsubw,
Vswp,
Vtbl, Vtbl,
Vtrn, Vtrn,
Vtst, Vtst,

@ -1,6 +1,5 @@
using Ryujinx.Cpu.LightningJit.CodeGen; using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64; using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64 namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{ {

@ -114,7 +114,7 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
InstEmitCommon.EmitUnsigned16BitPair(context, rd, rn, rm, (d, n, m) => InstEmitCommon.EmitUnsigned16BitPair(context, rd, rn, rm, (d, n, m) =>
{ {
context.Arm64Assembler.Add(d, n, m); context.Arm64Assembler.Add(d, n, m);
EmitSaturateUnsignedRange(context, d, 16); EmitSaturateUqadd(context, d, 16);
}); });
} }
@ -123,7 +123,7 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
InstEmitCommon.EmitUnsigned8BitPair(context, rd, rn, rm, (d, n, m) => InstEmitCommon.EmitUnsigned8BitPair(context, rd, rn, rm, (d, n, m) =>
{ {
context.Arm64Assembler.Add(d, n, m); context.Arm64Assembler.Add(d, n, m);
EmitSaturateUnsignedRange(context, d, 8); EmitSaturateUqadd(context, d, 8);
}); });
} }
@ -140,7 +140,7 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
context.Arm64Assembler.Add(d, n, m); context.Arm64Assembler.Add(d, n, m);
} }
EmitSaturateUnsignedRange(context, d, 16); EmitSaturateUq(context, d, 16, e == 0);
}); });
} }
@ -157,25 +157,25 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
context.Arm64Assembler.Sub(d, n, m); context.Arm64Assembler.Sub(d, n, m);
} }
EmitSaturateUnsignedRange(context, d, 16); EmitSaturateUq(context, d, 16, e != 0);
}); });
} }
public static void Uqsub16(CodeGenContext context, uint rd, uint rn, uint rm) public static void Uqsub16(CodeGenContext context, uint rd, uint rn, uint rm)
{ {
InstEmitCommon.EmitSigned16BitPair(context, rd, rn, rm, (d, n, m) => InstEmitCommon.EmitUnsigned16BitPair(context, rd, rn, rm, (d, n, m) =>
{ {
context.Arm64Assembler.Sub(d, n, m); context.Arm64Assembler.Sub(d, n, m);
EmitSaturateUnsignedRange(context, d, 16); EmitSaturateUqsub(context, d, 16);
}); });
} }
public static void Uqsub8(CodeGenContext context, uint rd, uint rn, uint rm) public static void Uqsub8(CodeGenContext context, uint rd, uint rn, uint rm)
{ {
InstEmitCommon.EmitSigned8BitPair(context, rd, rn, rm, (d, n, m) => InstEmitCommon.EmitUnsigned8BitPair(context, rd, rn, rm, (d, n, m) =>
{ {
context.Arm64Assembler.Sub(d, n, m); context.Arm64Assembler.Sub(d, n, m);
EmitSaturateUnsignedRange(context, d, 8); EmitSaturateUqsub(context, d, 8);
}); });
} }
@ -358,7 +358,17 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
} }
} }
private static void EmitSaturateUnsignedRange(CodeGenContext context, Operand value, uint saturateTo) private static void EmitSaturateUqadd(CodeGenContext context, Operand value, uint saturateTo)
{
EmitSaturateUq(context, value, saturateTo, isSub: false);
}
private static void EmitSaturateUqsub(CodeGenContext context, Operand value, uint saturateTo)
{
EmitSaturateUq(context, value, saturateTo, isSub: true);
}
private static void EmitSaturateUq(CodeGenContext context, Operand value, uint saturateTo, bool isSub)
{ {
Debug.Assert(saturateTo <= 32); Debug.Assert(saturateTo <= 32);
@ -379,7 +389,7 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
return; return;
} }
context.Arm64Assembler.Lsr(tempRegister.Operand, value, InstEmitCommon.Const(32 - (int)saturateTo)); context.Arm64Assembler.Lsr(tempRegister.Operand, value, InstEmitCommon.Const((int)saturateTo));
int branchIndex = context.CodeWriter.InstructionPointer; int branchIndex = context.CodeWriter.InstructionPointer;
@ -387,7 +397,7 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
context.Arm64Assembler.Cbz(tempRegister.Operand, 0); context.Arm64Assembler.Cbz(tempRegister.Operand, 0);
// Saturate. // Saturate.
context.Arm64Assembler.Mov(value, uint.MaxValue >> (32 - (int)saturateTo)); context.Arm64Assembler.Mov(value, isSub ? 0u : uint.MaxValue >> (32 - (int)saturateTo));
int delta = context.CodeWriter.InstructionPointer - branchIndex; int delta = context.CodeWriter.InstructionPointer - branchIndex;
context.CodeWriter.WriteInstructionAt(branchIndex, context.CodeWriter.ReadInstructionAt(branchIndex) | (uint)((delta & 0x7ffff) << 5)); context.CodeWriter.WriteInstructionAt(branchIndex, context.CodeWriter.ReadInstructionAt(branchIndex) | (uint)((delta & 0x7ffff) << 5));

@ -25,6 +25,24 @@ namespace Ryujinx.Tests.Cpu
}; };
} }
private static uint[] UQAddSub16()
{
return new[]
{
0xe6600f10u, // UQADD16 R0, R0, R0
0xe6600f70u, // UQSUB16 R0, R0, R0
};
}
private static uint[] UQAddSub8()
{
return new[]
{
0xe6600f90u, // UQADD8 R0, R0, R0
0xe6600ff0u, // UQSUB8 R0, R0, R0
};
}
private static uint[] SsatUsat() private static uint[] SsatUsat()
{ {
return new[] return new[]
@ -182,6 +200,42 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(); CompareAgainstUnicorn();
} }
[Test, Pairwise]
public void U_Q_AddSub_16([ValueSource(nameof(UQAddSub16))] uint opcode,
[Values(0u, 0xdu)] uint rd,
[Values(1u)] uint rm,
[Values(2u)] uint rn,
[Random(RndCnt)] uint w0,
[Random(RndCnt)] uint w1,
[Random(RndCnt)] uint w2)
{
opcode |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rn & 15) << 16);
uint sp = TestContext.CurrentContext.Random.NextUInt();
SingleOpcode(opcode, r0: w0, r1: w1, r2: w2, sp: sp);
CompareAgainstUnicorn();
}
[Test, Pairwise]
public void U_Q_AddSub_8([ValueSource(nameof(UQAddSub8))] uint opcode,
[Values(0u, 0xdu)] uint rd,
[Values(1u)] uint rm,
[Values(2u)] uint rn,
[Random(RndCnt)] uint w0,
[Random(RndCnt)] uint w1,
[Random(RndCnt)] uint w2)
{
opcode |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rn & 15) << 16);
uint sp = TestContext.CurrentContext.Random.NextUInt();
SingleOpcode(opcode, r0: w0, r1: w1, r2: w2, sp: sp);
CompareAgainstUnicorn();
}
[Test, Pairwise] [Test, Pairwise]
public void Uadd8_Sel([Values(0u)] uint rd, public void Uadd8_Sel([Values(0u)] uint rd,
[Values(1u)] uint rm, [Values(1u)] uint rm,

@ -327,6 +327,32 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(); CompareAgainstUnicorn();
} }
[Test, Pairwise, Description("VSWP D0, D0")]
public void Vswp([Values(0u, 1u)] uint rd,
[Values(0u, 1u)] uint rm,
[Values] bool q)
{
uint opcode = 0xf3b20000u; // VSWP D0, D0
if (q)
{
opcode |= 1u << 6;
rd &= ~1u;
rm &= ~1u;
}
opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
V128 v0 = new(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
V128 v1 = new(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
SingleOpcode(opcode, v0: v0, v1: v1);
CompareAgainstUnicorn();
}
#endif #endif
} }
} }

@ -909,6 +909,39 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(); CompareAgainstUnicorn();
} }
[Test, Pairwise, Description("VQRDMULH.<S16, S32> <Qd>, <Qn>, <Qm>")]
public void Vqrdmulh_I([Range(0u, 5u)] uint rd,
[Range(0u, 5u)] uint rn,
[Range(0u, 5u)] uint rm,
[ValueSource(nameof(_8B4H2S1D_))] ulong z,
[ValueSource(nameof(_8B4H2S1D_))] ulong a,
[ValueSource(nameof(_8B4H2S1D_))] ulong b,
[Values(1u, 2u)] uint size) // <S16, S32>
{
rd >>= 1;
rd <<= 1;
rn >>= 1;
rn <<= 1;
rm >>= 1;
rm <<= 1;
uint opcode = 0xf3100b40u & ~(3u << 20); // VQRDMULH.S16 Q0, Q0, Q0
opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3);
opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
opcode |= (size & 0x3) << 20;
V128 v0 = MakeVectorE0E1(z, ~z);
V128 v1 = MakeVectorE0E1(a, ~a);
V128 v2 = MakeVectorE0E1(b, ~b);
SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
CompareAgainstUnicorn();
}
[Test, Pairwise] [Test, Pairwise]
public void Vp_Add_Long_Accumulate([Values(0u, 2u, 4u, 8u)] uint rd, public void Vp_Add_Long_Accumulate([Values(0u, 2u, 4u, 8u)] uint rd,
[Values(0u, 2u, 4u, 8u)] uint rm, [Values(0u, 2u, 4u, 8u)] uint rm,

@ -202,7 +202,7 @@ namespace Ryujinx.Tests.Cpu
} }
[Test, Pairwise, Description("VSHL.<size> {<Vd>}, <Vm>, #<imm>")] [Test, Pairwise, Description("VSHL.<size> {<Vd>}, <Vm>, #<imm>")]
public void Vshl_Imm([Values(0u)] uint rd, public void Vshl_Imm([Values(0u, 1u)] uint rd,
[Values(2u, 0u)] uint rm, [Values(2u, 0u)] uint rm,
[Values(0u, 1u, 2u, 3u)] uint size, [Values(0u, 1u, 2u, 3u)] uint size,
[Random(RndCntShiftImm)] uint shiftImm, [Random(RndCntShiftImm)] uint shiftImm,
@ -262,6 +262,40 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(); CompareAgainstUnicorn();
} }
[Test, Pairwise, Description("VSLI.<size> {<Vd>}, <Vm>, #<imm>")]
public void Vsli([Values(0u, 1u)] uint rd,
[Values(2u, 0u)] uint rm,
[Values(0u, 1u, 2u, 3u)] uint size,
[Random(RndCntShiftImm)] uint shiftImm,
[Random(RndCnt)] ulong z,
[Random(RndCnt)] ulong a,
[Random(RndCnt)] ulong b,
[Values] bool q)
{
uint opcode = 0xf3800510u; // VORR.I32 D0, #0x800000 (immediate value changes it into SLI)
if (q)
{
opcode |= 1 << 6;
rm <<= 1;
rd <<= 1;
}
uint imm = 1u << ((int)size + 3);
imm |= shiftImm & (imm - 1);
opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
opcode |= ((imm & 0x3f) << 16) | ((imm & 0x40) << 1);
V128 v0 = MakeVectorE0E1(z, z);
V128 v1 = MakeVectorE0E1(a, z);
V128 v2 = MakeVectorE0E1(b, z);
SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
CompareAgainstUnicorn();
}
[Test, Pairwise] [Test, Pairwise]
public void Vqshrn_Vqrshrn_Vrshrn_Imm([ValueSource(nameof(_Vqshrn_Vqrshrn_Vrshrn_Imm_))] uint opcode, public void Vqshrn_Vqrshrn_Vrshrn_Imm([ValueSource(nameof(_Vqshrn_Vqrshrn_Vrshrn_Imm_))] uint opcode,
[Values(0u, 1u)] uint rd, [Values(0u, 1u)] uint rd,