From 3af2ce74ecf76cc8d6fdb9ff19101bfee47af7dd Mon Sep 17 00:00:00 2001
From: Valentin PONS <valx76@gmail.com>
Date: Sun, 19 Jul 2020 14:11:58 -0400
Subject: [PATCH] Implements some 32-bit instructions (VBIC, VTST, VSRA)
 (#1192)

* Added some 32-bit instructions:

* VBIC
* VTST
* VSRA

* Incremented the PTC

* Add tests and fix implementation

* Fixed VBIC immediate opcode mapping

* Hey hey!

* Nit.

Co-authored-by: gdkchan <gab.dark.100@gmail.com>
Co-authored-by: LDj3SNuD <dvitiello@gmail.com>
Co-authored-by: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>
---
 ARMeilleure/Decoders/OpCodeTable.cs           |   6 +-
 .../Instructions/InstEmitSimdHelper32.cs      |  31 +++-
 .../Instructions/InstEmitSimdLogical32.cs     |  63 ++++++-
 .../Instructions/InstEmitSimdShift32.cs       |  21 +++
 ARMeilleure/Instructions/InstName.cs          |   3 +
 ARMeilleure/Translation/EmitterContext.cs     |   5 +
 ARMeilleure/Translation/PTC/Ptc.cs            |   2 +-
 Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs     | 120 ++++++++++---
 Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs         |   6 +-
 Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs       | 170 +++++++++++++++---
 10 files changed, 361 insertions(+), 66 deletions(-)

diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
index 66993bbb..bbcc15ba 100644
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -806,6 +806,8 @@ namespace ARMeilleure.Decoders
             SetA32("111100100x00xxxxxxxx1101xxx0xxxx", InstName.Vadd,     InstEmit32.Vadd_V,   typeof(OpCode32SimdReg));
             SetA32("1111001x1x<<xxxxxxxx0001x0x0xxxx", InstName.Vaddw,    InstEmit32.Vaddw_I,  typeof(OpCode32SimdRegWide));
             SetA32("111100100x00xxxxxxxx0001xxx1xxxx", InstName.Vand,     InstEmit32.Vand_I,   typeof(OpCode32SimdBinary));
+            SetA32("111100100x01xxxxxxxx0001xxx1xxxx", InstName.Vbic,     InstEmit32.Vbic_I,   typeof(OpCode32SimdBinary));
+            SetA32("1111001x1x000xxxxxxx<<x10x11xxxx", InstName.Vbic,     InstEmit32.Vbic_II,  typeof(OpCode32SimdImm));
             SetA32("111100110x11xxxxxxxx0001xxx1xxxx", InstName.Vbif,     InstEmit32.Vbif,     typeof(OpCode32SimdBinary));
             SetA32("111100110x10xxxxxxxx0001xxx1xxxx", InstName.Vbit,     InstEmit32.Vbit,     typeof(OpCode32SimdBinary));
             SetA32("111100110x01xxxxxxxx0001xxx1xxxx", InstName.Vbsl,     InstEmit32.Vbsl,     typeof(OpCode32SimdBinary));
@@ -904,7 +906,7 @@ namespace ARMeilleure.Decoders
             SetA32("<<<<11100x01xxxxxxxx101xx0x0xxxx", InstName.Vnmls,    InstEmit32.Vnmls_S,  typeof(OpCode32SimdRegS));
             SetA32("<<<<11100x10xxxxxxxx101xx1x0xxxx", InstName.Vnmul,    InstEmit32.Vnmul_S,  typeof(OpCode32SimdRegS));
             SetA32("111100100x10xxxxxxxx0001xxx1xxxx", InstName.Vorr,     InstEmit32.Vorr_I,   typeof(OpCode32SimdBinary));
-            SetA32("1111001x1x000xxxxxxx0xx10x01xxxx", InstName.Vorr,     InstEmit32.Vorr_II,  typeof(OpCode32SimdImm));
+            SetA32("1111001x1x000xxxxxxx<<x10x01xxxx", InstName.Vorr,     InstEmit32.Vorr_II,  typeof(OpCode32SimdImm));
             SetA32("111100100x<<xxxxxxxx1011x0x1xxxx", InstName.Vpadd,    InstEmit32.Vpadd_I,  typeof(OpCode32SimdReg));
             SetA32("111100110x00xxxxxxxx1101x0x0xxxx", InstName.Vpadd,    InstEmit32.Vpadd_V,  typeof(OpCode32SimdReg));
             SetA32("1111001x0x<<xxxxxxxx1010x0x0xxxx", InstName.Vpmax,    InstEmit32.Vpmax_I,  typeof(OpCode32SimdReg));
@@ -927,6 +929,7 @@ namespace ARMeilleure.Decoders
             SetA32("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr,     InstEmit32.Vshr,     typeof(OpCode32SimdShImm));
             SetA32("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn,    InstEmit32.Vshrn,    typeof(OpCode32SimdShImmNarrow));
             SetA32("<<<<11101x110001xxxx101x11x0xxxx", InstName.Vsqrt,    InstEmit32.Vsqrt_S,  typeof(OpCode32SimdS));
+            SetA32("1111001x1x>>>xxxxxxx0001>xx1xxxx", InstName.Vsra,     InstEmit32.Vsra,     typeof(OpCode32SimdShImm));
             SetA32("111101001x00xxxxxxxx<<00xxxxxxxx", InstName.Vst1,     InstEmit32.Vst1,     typeof(OpCode32SimdMemSingle));
             SetA32("111101000x00xxxxxxxx0111xxxxxxxx", InstName.Vst1,     InstEmit32.Vst1,     typeof(OpCode32SimdMemPair)); // Regs = 1.
             SetA32("111101000x00xxxxxxxx1010xxxxxxxx", InstName.Vst1,     InstEmit32.Vst1,     typeof(OpCode32SimdMemPair)); // Regs = 2.
@@ -952,6 +955,7 @@ namespace ARMeilleure.Decoders
             SetA32("1111001x1x<<xxxxxxxx0011x0x0xxxx", InstName.Vsubw,    InstEmit32.Vsubw_I,  typeof(OpCode32SimdRegWide));
             SetA32("111100111x11xxxxxxxx10xxxxx0xxxx", InstName.Vtbl,     InstEmit32.Vtbl,     typeof(OpCode32SimdTbl));
             SetA32("111100111x11<<10xxxx00001xx0xxxx", InstName.Vtrn,     InstEmit32.Vtrn,     typeof(OpCode32SimdCmpZ));
+            SetA32("111100100x<<xxxxxxxx1000xxx1xxxx", InstName.Vtst,     InstEmit32.Vtst,     typeof(OpCode32SimdReg));
             SetA32("111100111x11<<10xxxx00010xx0xxxx", InstName.Vuzp,     InstEmit32.Vuzp,     typeof(OpCode32SimdCmpZ));
             SetA32("111100111x11<<10xxxx00011xx0xxxx", InstName.Vzip,     InstEmit32.Vzip,     typeof(OpCode32SimdCmpZ));
 #endregion
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
index 9753af66..e045c601 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
@@ -1,4 +1,4 @@
-using ARMeilleure.Decoders;
+using ARMeilleure.Decoders;
 using ARMeilleure.IntermediateRepresentation;
 using ARMeilleure.Translation;
 using System;
@@ -305,6 +305,35 @@ namespace ARMeilleure.Instructions
             context.Copy(GetVecA32(op.Qd), res);
         }
 
+        public static void EmitVectorImmBinaryQdQmOpZx32(ArmEmitterContext context, Func2I emit)
+        {
+            EmitVectorImmBinaryQdQmOpI32(context, emit, false); // Unsigned variant: passes signed = false to the shared implementation.
+        }
+
+        public static void EmitVectorImmBinaryQdQmOpSx32(ArmEmitterContext context, Func2I emit)
+        {
+            EmitVectorImmBinaryQdQmOpI32(context, emit, true); // Signed variant: passes signed = true to the shared implementation.
+        }
+
+        public static void EmitVectorImmBinaryQdQmOpI32(ArmEmitterContext context, Func2I emit, bool signed) // Applies emit(d, m) to each element pair of Qd and Qm and stores the result in Qd.
+        {
+            OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+            Operand res = GetVecA32(op.Qd); // Start from the current Qd value; elements are replaced one at a time below.
+
+            int elems = op.GetBytesCount() >> op.Size; // Element count: byte count divided by 2^Size.
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed); // Destination element, passed as emit's first operand.
+                Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); // Source element from Qm, passed as emit's second operand.
+
+                res = EmitVectorInsert(context, res, emit(de, me), op.Id + index, op.Size);
+            }
+
+            context.Copy(GetVecA32(op.Qd), res); // Write the updated vector back to Qd.
+        }
+
         public static void EmitVectorTernaryLongOpI32(ArmEmitterContext context, Func3I emit, bool signed)
         {
             OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
index 6505e834..2d6bf481 100644
--- a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
@@ -15,7 +15,7 @@ namespace ARMeilleure.Instructions
         {
             if (Optimizations.UseSse2)
             {
-                EmitVectorBinaryOpF32(context, Intrinsic.X86Pand, Intrinsic.X86Pand);
+                EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pand, n, m));
             }
             else
             {
@@ -23,6 +23,54 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        public static void Vbic_I(ArmEmitterContext context) // VBIC (register form): first operand AND NOT second operand.
+        {
+            if (Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pandn, m, n)); // Operands passed as (m, n); presumably PANDN complements its first operand, giving n & ~m - confirm against the x86 reference.
+            }
+            else
+            {
+                EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, context.BitwiseNot(op2))); // Fallback: op1 & ~op2.
+            }
+        }
+
+        public static void Vbic_II(ArmEmitterContext context) // VBIC (immediate form): clears in Qd the bits that are set in the replicated immediate.
+        {
+            OpCode32SimdImm op = (OpCode32SimdImm)context.CurrOp;
+
+            long immediate = op.Immediate;
+
+            // Replicate fields to fill the 64-bits, if size is < 64-bits.
+            switch (op.Size)
+            {
+                case 0: immediate *= 0x0101010101010101L; break;
+                case 1: immediate *= 0x0001000100010001L; break;
+                case 2: immediate *= 0x0000000100000001L; break;
+            }
+
+            Operand imm = Const(immediate);
+            Operand res = GetVecA32(op.Qd);
+
+            if (op.Q) // Q form: process both size-3 elements (indices 0 and 1) of Qd.
+            {
+                for (int elem = 0; elem < 2; elem++)
+                {
+                    Operand de = EmitVectorExtractZx(context, op.Qd, elem, 3);
+
+                    res = EmitVectorInsert(context, res, context.BitwiseAnd(de, context.BitwiseNot(imm)), elem, 3); // de & ~imm
+                }
+            }
+            else // Non-Q form: only the element selected by the low bit of Vd is modified.
+            {
+                Operand de = EmitVectorExtractZx(context, op.Qd, op.Vd & 1, 3);
+
+                res = EmitVectorInsert(context, res, context.BitwiseAnd(de, context.BitwiseNot(imm)), op.Vd & 1, 3); // de & ~imm
+            }
+
+            context.Copy(GetVecA32(op.Qd), res); // Write the updated vector back to Qd.
+        }
+
         public static void Vbif(ArmEmitterContext context)
         {
             EmitBifBit(context, true);
@@ -59,7 +107,7 @@ namespace ARMeilleure.Instructions
         {
             if (Optimizations.UseSse2)
             {
-                EmitVectorBinaryOpF32(context, Intrinsic.X86Pxor, Intrinsic.X86Pxor);
+                EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pxor, n, m));
             }
             else
             {
@@ -71,7 +119,7 @@ namespace ARMeilleure.Instructions
         {
             if (Optimizations.UseSse2)
             {
-                EmitVectorBinaryOpF32(context, Intrinsic.X86Por, Intrinsic.X86Por);
+                EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Por, n, m));
             }
             else
             {
@@ -115,6 +163,15 @@ namespace ARMeilleure.Instructions
             context.Copy(GetVecA32(op.Qd), res);
         }
 
+        public static void Vtst(ArmEmitterContext context) // VTST: tests whether (op1 & op2) is non-zero.
+        {
+            EmitVectorBinaryOpZx32(context, (op1, op2) =>
+            {
+                Operand isZero = context.ICompareEqual(context.BitwiseAnd(op1, op2), Const(0));
+                return context.ConditionalSelect(isZero, Const(0), Const(-1)); // 0 when no common bit is set, otherwise -1 (all bits set).
+            });
+        }
+
         private static void EmitBifBit(ArmEmitterContext context, bool notRm)
         {
             OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
diff --git a/ARMeilleure/Instructions/InstEmitSimdShift32.cs b/ARMeilleure/Instructions/InstEmitSimdShift32.cs
index b9055c30..215fe1e8 100644
--- a/ARMeilleure/Instructions/InstEmitSimdShift32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdShift32.cs
@@ -129,6 +129,27 @@ namespace ARMeilleure.Instructions
             EmitVectorUnaryNarrowOp32(context, (op1) => context.ShiftRightUI(op1, Const(shift)));
         }
 
+        public static void Vsra(ArmEmitterContext context) // VSRA: shifts each source element right by an immediate and adds it to the destination element.
+        {
+            OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+            int shift = GetImmShr(op);
+            int maxShift = (8 << op.Size) - 1; // Element bit width minus one (assuming Size is log2 of the element byte width).
+
+            if (op.U) // Unsigned form: logical shift.
+            {
+                EmitVectorImmBinaryQdQmOpZx32(context, (op1, op2) =>
+                {
+                    Operand shiftRes = shift > maxShift ? Const(op2.Type, 0) : context.ShiftRightUI(op2, Const(shift)); // A shift larger than maxShift yields zero.
+
+                    return context.Add(op1, shiftRes);
+                });
+            }
+            else // Signed form: arithmetic shift, with the amount clamped to maxShift.
+            {
+                EmitVectorImmBinaryQdQmOpSx32(context, (op1, op2) => context.Add(op1, context.ShiftRightSI(op2, Const(Math.Min(maxShift, shift)))));
+            }
+        }
+
         private static Operand EmitShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size, bool unsigned)
         {
             if (shiftLsB.Type == OperandType.I64)
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index 28041874..d7283029 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -547,6 +547,7 @@ namespace ARMeilleure.Instructions
         Vadd,
         Vaddw,
         Vand,
+        Vbic,
         Vbif,
         Vbit,
         Vbsl,
@@ -611,10 +612,12 @@ namespace ARMeilleure.Instructions
         Vrecps,
         Vrsqrte,
         Vrsqrts,
+        Vsra,
         Vsub,
         Vsubw,
         Vtbl,
         Vtrn,
+        Vtst,
         Vuzp,
         Vzip,
     }
diff --git a/ARMeilleure/Translation/EmitterContext.cs b/ARMeilleure/Translation/EmitterContext.cs
index 656f1704..74421854 100644
--- a/ARMeilleure/Translation/EmitterContext.cs
+++ b/ARMeilleure/Translation/EmitterContext.cs
@@ -441,6 +441,11 @@ namespace ARMeilleure.Translation
             return Add(Instruction.VectorInsert8, Local(OperandType.V128), vector, value, Const(index));
         }
 
+        public Operand VectorOne() // Emits Instruction.VectorOne with a new V128 local as its destination.
+        {
+            return Add(Instruction.VectorOne, Local(OperandType.V128));
+        }
+
         public Operand VectorZero()
         {
             return Add(Instruction.VectorZero, Local(OperandType.V128));
diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs
index 9db7c162..032e111e 100644
--- a/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/ARMeilleure/Translation/PTC/Ptc.cs
@@ -20,7 +20,7 @@ namespace ARMeilleure.Translation.PTC
     {
         private const string HeaderMagic = "PTChd";
 
-        private const int InternalVersion = 12; //! To be incremented manually for each change to the ARMeilleure project.
+        private const int InternalVersion = 13; //! To be incremented manually for each change to the ARMeilleure project.
 
         private const string BaseDir = "Ryujinx";
 
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs
index b6c05b10..0818b680 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs
@@ -11,11 +11,22 @@ namespace Ryujinx.Tests.Cpu
     {
 #if SimdLogical32
 
+#region "ValueSource (Types)"
+        private static ulong[] _8B4H2S_() // Boundary bit patterns repeated at 8-, 16- and 32-bit granularity within one 64-bit value.
+        {
+            return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful,
+                                 0x8080808080808080ul, 0x7FFF7FFF7FFF7FFFul,
+                                 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul,
+                                 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul };
+        }
+#endregion
+
 #region "ValueSource (Opcodes)"
-        private static uint[] _Vbif_Vbit_Vbsl_Vand_Vorr_Veor_()
+        private static uint[] _Vbic_Vbif_Vbit_Vbsl_Vand_Vorr_Veor_I_()
         {
             return new uint[]
             {
+                0xf2100110u, // VBIC D0, D0, D0
                 0xf3300110u, // VBIF D0, D0, D0
                 0xf3200110u, // VBIT D0, D0, D0
                 0xf3100110u, // VBSL D0, D0, D0
@@ -24,68 +35,121 @@ namespace Ryujinx.Tests.Cpu
                 0xf3000110u  // VEOR D0, D0, D0
             };
         }
+
+        private static uint[] _Vbic_Vorr_II_() // Base encodings; the Vbic_Vorr_II test ORs in Q, the immediate, cMode and rd.
+        {
+            return new uint[]
+            {
+                0xf2800130u, // VBIC.I32 D0, #0 (A1)
+                0xf2800930u, // VBIC.I16 D0, #0 (A2)
+                0xf2800110u, // VORR.I32 D0, #0 (A1)
+                0xf2800910u  // VORR.I16 D0, #0 (A2)
+            };
+        }
  #endregion
 
         private const int RndCnt = 2;
 
         [Test, Pairwise]
-        public void Vbif_Vbit_Vbsl_Vand_Vorr_Veor([ValueSource("_Vbif_Vbit_Vbsl_Vand_Vorr_Veor_")] uint opcode,
-                                                  [Range(0u, 4u)] uint rd,
-                                                  [Range(0u, 4u)] uint rn,
-                                                  [Range(0u, 4u)] uint rm,
-                                                  [Random(RndCnt)] ulong z,
-                                                  [Random(RndCnt)] ulong a,
-                                                  [Random(RndCnt)] ulong b,
-                                                  [Values] bool q)
+        public void Vbic_Vbif_Vbit_Vbsl_Vand_Vorr_Veor_I([ValueSource("_Vbic_Vbif_Vbit_Vbsl_Vand_Vorr_Veor_I_")] uint opcode,
+                                                         [Range(0u, 5u)] uint rd,
+                                                         [Range(0u, 5u)] uint rn,
+                                                         [Range(0u, 5u)] uint rm,
+                                                         [Values(ulong.MinValue, ulong.MaxValue)] [Random(RndCnt)] ulong z,
+                                                         [Values(ulong.MinValue, ulong.MaxValue)] [Random(RndCnt)] ulong a,
+                                                         [Values(ulong.MinValue, ulong.MaxValue)] [Random(RndCnt)] ulong b,
+                                                         [Values] bool q)
         {
             if (q)
             {
                 opcode |= 1 << 6;
-                rm <<= 1;
-                rn <<= 1;
-                rd <<= 1;
+
+                rd >>= 1; rd <<= 1;
+                rn >>= 1; rn <<= 1;
+                rm >>= 1; rm <<= 1;
             }
 
-            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
             opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
             opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3);
+            opcode |= ((rm & 0xf) << 0)  | ((rm & 0x10) << 1);
 
-            V128 v0 = MakeVectorE0E1(z, z);
-            V128 v1 = MakeVectorE0E1(a, z);
-            V128 v2 = MakeVectorE0E1(b, z);
+            V128 v0 = MakeVectorE0E1(z, ~z);
+            V128 v1 = MakeVectorE0E1(a, ~a);
+            V128 v2 = MakeVectorE0E1(b, ~b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
             CompareAgainstUnicorn();
         }
 
-        [Test, Pairwise, Description("VORR.I32 <Vd>, #<imm>")]
-        public void Vorr_II([Range(0u, 4u)] uint rd,
-                            [Random(RndCnt)] ulong z,
-                            [Random(RndCnt)] byte imm,
-                            [Values(0u, 1u, 2u, 3u)] uint cMode,
-                            [Values] bool q)
+        [Test, Pairwise]
+        public void Vbic_Vorr_II([ValueSource("_Vbic_Vorr_II_")] uint opcode,
+                                 [Values(0u, 1u)] uint rd,
+                                 [Values(ulong.MinValue, ulong.MaxValue)] [Random(RndCnt)] ulong z,
+                                 [Values(byte.MinValue, byte.MaxValue)] [Random(RndCnt)] byte imm,
+                                 [Values(0u, 1u, 2u, 3u)] uint cMode,
+                                 [Values] bool q)
         {
-            uint opcode = 0xf2800110u; // VORR.I32 D0, #0
+            if ((opcode & 0x800) != 0) // cmode<3> == '1' (A2)
+            {
+                cMode &= 1;
+            }
 
             if (q)
             {
                 opcode |= 1 << 6;
-                rd <<= 1;
+
+                rd >>= 1; rd <<= 1;
             }
 
-            opcode |= (uint)(imm & 0xf) << 0;
-            opcode |= (uint)(imm & 0x70) << 12;
-            opcode |= (uint)(imm & 0x80) << 17;
+            opcode |= ((uint)imm & 0xf)  << 0;
+            opcode |= ((uint)imm & 0x70) << 12;
+            opcode |= ((uint)imm & 0x80) << 17;
             opcode |= (cMode & 0x3) << 9;
             opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
 
-            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v0 = MakeVectorE0E1(z, ~z);
 
             SingleOpcode(opcode, v0: v0);
 
             CompareAgainstUnicorn();
         }
+
+        [Test, Pairwise, Description("VTST.<dt> <Vd>, <Vn>, <Vm>")]
+        public void Vtst([Range(0u, 5u)] uint rd,
+                         [Range(0u, 5u)] uint rn,
+                         [Range(0u, 5u)] uint rm,
+                         [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
+                         [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
+                         [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b,
+                         [Values(0u, 1u, 2u)] uint size,
+                         [Values] bool q)
+        {
+            uint opcode = 0xf2000810u; // VTST.8 D0, D0, D0
+
+            if (q)
+            {
+                opcode |= 1 << 6; // Set bit 6 when q is requested.
+
+                rd >>= 1; rd <<= 1; // Clear the low bit so each register index is even.
+                rn >>= 1; rn <<= 1;
+                rm >>= 1; rm <<= 1;
+            }
+
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+            opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3);
+            opcode |= ((rm & 0xf) << 0)  | ((rm & 0x10) << 1);
+
+            opcode |= (size & 0x3) << 20; // Element size goes in bits 21:20.
+
+            V128 v0 = MakeVectorE0E1(z, ~z); // Second argument is the bitwise complement of the first.
+            V128 v1 = MakeVectorE0E1(a, ~a);
+            V128 v2 = MakeVectorE0E1(b, ~b);
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+
+            CompareAgainstUnicorn();
+        }
 #endif
     }
 }
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
index dbe69124..866f50a9 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
@@ -28,7 +28,7 @@ namespace Ryujinx.Tests.Cpu
             {
                 0xf3000d00u, // VPADD.F32 D0, D0, D0
                 0xf3000f00u, // VPMAX.F32 D0, D0, D0
-                0xf3200f00u // VPMIN.F32 D0, D0, D0
+                0xf3200f00u  // VPMIN.F32 D0, D0, D0
             };
         }
 
@@ -41,7 +41,7 @@ namespace Ryujinx.Tests.Cpu
             {
                 VpaddI8,
                 0xf2000a00u, // VPMAX.S8 D0, D0, D0
-                0xf2000a10u // VPMIN.S8 D0, D0, D0
+                0xf2000a10u  // VPMIN.S8 D0, D0, D0
             };
         }
 #endregion
@@ -189,7 +189,7 @@ namespace Ryujinx.Tests.Cpu
 
         [Explicit]
         [Test, Pairwise, Description("VADD.f32 V0, V0, V0")]
-        public void Vadd_f32([Values(0u)]    uint rd,
+        public void Vadd_f32([Values(0u)] uint rd,
                              [Values(0u, 1u)] uint rn,
                              [Values(0u, 2u)] uint rm,
                              [ValueSource("_2S_F_")] ulong z0,
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs
index aad4a2a5..cd93cb16 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs
@@ -9,50 +9,162 @@ namespace Ryujinx.Tests.Cpu
     public sealed class CpuTestSimdShImm32 : CpuTest32
     {
 #if SimdShImm32
+
+#region "ValueSource (Types)"
+        private static ulong[] _1D_() // Boundary bit patterns at 64-bit granularity.
+        {
+            return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul,
+                                 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul };
+        }
+
+        private static ulong[] _2S_() // Boundary bit patterns repeated at 32-bit granularity.
+        {
+            return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFF7FFFFFFFul,
+                                 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul };
+        }
+
+        private static ulong[] _4H_() // Boundary bit patterns repeated at 16-bit granularity.
+        {
+            return new ulong[] { 0x0000000000000000ul, 0x7FFF7FFF7FFF7FFFul,
+                                 0x8000800080008000ul, 0xFFFFFFFFFFFFFFFFul };
+        }
+
+        private static ulong[] _8B_() // Boundary bit patterns repeated at 8-bit granularity.
+        {
+            return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful,
+                                 0x8080808080808080ul, 0xFFFFFFFFFFFFFFFFul };
+        }
+#endregion
+
+#region "ValueSource (Opcodes)"
+        private static uint[] _Vshr_Imm_SU8_() // Base opcodes for Vshr_Imm_SU8; Vshr_Imm_SU ORs in U, Q, rd, rm and imm6.
+        {
+            return new uint[]
+            {
+                0xf2880110u, // VSRA.S8  D0, D0, #8
+                0xf2880210u, // VRSHR.S8 D0, D0, #8
+                0xf2880010u  // VSHR.S8  D0, D0, #8
+            };
+        }
+
+        private static uint[] _Vshr_Imm_SU16_() // Base opcodes for Vshr_Imm_SU16; Vshr_Imm_SU ORs in U, Q, rd, rm and imm6.
+        {
+            return new uint[]
+            {
+                0xf2900110u, // VSRA.S16  D0, D0, #16
+                0xf2900210u, // VRSHR.S16 D0, D0, #16
+                0xf2900010u  // VSHR.S16  D0, D0, #16
+            };
+        }
+
+        private static uint[] _Vshr_Imm_SU32_() // Base opcodes for Vshr_Imm_SU32; Vshr_Imm_SU ORs in U, Q, rd, rm and imm6.
+        {
+            return new uint[]
+            {
+                0xf2a00110u, // VSRA.S32  D0, D0, #32
+                0xf2a00210u, // VRSHR.S32 D0, D0, #32
+                0xf2a00010u  // VSHR.S32  D0, D0, #32
+            };
+        }
+
+        private static uint[] _Vshr_Imm_SU64_() // Base opcodes for Vshr_Imm_SU64; Vshr_Imm_SU ORs in U, Q, rd, rm and imm6.
+        {
+            return new uint[]
+            {
+                0xf2800190u, // VSRA.S64  D0, D0, #64
+                0xf2800290u, // VRSHR.S64 D0, D0, #64
+                0xf2800090u  // VSHR.S64  D0, D0, #64
+            };
+        }
+#endregion
+
         private const int RndCnt = 2;
+        private const int RndCntShiftImm = 2;
 
         [Test, Pairwise]
-        public void Vrshr_Vshr_Imm([Values(0u)] uint rd,
-                             [Values(2u, 0u)] uint rm,
-                             [Values(0u, 1u, 2u, 3u)] uint size,
-                             [Random(RndCnt), Values(0u)] uint shiftImm,
-                             [Random(RndCnt)] ulong z,
-                             [Random(RndCnt)] ulong a,
-                             [Random(RndCnt)] ulong b,
-                             [Values] bool u,
-                             [Values] bool q,
-                             [Values] bool round)
+        public void Vshr_Imm_SU8([ValueSource("_Vshr_Imm_SU8_")] uint opcode,
+                                 [Range(0u, 3u)] uint rd,
+                                 [Range(0u, 3u)] uint rm,
+                                 [ValueSource("_8B_")] [Random(RndCnt)] ulong z,
+                                 [ValueSource("_8B_")] [Random(RndCnt)] ulong b,
+                                 [Values(1u, 8u)] [Random(2u, 7u, RndCntShiftImm)] uint shiftImm,
+                                 [Values] bool u,
+                                 [Values] bool q)
         {
-            uint opcode = 0xf2800010u; // VMOV.I32 D0, #0 (immediate value changes it into SHR)
-            if (q)
-            {
-                opcode |= 1 << 6;
-                rm <<= 1;
-                rd <<= 1;
-            }
+            uint imm6 = 16 - shiftImm;
 
-            if (round)
-            {
-                opcode |= 1 << 9; // Turn into VRSHR
-            }
+            Vshr_Imm_SU(opcode, rd, rm, z, b, imm6, u, q);
+        }
 
+        [Test, Pairwise]
+        public void Vshr_Imm_SU16([ValueSource("_Vshr_Imm_SU16_")] uint opcode,
+                                  [Range(0u, 3u)] uint rd,
+                                  [Range(0u, 3u)] uint rm,
+                                  [ValueSource("_4H_")] [Random(RndCnt)] ulong z,
+                                  [ValueSource("_4H_")] [Random(RndCnt)] ulong b,
+                                  [Values(1u, 16u)] [Random(2u, 15u, RndCntShiftImm)] uint shiftImm,
+                                  [Values] bool u,
+                                  [Values] bool q)
+        {
+            uint imm6 = 32 - shiftImm; // Shift is encoded as (32 - shift); Vshr_Imm_SU keeps only the low 6 bits.
+
+            Vshr_Imm_SU(opcode, rd, rm, z, b, imm6, u, q);
+        }
+
+        [Test, Pairwise]
+        public void Vshr_Imm_SU32([ValueSource("_Vshr_Imm_SU32_")] uint opcode,
+                                  [Range(0u, 3u)] uint rd,
+                                  [Range(0u, 3u)] uint rm,
+                                  [ValueSource("_2S_")] [Random(RndCnt)] ulong z,
+                                  [ValueSource("_2S_")] [Random(RndCnt)] ulong b,
+                                  [Values(1u, 32u)] [Random(2u, 31u, RndCntShiftImm)] uint shiftImm,
+                                  [Values] bool u,
+                                  [Values] bool q)
+        {
+            uint imm6 = 64 - shiftImm; // Shift is encoded as (64 - shift); Vshr_Imm_SU keeps only the low 6 bits.
+
+            Vshr_Imm_SU(opcode, rd, rm, z, b, imm6, u, q);
+        }
+
+        [Test, Pairwise]
+        public void Vshr_Imm_SU64([ValueSource("_Vshr_Imm_SU64_")] uint opcode,
+                                  [Range(0u, 3u)] uint rd,
+                                  [Range(0u, 3u)] uint rm,
+                                  [ValueSource("_1D_")] [Random(RndCnt)] ulong z,
+                                  [ValueSource("_1D_")] [Random(RndCnt)] ulong b,
+                                  [Values(1u, 64u)] [Random(2u, 63u, RndCntShiftImm)] uint shiftImm,
+                                  [Values] bool u,
+                                  [Values] bool q)
+        {
+            uint imm6 = 64 - shiftImm; // Shift is encoded as (64 - shift); a shift of 64 becomes 0 in the 6-bit field.
+
+            Vshr_Imm_SU(opcode, rd, rm, z, b, imm6, u, q);
+        }
+
+        private void Vshr_Imm_SU(uint opcode, uint rd, uint rm, ulong z, ulong b, uint imm6, bool u, bool q)
+        {
             if (u)
             {
                 opcode |= 1 << 24;
             }
 
-            uint imm = 1u << ((int)size + 3);
-            imm |= shiftImm & (imm - 1);
+            if (q)
+            {
+                opcode |= 1 << 6;
+
+                rd >>= 1; rd <<= 1;
+                rm >>= 1; rm <<= 1;
+            }
 
-            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
             opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
-            opcode |= ((imm & 0x3f) << 16) | ((imm & 0x40) << 1);
+            opcode |= ((rm & 0xf) << 0)  | ((rm & 0x10) << 1);
 
-            V128 v0 = MakeVectorE0E1(z, z);
-            V128 v1 = MakeVectorE0E1(a, z);
-            V128 v2 = MakeVectorE0E1(b, z);
+            opcode |= (imm6 & 0x3f) << 16;
 
-            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+            V128 v0 = MakeVectorE0E1(z, ~z);
+            V128 v1 = MakeVectorE0E1(b, ~b);
+
+            SingleOpcode(opcode, v0: v0, v1: v1);
 
             CompareAgainstUnicorn();
         }