Mirror of https://github.com/Ryujinx/Ryujinx.git (synced 2025-10-25 01:32:26 -07:00)
			
		
		
		
	Implement fast path for AES crypto instructions on Arm64 (#5281)
* Implement fast path for AES crypto instructions on Arm64
* PPTC version bump
* Use AES HW feature check
This commit is contained in:
		| @@ -168,8 +168,6 @@ namespace ARMeilleure.CodeGen.Arm64 | ||||
|  | ||||
|             Logger.StartPass(PassName.CodeGeneration); | ||||
|  | ||||
|             //Console.Error.WriteLine(IRDumper.GetDump(cfg)); | ||||
|  | ||||
|             bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0; | ||||
|  | ||||
|             CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable); | ||||
|   | ||||
| @@ -179,6 +179,35 @@ namespace ARMeilleure.CodeGen.Arm64 | ||||
|                         (uint)operation.GetSource(2).AsInt32()); | ||||
|                     break; | ||||
|  | ||||
|                 case IntrinsicType.Vector128Unary: | ||||
|                     GenerateVectorUnary( | ||||
|                         context, | ||||
|                         1, | ||||
|                         0, | ||||
|                         info.Inst, | ||||
|                         operation.Destination, | ||||
|                         operation.GetSource(0)); | ||||
|                     break; | ||||
|                 case IntrinsicType.Vector128Binary: | ||||
|                     GenerateVectorBinary( | ||||
|                         context, | ||||
|                         1, | ||||
|                         0, | ||||
|                         info.Inst, | ||||
|                         operation.Destination, | ||||
|                         operation.GetSource(0), | ||||
|                         operation.GetSource(1)); | ||||
|                     break; | ||||
|                 case IntrinsicType.Vector128BinaryRd: | ||||
|                     GenerateVectorUnary( | ||||
|                         context, | ||||
|                         1, | ||||
|                         0, | ||||
|                         info.Inst, | ||||
|                         operation.Destination, | ||||
|                         operation.GetSource(1)); | ||||
|                     break; | ||||
|  | ||||
|                 case IntrinsicType.VectorUnary: | ||||
|                     GenerateVectorUnary( | ||||
|                         context, | ||||
|   | ||||
| @@ -19,8 +19,8 @@ namespace ARMeilleure.CodeGen.Arm64 | ||||
|             Add(Intrinsic.Arm64AddvV,         new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary)); | ||||
|             Add(Intrinsic.Arm64AddS,          new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary)); | ||||
|             Add(Intrinsic.Arm64AddV,          new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary)); | ||||
|             Add(Intrinsic.Arm64AesdV,         new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128Unary)); | ||||
|             Add(Intrinsic.Arm64AeseV,         new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128Unary)); | ||||
|             Add(Intrinsic.Arm64AesdV,         new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128BinaryRd)); | ||||
|             Add(Intrinsic.Arm64AeseV,         new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128BinaryRd)); | ||||
|             Add(Intrinsic.Arm64AesimcV,       new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary)); | ||||
|             Add(Intrinsic.Arm64AesmcV,        new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary)); | ||||
|             Add(Intrinsic.Arm64AndV,          new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise)); | ||||
|   | ||||
| @@ -23,6 +23,10 @@ namespace ARMeilleure.CodeGen.Arm64 | ||||
|         ScalarTernaryShlRd, | ||||
|         ScalarTernaryShrRd, | ||||
|  | ||||
|         Vector128Unary, | ||||
|         Vector128Binary, | ||||
|         Vector128BinaryRd, | ||||
|  | ||||
|         VectorUnary, | ||||
|         VectorUnaryBitwise, | ||||
|         VectorUnaryByElem, | ||||
| @@ -50,9 +54,6 @@ namespace ARMeilleure.CodeGen.Arm64 | ||||
|         VectorTernaryShlRd, | ||||
|         VectorTernaryShrRd, | ||||
|  | ||||
|         Vector128Unary, | ||||
|         Vector128Binary, | ||||
|  | ||||
|         GetRegister, | ||||
|         SetRegister | ||||
|     } | ||||
|   | ||||
| @@ -746,6 +746,7 @@ namespace ARMeilleure.CodeGen.Arm64 | ||||
|                    info.Type == IntrinsicType.ScalarTernaryFPRdByElem || | ||||
|                    info.Type == IntrinsicType.ScalarTernaryShlRd || | ||||
|                    info.Type == IntrinsicType.ScalarTernaryShrRd || | ||||
|                    info.Type == IntrinsicType.Vector128BinaryRd || | ||||
|                    info.Type == IntrinsicType.VectorBinaryRd || | ||||
|                    info.Type == IntrinsicType.VectorInsertByElem || | ||||
|                    info.Type == IntrinsicType.VectorTernaryRd || | ||||
|   | ||||
| @@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions | ||||
|  | ||||
|             Operand res; | ||||
|  | ||||
|             if (Optimizations.UseAesni) | ||||
|             if (Optimizations.UseArm64Aes) | ||||
|             { | ||||
|                 res = context.AddIntrinsic(Intrinsic.Arm64AesdV, d, n); | ||||
|             } | ||||
|             else if (Optimizations.UseAesni) | ||||
|             { | ||||
|                 res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero()); | ||||
|             } | ||||
| @@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions | ||||
|  | ||||
|             Operand res; | ||||
|  | ||||
|             if (Optimizations.UseAesni) | ||||
|             if (Optimizations.UseArm64Aes) | ||||
|             { | ||||
|                 res = context.AddIntrinsic(Intrinsic.Arm64AeseV, d, n); | ||||
|             } | ||||
|             else if (Optimizations.UseAesni) | ||||
|             { | ||||
|                 res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero()); | ||||
|             } | ||||
| @@ -58,7 +66,11 @@ namespace ARMeilleure.Instructions | ||||
|  | ||||
|             Operand res; | ||||
|  | ||||
|             if (Optimizations.UseAesni) | ||||
|             if (Optimizations.UseArm64Aes) | ||||
|             { | ||||
|                 res = context.AddIntrinsic(Intrinsic.Arm64AesimcV, n); | ||||
|             } | ||||
|             else if (Optimizations.UseAesni) | ||||
|             { | ||||
|                 res = context.AddIntrinsic(Intrinsic.X86Aesimc, n); | ||||
|             } | ||||
| @@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions | ||||
|  | ||||
|             Operand res; | ||||
|  | ||||
|             if (Optimizations.UseAesni) | ||||
|             if (Optimizations.UseArm64Aes) | ||||
|             { | ||||
|                 res = context.AddIntrinsic(Intrinsic.Arm64AesmcV, n); | ||||
|             } | ||||
|             else if (Optimizations.UseAesni) | ||||
|             { | ||||
|                 Operand roundKey = context.VectorZero(); | ||||
|  | ||||
|   | ||||
| @@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions | ||||
|  | ||||
|             Operand res; | ||||
|  | ||||
|             if (Optimizations.UseAesni) | ||||
|             if (Optimizations.UseArm64Aes) | ||||
|             { | ||||
|                 res = context.AddIntrinsic(Intrinsic.Arm64AesdV, d, n); | ||||
|             } | ||||
|             else if (Optimizations.UseAesni) | ||||
|             { | ||||
|                 res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero()); | ||||
|             } | ||||
| @@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions | ||||
|  | ||||
|             Operand res; | ||||
|  | ||||
|             if (Optimizations.UseAesni) | ||||
|             if (Optimizations.UseArm64Aes) | ||||
|             { | ||||
|                 res = context.AddIntrinsic(Intrinsic.Arm64AeseV, d, n); | ||||
|             } | ||||
|             else if (Optimizations.UseAesni) | ||||
|             { | ||||
|                 res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero()); | ||||
|             } | ||||
| @@ -58,7 +66,11 @@ namespace ARMeilleure.Instructions | ||||
|  | ||||
|             Operand res; | ||||
|  | ||||
|             if (Optimizations.UseAesni) | ||||
|             if (Optimizations.UseArm64Aes) | ||||
|             { | ||||
|                 res = context.AddIntrinsic(Intrinsic.Arm64AesimcV, n); | ||||
|             } | ||||
|             else if (Optimizations.UseAesni) | ||||
|             { | ||||
|                 res = context.AddIntrinsic(Intrinsic.X86Aesimc, n); | ||||
|             } | ||||
| @@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions | ||||
|  | ||||
|             Operand res; | ||||
|  | ||||
|             if (Optimizations.UseAesni) | ||||
|             if (Optimizations.UseArm64Aes) | ||||
|             { | ||||
|                 res = context.AddIntrinsic(Intrinsic.Arm64AesmcV, n); | ||||
|             } | ||||
|             else if (Optimizations.UseAesni) | ||||
|             { | ||||
|                 Operand roundKey = context.VectorZero(); | ||||
|  | ||||
|   | ||||
| @@ -13,6 +13,7 @@ namespace ARMeilleure | ||||
|         public static bool UseUnmanagedDispatchLoop { get; set; } = true; | ||||
|  | ||||
|         public static bool UseAdvSimdIfAvailable    { get; set; } = true; | ||||
|         public static bool UseArm64AesIfAvailable   { get; set; } = true; | ||||
|         public static bool UseArm64PmullIfAvailable { get; set; } = true; | ||||
|  | ||||
|         public static bool UseSseIfAvailable       { get; set; } = true; | ||||
| @@ -41,6 +42,7 @@ namespace ARMeilleure | ||||
|         } | ||||
|  | ||||
|         internal static bool UseAdvSimd    => UseAdvSimdIfAvailable    && Arm64HardwareCapabilities.SupportsAdvSimd; | ||||
|         internal static bool UseArm64Aes   => UseArm64AesIfAvailable   && Arm64HardwareCapabilities.SupportsAes; | ||||
|         internal static bool UseArm64Pmull => UseArm64PmullIfAvailable && Arm64HardwareCapabilities.SupportsPmull; | ||||
|  | ||||
|         internal static bool UseSse       => UseSseIfAvailable       && X86HardwareCapabilities.SupportsSse; | ||||
|   | ||||
| @@ -30,7 +30,7 @@ namespace ARMeilleure.Translation.PTC | ||||
|         private const string OuterHeaderMagicString = "PTCohd\0\0"; | ||||
|         private const string InnerHeaderMagicString = "PTCihd\0\0"; | ||||
|  | ||||
|         private const uint InternalVersion = 4661; //! To be incremented manually for each change to the ARMeilleure project. | ||||
|         private const uint InternalVersion = 5281; //! To be incremented manually for each change to the ARMeilleure project. | ||||
|  | ||||
|         private const string ActualDir = "0"; | ||||
|         private const string BackupDir = "1"; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user