mirror of
				https://github.com/Ryujinx/Ryujinx.git
				synced 2025-10-24 20:30:32 -07:00 
			
		
		
		
	Arm64: Cpu feature detection (#4264)
* Arm64: Cpu feature detection * Ptc: Add Arm64 feature info * nits * simplify CheckSysctlName * restore some macos flags * feedback
This commit is contained in:
		
							
								
								
									
										185
									
								
								ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										185
									
								
								ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,185 @@ | ||||
| using System; | ||||
| using System.Linq; | ||||
| using System.Reflection; | ||||
| using System.Runtime.CompilerServices; | ||||
| using System.Runtime.InteropServices; | ||||
| using System.Runtime.Intrinsics.Arm; | ||||
| using System.Runtime.Versioning; | ||||
|  | ||||
| namespace ARMeilleure.CodeGen.Arm64 | ||||
| { | ||||
|     /// <summary> | ||||
|     /// Detects optional Arm64 CPU features of the host and exposes them as flags. | ||||
|     /// </summary> | ||||
|     /// <remarks> | ||||
|     /// Detection runs once, in the static constructor. On Linux the flags are read with | ||||
|     /// <c>getauxval</c>; on macOS each name in <see cref="_sysctlNames"/> is queried with | ||||
|     /// <c>sysctlbyname</c>. When the process is not running on Arm64, or on any other | ||||
|     /// operating system, no detection is performed and every flag stays cleared. | ||||
|     /// </remarks> | ||||
|     static partial class HardwareCapabilities | ||||
|     { | ||||
|         static HardwareCapabilities() | ||||
|         { | ||||
|             // Nothing to probe unless the process is running on Arm64. | ||||
|             if (!ArmBase.Arm64.IsSupported) | ||||
|             { | ||||
|                 return; | ||||
|             } | ||||
|  | ||||
|             if (OperatingSystem.IsLinux()) | ||||
|             { | ||||
|                 LinuxFeatureInfoHwCap = (LinuxFeatureFlagsHwCap)getauxval(AT_HWCAP); | ||||
|                 LinuxFeatureInfoHwCap2 = (LinuxFeatureFlagsHwCap2)getauxval(AT_HWCAP2); | ||||
|             } | ||||
|             else if (OperatingSystem.IsMacOS()) | ||||
|             { | ||||
|                 MacOsFeatureFlags detected = 0; | ||||
|  | ||||
|                 // Bit i of MacOsFeatureFlags corresponds to _sysctlNames[i]. | ||||
|                 for (int i = 0; i < _sysctlNames.Length; i++) | ||||
|                 { | ||||
|                     if (CheckSysctlName(_sysctlNames[i])) | ||||
|                     { | ||||
|                         detected |= (MacOsFeatureFlags)(1 << i); | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 MacOsFeatureInfo = detected; | ||||
|             } | ||||
|         } | ||||
|  | ||||
| #region Linux | ||||
|  | ||||
|         // Entry types passed to getauxval to obtain the two feature words. | ||||
|         private const ulong AT_HWCAP = 16; | ||||
|         private const ulong AT_HWCAP2 = 26; | ||||
|  | ||||
|         [LibraryImport("libc", SetLastError = true)] | ||||
|         private static partial ulong getauxval(ulong type); | ||||
|  | ||||
|         /// <summary> | ||||
|         /// Bits of the value returned by <c>getauxval(AT_HWCAP)</c>. | ||||
|         /// </summary> | ||||
|         [Flags] | ||||
|         public enum LinuxFeatureFlagsHwCap : ulong | ||||
|         { | ||||
|             Fp        = 1 << 0, | ||||
|             Asimd     = 1 << 1, | ||||
|             Evtstrm   = 1 << 2, | ||||
|             Aes       = 1 << 3, | ||||
|             Pmull     = 1 << 4, | ||||
|             Sha1      = 1 << 5, | ||||
|             Sha2      = 1 << 6, | ||||
|             Crc32     = 1 << 7, | ||||
|             Atomics   = 1 << 8, | ||||
|             FpHp      = 1 << 9, | ||||
|             AsimdHp   = 1 << 10, | ||||
|             CpuId     = 1 << 11, | ||||
|             AsimdRdm  = 1 << 12, | ||||
|             Jscvt     = 1 << 13, | ||||
|             Fcma      = 1 << 14, | ||||
|             Lrcpc     = 1 << 15, | ||||
|             DcpOp     = 1 << 16, | ||||
|             Sha3      = 1 << 17, | ||||
|             Sm3       = 1 << 18, | ||||
|             Sm4       = 1 << 19, | ||||
|             AsimdDp   = 1 << 20, | ||||
|             Sha512    = 1 << 21, | ||||
|             Sve       = 1 << 22, | ||||
|             AsimdFhm  = 1 << 23, | ||||
|             Dit       = 1 << 24, | ||||
|             Uscat     = 1 << 25, | ||||
|             Ilrcpc    = 1 << 26, | ||||
|             FlagM     = 1 << 27, | ||||
|             Ssbs      = 1 << 28, | ||||
|             Sb        = 1 << 29, | ||||
|             Paca      = 1 << 30, | ||||
|             // From bit 31 upwards the shift must be done on a ulong: 1 << 31 as int is negative. | ||||
|             Pacg      = 1UL << 31 | ||||
|         } | ||||
|  | ||||
|         /// <summary> | ||||
|         /// Bits of the value returned by <c>getauxval(AT_HWCAP2)</c>. | ||||
|         /// </summary> | ||||
|         [Flags] | ||||
|         public enum LinuxFeatureFlagsHwCap2 : ulong | ||||
|         { | ||||
|             Dcpodp      = 1 << 0, | ||||
|             Sve2        = 1 << 1, | ||||
|             SveAes      = 1 << 2, | ||||
|             SvePmull    = 1 << 3, | ||||
|             SveBitperm  = 1 << 4, | ||||
|             SveSha3     = 1 << 5, | ||||
|             SveSm4      = 1 << 6, | ||||
|             FlagM2      = 1 << 7, | ||||
|             Frint       = 1 << 8, | ||||
|             SveI8mm     = 1 << 9, | ||||
|             SveF32mm    = 1 << 10, | ||||
|             SveF64mm    = 1 << 11, | ||||
|             SveBf16     = 1 << 12, | ||||
|             I8mm        = 1 << 13, | ||||
|             Bf16        = 1 << 14, | ||||
|             Dgh         = 1 << 15, | ||||
|             Rng         = 1 << 16, | ||||
|             Bti         = 1 << 17, | ||||
|             Mte         = 1 << 18, | ||||
|             Ecv         = 1 << 19, | ||||
|             Afp         = 1 << 20, | ||||
|             Rpres       = 1 << 21, | ||||
|             Mte3        = 1 << 22, | ||||
|             Sme         = 1 << 23, | ||||
|             Sme_i16i64  = 1 << 24, | ||||
|             Sme_f64f64  = 1 << 25, | ||||
|             Sme_i8i32   = 1 << 26, | ||||
|             Sme_f16f32  = 1 << 27, | ||||
|             Sme_b16f32  = 1 << 28, | ||||
|             Sme_f32f32  = 1 << 29, | ||||
|             Sme_fa64    = 1 << 30, | ||||
|             // From bit 31 upwards the shift must be done on a ulong. | ||||
|             Wfxt        = 1UL << 31, | ||||
|             Ebf16       = 1UL << 32, | ||||
|             Sve_Ebf16   = 1UL << 33, | ||||
|             Cssc        = 1UL << 34, | ||||
|             Rprfm       = 1UL << 35, | ||||
|             Sve2p1      = 1UL << 36 | ||||
|         } | ||||
|  | ||||
|         /// <summary>Value of <c>getauxval(AT_HWCAP)</c>; zero unless running on Arm64 Linux.</summary> | ||||
|         public static LinuxFeatureFlagsHwCap LinuxFeatureInfoHwCap { get; } = 0; | ||||
|  | ||||
|         /// <summary>Value of <c>getauxval(AT_HWCAP2)</c>; zero unless running on Arm64 Linux.</summary> | ||||
|         public static LinuxFeatureFlagsHwCap2 LinuxFeatureInfoHwCap2 { get; } = 0; | ||||
|  | ||||
| #endregion | ||||
|  | ||||
| #region macOS | ||||
|  | ||||
|         [LibraryImport("libSystem.dylib", SetLastError = true)] | ||||
|         private static unsafe partial int sysctlbyname([MarshalAs(UnmanagedType.LPStr)] string name, out int oldValue, ref ulong oldSize, IntPtr newValue, ulong newValueSize); | ||||
|  | ||||
|         /// <summary> | ||||
|         /// Queries a sysctl entry by name and interprets it as a boolean. | ||||
|         /// </summary> | ||||
|         /// <param name="name">Name of the sysctl entry to read.</param> | ||||
|         /// <returns> | ||||
|         /// <c>true</c> when <c>sysctlbyname</c> returns 0, the reported size is exactly | ||||
|         /// <c>sizeof(int)</c> and the value read is non-zero; otherwise <c>false</c>. | ||||
|         /// </returns> | ||||
|         [SupportedOSPlatform("macos")] | ||||
|         private static bool CheckSysctlName(string name) | ||||
|         { | ||||
|             ulong size = sizeof(int); | ||||
|  | ||||
|             // A failed call or an unexpected value size is treated as "feature not present". | ||||
|             if (sysctlbyname(name, out int val, ref size, IntPtr.Zero, 0) != 0 || size != sizeof(int)) | ||||
|             { | ||||
|                 return false; | ||||
|             } | ||||
|  | ||||
|             return val != 0; | ||||
|         } | ||||
|  | ||||
|         // The order of this array defines the bit layout of MacOsFeatureFlags: the entry at | ||||
|         // index i maps to bit (1 << i). Keep both in sync when adding or reordering entries. | ||||
|         private static readonly string[] _sysctlNames = new string[] | ||||
|         { | ||||
|             "hw.optional.floatingpoint", | ||||
|             "hw.optional.AdvSIMD", | ||||
|             "hw.optional.arm.FEAT_FP16", | ||||
|             "hw.optional.arm.FEAT_AES", | ||||
|             "hw.optional.arm.FEAT_PMULL", | ||||
|             "hw.optional.arm.FEAT_LSE", | ||||
|             "hw.optional.armv8_crc32", | ||||
|             "hw.optional.arm.FEAT_SHA1", | ||||
|             "hw.optional.arm.FEAT_SHA256" | ||||
|         }; | ||||
|  | ||||
|         /// <summary> | ||||
|         /// Features detected on macOS; each bit corresponds to the entry with the same index | ||||
|         /// in <see cref="_sysctlNames"/>. | ||||
|         /// </summary> | ||||
|         [Flags] | ||||
|         public enum MacOsFeatureFlags | ||||
|         { | ||||
|             Fp      = 1 << 0, | ||||
|             AdvSimd = 1 << 1, | ||||
|             Fp16    = 1 << 2, | ||||
|             Aes     = 1 << 3, | ||||
|             Pmull   = 1 << 4, | ||||
|             Lse     = 1 << 5, | ||||
|             Crc32   = 1 << 6, | ||||
|             Sha1    = 1 << 7, | ||||
|             Sha256  = 1 << 8 | ||||
|         } | ||||
|  | ||||
|         /// <summary>Features reported by sysctl; zero unless running on Arm64 macOS.</summary> | ||||
|         public static MacOsFeatureFlags MacOsFeatureInfo { get; } = 0; | ||||
|  | ||||
| #endregion | ||||
|  | ||||
|         // Each property is true when the matching Linux or macOS flag was detected. | ||||
|         public static bool SupportsAdvSimd => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Asimd) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.AdvSimd); | ||||
|         public static bool SupportsAes => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Aes) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Aes); | ||||
|         public static bool SupportsPmull => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Pmull) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Pmull); | ||||
|         public static bool SupportsLse => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Atomics) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Lse); | ||||
|         public static bool SupportsCrc32 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Crc32) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Crc32); | ||||
|         public static bool SupportsSha1 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Sha1) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Sha1); | ||||
|         public static bool SupportsSha256 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Sha2) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Sha256); | ||||
|     } | ||||
| } | ||||
| @@ -2556,7 +2556,7 @@ namespace ARMeilleure.Instructions | ||||
|         { | ||||
|             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; | ||||
|  | ||||
|             if (Optimizations.UseAdvSimd && false) // Not supported by all Arm CPUs. | ||||
|             if (Optimizations.UseArm64Pmull) | ||||
|             { | ||||
|                 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64PmullV); | ||||
|             } | ||||
|   | ||||
| @@ -1,8 +1,10 @@ | ||||
| using ARMeilleure.CodeGen.X86; | ||||
| using System.Runtime.Intrinsics.Arm; | ||||
|  | ||||
| namespace ARMeilleure | ||||
| { | ||||
|     using Arm64HardwareCapabilities = ARMeilleure.CodeGen.Arm64.HardwareCapabilities; | ||||
|     using X86HardwareCapabilities = ARMeilleure.CodeGen.X86.HardwareCapabilities; | ||||
|  | ||||
|     public static class Optimizations | ||||
|     { | ||||
|         public static bool FastFP { get; set; } = true; | ||||
| @@ -10,7 +12,8 @@ namespace ARMeilleure | ||||
|         public static bool AllowLcqInFunctionTable  { get; set; } = true; | ||||
|         public static bool UseUnmanagedDispatchLoop { get; set; } = true; | ||||
|  | ||||
|         public static bool UseAdvSimdIfAvailable { get; set; } = true; | ||||
|         public static bool UseAdvSimdIfAvailable    { get; set; } = true; | ||||
|         public static bool UseArm64PmullIfAvailable { get; set; } = true; | ||||
|  | ||||
|         public static bool UseSseIfAvailable       { get; set; } = true; | ||||
|         public static bool UseSse2IfAvailable      { get; set; } = true; | ||||
| @@ -29,25 +32,26 @@ namespace ARMeilleure | ||||
|  | ||||
|         public static bool ForceLegacySse | ||||
|         { | ||||
|             get => HardwareCapabilities.ForceLegacySse; | ||||
|             set => HardwareCapabilities.ForceLegacySse = value; | ||||
|             get => X86HardwareCapabilities.ForceLegacySse; | ||||
|             set => X86HardwareCapabilities.ForceLegacySse = value; | ||||
|         } | ||||
|  | ||||
|         internal static bool UseAdvSimd => UseAdvSimdIfAvailable && AdvSimd.IsSupported; | ||||
|         internal static bool UseAdvSimd    => UseAdvSimdIfAvailable    && Arm64HardwareCapabilities.SupportsAdvSimd; | ||||
|         internal static bool UseArm64Pmull => UseArm64PmullIfAvailable && Arm64HardwareCapabilities.SupportsPmull; | ||||
|  | ||||
|         internal static bool UseSse       => UseSseIfAvailable       && HardwareCapabilities.SupportsSse; | ||||
|         internal static bool UseSse2      => UseSse2IfAvailable      && HardwareCapabilities.SupportsSse2; | ||||
|         internal static bool UseSse3      => UseSse3IfAvailable      && HardwareCapabilities.SupportsSse3; | ||||
|         internal static bool UseSsse3     => UseSsse3IfAvailable     && HardwareCapabilities.SupportsSsse3; | ||||
|         internal static bool UseSse41     => UseSse41IfAvailable     && HardwareCapabilities.SupportsSse41; | ||||
|         internal static bool UseSse42     => UseSse42IfAvailable     && HardwareCapabilities.SupportsSse42; | ||||
|         internal static bool UsePopCnt    => UsePopCntIfAvailable    && HardwareCapabilities.SupportsPopcnt; | ||||
|         internal static bool UseAvx       => UseAvxIfAvailable       && HardwareCapabilities.SupportsAvx && !ForceLegacySse; | ||||
|         internal static bool UseF16c      => UseF16cIfAvailable      && HardwareCapabilities.SupportsF16c; | ||||
|         internal static bool UseFma       => UseFmaIfAvailable       && HardwareCapabilities.SupportsFma; | ||||
|         internal static bool UseAesni     => UseAesniIfAvailable     && HardwareCapabilities.SupportsAesni; | ||||
|         internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && HardwareCapabilities.SupportsPclmulqdq; | ||||
|         internal static bool UseSha       => UseShaIfAvailable       && HardwareCapabilities.SupportsSha; | ||||
|         internal static bool UseGfni      => UseGfniIfAvailable      && HardwareCapabilities.SupportsGfni; | ||||
|         internal static bool UseSse       => UseSseIfAvailable       && X86HardwareCapabilities.SupportsSse; | ||||
|         internal static bool UseSse2      => UseSse2IfAvailable      && X86HardwareCapabilities.SupportsSse2; | ||||
|         internal static bool UseSse3      => UseSse3IfAvailable      && X86HardwareCapabilities.SupportsSse3; | ||||
|         internal static bool UseSsse3     => UseSsse3IfAvailable     && X86HardwareCapabilities.SupportsSsse3; | ||||
|         internal static bool UseSse41     => UseSse41IfAvailable     && X86HardwareCapabilities.SupportsSse41; | ||||
|         internal static bool UseSse42     => UseSse42IfAvailable     && X86HardwareCapabilities.SupportsSse42; | ||||
|         internal static bool UsePopCnt    => UsePopCntIfAvailable    && X86HardwareCapabilities.SupportsPopcnt; | ||||
|         internal static bool UseAvx       => UseAvxIfAvailable       && X86HardwareCapabilities.SupportsAvx && !ForceLegacySse; | ||||
|         internal static bool UseF16c      => UseF16cIfAvailable      && X86HardwareCapabilities.SupportsF16c; | ||||
|         internal static bool UseFma       => UseFmaIfAvailable       && X86HardwareCapabilities.SupportsFma; | ||||
|         internal static bool UseAesni     => UseAesniIfAvailable     && X86HardwareCapabilities.SupportsAesni; | ||||
|         internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && X86HardwareCapabilities.SupportsPclmulqdq; | ||||
|         internal static bool UseSha       => UseShaIfAvailable       && X86HardwareCapabilities.SupportsSha; | ||||
|         internal static bool UseGfni      => UseGfniIfAvailable      && X86HardwareCapabilities.SupportsGfni; | ||||
|     } | ||||
| } | ||||
| } | ||||
|   | ||||
| @@ -1,7 +1,6 @@ | ||||
| using ARMeilleure.CodeGen; | ||||
| using ARMeilleure.CodeGen.Linking; | ||||
| using ARMeilleure.CodeGen.Unwinding; | ||||
| using ARMeilleure.CodeGen.X86; | ||||
| using ARMeilleure.Common; | ||||
| using ARMeilleure.Memory; | ||||
| using Ryujinx.Common; | ||||
| @@ -22,12 +21,15 @@ using static ARMeilleure.Translation.PTC.PtcFormatter; | ||||
|  | ||||
| namespace ARMeilleure.Translation.PTC | ||||
| { | ||||
|     using Arm64HardwareCapabilities = ARMeilleure.CodeGen.Arm64.HardwareCapabilities; | ||||
|     using X86HardwareCapabilities = ARMeilleure.CodeGen.X86.HardwareCapabilities; | ||||
|  | ||||
|     class Ptc : IPtcLoadState | ||||
|     { | ||||
|         private const string OuterHeaderMagicString = "PTCohd\0\0"; | ||||
|         private const string InnerHeaderMagicString = "PTCihd\0\0"; | ||||
|  | ||||
|         private const uint InternalVersion = 4114; //! To be incremented manually for each change to the ARMeilleure project. | ||||
|         private const uint InternalVersion = 4264; //! To be incremented manually for each change to the ARMeilleure project. | ||||
|  | ||||
|         private const string ActualDir = "0"; | ||||
|         private const string BackupDir = "1"; | ||||
| @@ -952,11 +954,26 @@ namespace ARMeilleure.Translation.PTC | ||||
|  | ||||
|         private static FeatureInfo GetFeatureInfo() | ||||
|         { | ||||
|             return new FeatureInfo( | ||||
|                 (uint)HardwareCapabilities.FeatureInfo1Ecx, | ||||
|                 (uint)HardwareCapabilities.FeatureInfo1Edx, | ||||
|                 (uint)HardwareCapabilities.FeatureInfo7Ebx, | ||||
|                 (uint)HardwareCapabilities.FeatureInfo7Ecx); | ||||
|             if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64) | ||||
|             { | ||||
|                 return new FeatureInfo( | ||||
|                     (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap, | ||||
|                     (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap2, | ||||
|                     (ulong)Arm64HardwareCapabilities.MacOsFeatureInfo, | ||||
|                     0); | ||||
|             } | ||||
|             else if (RuntimeInformation.ProcessArchitecture == Architecture.X64) | ||||
|             { | ||||
|                 return new FeatureInfo( | ||||
|                     (ulong)X86HardwareCapabilities.FeatureInfo1Ecx, | ||||
|                     (ulong)X86HardwareCapabilities.FeatureInfo1Edx, | ||||
|                     (ulong)X86HardwareCapabilities.FeatureInfo7Ebx, | ||||
|                     (ulong)X86HardwareCapabilities.FeatureInfo7Ecx); | ||||
|             } | ||||
|             else | ||||
|             { | ||||
|                 return new FeatureInfo(0, 0, 0, 0); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         private byte GetMemoryManagerMode() | ||||
| @@ -976,7 +993,7 @@ namespace ARMeilleure.Translation.PTC | ||||
|             return osPlatform; | ||||
|         } | ||||
|  | ||||
|         [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 58*/)] | ||||
|         [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 74*/)] | ||||
|         private struct OuterHeader | ||||
|         { | ||||
|             public ulong Magic; | ||||
| @@ -1007,8 +1024,8 @@ namespace ARMeilleure.Translation.PTC | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 16*/)] | ||||
|         private record struct FeatureInfo(uint FeatureInfo0, uint FeatureInfo1, uint FeatureInfo2, uint FeatureInfo3); | ||||
|         [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 32*/)] | ||||
|         private record struct FeatureInfo(ulong FeatureInfo0, ulong FeatureInfo1, ulong FeatureInfo2, ulong FeatureInfo3); | ||||
|  | ||||
|         [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)] | ||||
|         private struct InnerHeader | ||||
|   | ||||
		Reference in New Issue
	
	Block a user