mirror of
				https://github.com/Ryujinx/Ryujinx.git
				synced 2025-10-24 23:42:28 -07:00 
			
		
		
		
	CPU (A64): Add Pmull_V Inst. with Clmul fast path for the "1/2D -> 1Q" variant & Sse fast path and slow path for both the "8/16B -> 8H" and "1/2D -> 1Q" variants; with Test. (#1817)
* Add Pmull_V Sse fast path only, both "8/16B -> 8H" and "1/2D -> 1Q" variants; with Test. * Add Clmul fast path for the 128 bits variant. * Small optimisation (save 60 instructions) for the Sse fast path about the 128 bits variant. * Add slow path, both variants. Fix V128 Shl/Shr when shift = 0. * A32: Add Vmull_I P64 variant (slow path); not tested. * A32: Add Vmull_I_P8_P64 Test and fix P64 variant.
This commit is contained in:
		| @@ -1260,5 +1260,22 @@ namespace ARMeilleure.Instructions | ||||
|                 : (uint)(value >> 32); | ||||
|         } | ||||
| #endregion | ||||
|  | ||||
|         // Carry-less (XOR-accumulating) multiplication of two 64-bit operands. | ||||
|         // For every set bit of op1, op2 (widened to a V128) is shifted left by | ||||
|         // that bit's index and XORed into the accumulated product. | ||||
|         public static V128 PolynomialMult64_128(ulong op1, ulong op2) | ||||
|         { | ||||
|             // Widen op2 so that left shifts of up to 63 bits are performed on a V128. | ||||
|             V128 multiplicand = new V128(op2, 0); | ||||
|  | ||||
|             V128 product = V128.Zero; | ||||
|  | ||||
|             for (int bit = 0; bit < 64; bit++) | ||||
|             { | ||||
|                 bool bitIsSet = ((op1 >> bit) & 1UL) != 0UL; | ||||
|  | ||||
|                 if (!bitIsSet) | ||||
|                 { | ||||
|                     continue; | ||||
|                 } | ||||
|  | ||||
|                 // Addition of polynomials over GF(2) is a plain XOR (no carries). | ||||
|                 product = product ^ (multiplicand << bit); | ||||
|             } | ||||
|  | ||||
|             return product; | ||||
|         } | ||||
|     } | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user