Add a new JIT compiler for CPU code (#693)
* Start of the ARMeilleure project
* Refactoring around the old IRAdapter, now renamed to PreAllocator
* Optimize the LowestBitSet method
* Add CLZ support and fix CLS implementation
* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks
* Implement the ByteSwap IR instruction, and some refactoring on the assembler
* Implement the DivideUI IR instruction and fix 64-bits IDIV
* Correct constant operand type on CSINC
* Move division instructions implementation to InstEmitDiv
* Fix destination type for the ConditionalSelect IR instruction
* Implement UMULH and SMULH, with new IR instructions
* Fix some issues with shift instructions
* Fix constant types for BFM instructions
* Fix up new tests using the new V128 struct
* Update tests
* Move DIV tests to a separate file
* Add support for calls, and some instructions that depends on them
* Start adding support for SIMD & FP types, along with some of the related ARM instructions
* Fix some typos and the divide instruction with FP operands
* Fix wrong method call on Clz_V
* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes
* Implement SIMD logical instructions and more misc. fixes
* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations
* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes
* Implement SIMD shift instruction and fix Dup_V
* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table
* Fix check with tolerance on tester
* Implement FP & SIMD comparison instructions, and some fixes
* Update FCVT (Scalar) encoding on the table to support the Half-float variants
* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes
* Use old memory access methods, made a start on SIMD memory insts support, some fixes
* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes
* Fix arguments count with struct return values, other fixes
* More instructions
* Misc. fixes and integrate LDj3SNuD fixes
* Update tests
* Add a faster linear scan allocator, unwinding support on windows, and other changes
* Update Ryujinx.HLE
* Update Ryujinx.Graphics
* Fix V128 return pointer passing, RCX is clobbered
* Update Ryujinx.Tests
* Update ITimeZoneService
* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks
* Use generic GetFunctionPointerForDelegate method and other tweaks
* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics
* Remove some unused code on the assembler
* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler
* Add hardware capability detection
* Fix regression on Sha1h and revert Fcm** changes
* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator
* Fix silly mistake introduced on last commit on CpuId
* Generate inline stack probes when the stack allocation is too large
* Initial support for the System-V ABI
* Support multiple destination operands
* Fix SSE2 VectorInsert8 path, and other fixes
* Change placement of XMM callee save and restore code to match other compilers
* Rename Dest to Destination and Inst to Instruction
* Fix a regression related to calls and the V128 type
* Add an extra space on comments to match code style
* Some refactoring
* Fix vector insert FP32 SSE2 path
* Port over the ARM32 instructions
* Avoid memory protection races on JIT Cache
* Another fix on VectorInsert FP32 (thanks to LDj3SNuD
* Float operands don't need to use the same register when VEX is supported
* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks
* Some nits, small improvements on the pre allocator
* CpuThreadState is gone
* Allow changing CPU emulators with a config entry
* Add runtime identifiers on the ARMeilleure project
* Allow switching between CPUs through a config entry (pt. 2)
* Change win10-x64 to win-x64 on projects
* Update the Ryujinx project to use ARMeilleure
* Ensure that the selected register is valid on the hybrid allocator
* Allow exiting on returns to 0 (should fix test regression)
* Remove register assignments for most used variables on the hybrid allocator
* Do not use fixed registers as spill temp
* Add missing namespace and remove unneeded using
* Address PR feedback
* Fix types, etc
* Enable AssumeStrictAbiCompliance by default
* Ensure that Spill and Fill don't load or store any more than necessary
2019-08-08 11:56:22 -07:00
|
|
|
using ARMeilleure.Common;
|
|
|
|
using ARMeilleure.IntermediateRepresentation;
|
|
|
|
|
|
|
|
namespace ARMeilleure.CodeGen.X86
|
|
|
|
{
|
|
|
|
static class IntrinsicTable
|
|
|
|
{
|
|
|
|
private const int BadOp = 0;
|
|
|
|
|
|
|
|
private static IntrinsicInfo[] _intrinTable;
|
|
|
|
|
|
|
|
static IntrinsicTable()
|
|
|
|
{
|
|
|
|
_intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))];
|
|
|
|
|
|
|
|
Add(Intrinsic.X86Addpd, new IntrinsicInfo(X86Instruction.Addpd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Addps, new IntrinsicInfo(X86Instruction.Addps, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Addsd, new IntrinsicInfo(X86Instruction.Addsd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Addss, new IntrinsicInfo(X86Instruction.Addss, IntrinsicType.Binary));
|
2020-03-24 23:20:29 -07:00
|
|
|
Add(Intrinsic.X86Aesdec, new IntrinsicInfo(X86Instruction.Aesdec, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Aesdeclast, new IntrinsicInfo(X86Instruction.Aesdeclast, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Aesenc, new IntrinsicInfo(X86Instruction.Aesenc, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Aesenclast, new IntrinsicInfo(X86Instruction.Aesenclast, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Aesimc, new IntrinsicInfo(X86Instruction.Aesimc, IntrinsicType.Unary));
|
Add a new JIT compiler for CPU code (#693)
* Start of the ARMeilleure project
* Refactoring around the old IRAdapter, now renamed to PreAllocator
* Optimize the LowestBitSet method
* Add CLZ support and fix CLS implementation
* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks
* Implement the ByteSwap IR instruction, and some refactoring on the assembler
* Implement the DivideUI IR instruction and fix 64-bits IDIV
* Correct constant operand type on CSINC
* Move division instructions implementation to InstEmitDiv
* Fix destination type for the ConditionalSelect IR instruction
* Implement UMULH and SMULH, with new IR instructions
* Fix some issues with shift instructions
* Fix constant types for BFM instructions
* Fix up new tests using the new V128 struct
* Update tests
* Move DIV tests to a separate file
* Add support for calls, and some instructions that depends on them
* Start adding support for SIMD & FP types, along with some of the related ARM instructions
* Fix some typos and the divide instruction with FP operands
* Fix wrong method call on Clz_V
* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes
* Implement SIMD logical instructions and more misc. fixes
* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations
* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes
* Implement SIMD shift instruction and fix Dup_V
* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table
* Fix check with tolerance on tester
* Implement FP & SIMD comparison instructions, and some fixes
* Update FCVT (Scalar) encoding on the table to support the Half-float variants
* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes
* Use old memory access methods, made a start on SIMD memory insts support, some fixes
* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes
* Fix arguments count with struct return values, other fixes
* More instructions
* Misc. fixes and integrate LDj3SNuD fixes
* Update tests
* Add a faster linear scan allocator, unwinding support on windows, and other changes
* Update Ryujinx.HLE
* Update Ryujinx.Graphics
* Fix V128 return pointer passing, RCX is clobbered
* Update Ryujinx.Tests
* Update ITimeZoneService
* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks
* Use generic GetFunctionPointerForDelegate method and other tweaks
* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics
* Remove some unused code on the assembler
* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler
* Add hardware capability detection
* Fix regression on Sha1h and revert Fcm** changes
* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator
* Fix silly mistake introduced on last commit on CpuId
* Generate inline stack probes when the stack allocation is too large
* Initial support for the System-V ABI
* Support multiple destination operands
* Fix SSE2 VectorInsert8 path, and other fixes
* Change placement of XMM callee save and restore code to match other compilers
* Rename Dest to Destination and Inst to Instruction
* Fix a regression related to calls and the V128 type
* Add an extra space on comments to match code style
* Some refactoring
* Fix vector insert FP32 SSE2 path
* Port over the ARM32 instructions
* Avoid memory protection races on JIT Cache
* Another fix on VectorInsert FP32 (thanks to LDj3SNuD
* Float operands don't need to use the same register when VEX is supported
* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks
* Some nits, small improvements on the pre allocator
* CpuThreadState is gone
* Allow changing CPU emulators with a config entry
* Add runtime identifiers on the ARMeilleure project
* Allow switching between CPUs through a config entry (pt. 2)
* Change win10-x64 to win-x64 on projects
* Update the Ryujinx project to use ARMeilleure
* Ensure that the selected register is valid on the hybrid allocator
* Allow exiting on returns to 0 (should fix test regression)
* Remove register assignments for most used variables on the hybrid allocator
* Do not use fixed registers as spill temp
* Add missing namespace and remove unneeded using
* Address PR feedback
* Fix types, etc
* Enable AssumeStrictAbiCompliance by default
* Ensure that Spill and Fill don't load or store any more than necessary
2019-08-08 11:56:22 -07:00
|
|
|
Add(Intrinsic.X86Andnpd, new IntrinsicInfo(X86Instruction.Andnpd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Andnps, new IntrinsicInfo(X86Instruction.Andnps, IntrinsicType.Binary));
|
2019-12-07 04:45:32 -08:00
|
|
|
Add(Intrinsic.X86Andpd, new IntrinsicInfo(X86Instruction.Andpd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Andps, new IntrinsicInfo(X86Instruction.Andps, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Blendvpd, new IntrinsicInfo(X86Instruction.Blendvpd, IntrinsicType.Ternary));
|
|
|
|
Add(Intrinsic.X86Blendvps, new IntrinsicInfo(X86Instruction.Blendvps, IntrinsicType.Ternary));
|
Add a new JIT compiler for CPU code (#693)
* Start of the ARMeilleure project
* Refactoring around the old IRAdapter, now renamed to PreAllocator
* Optimize the LowestBitSet method
* Add CLZ support and fix CLS implementation
* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks
* Implement the ByteSwap IR instruction, and some refactoring on the assembler
* Implement the DivideUI IR instruction and fix 64-bits IDIV
* Correct constant operand type on CSINC
* Move division instructions implementation to InstEmitDiv
* Fix destination type for the ConditionalSelect IR instruction
* Implement UMULH and SMULH, with new IR instructions
* Fix some issues with shift instructions
* Fix constant types for BFM instructions
* Fix up new tests using the new V128 struct
* Update tests
* Move DIV tests to a separate file
* Add support for calls, and some instructions that depends on them
* Start adding support for SIMD & FP types, along with some of the related ARM instructions
* Fix some typos and the divide instruction with FP operands
* Fix wrong method call on Clz_V
* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes
* Implement SIMD logical instructions and more misc. fixes
* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations
* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes
* Implement SIMD shift instruction and fix Dup_V
* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table
* Fix check with tolerance on tester
* Implement FP & SIMD comparison instructions, and some fixes
* Update FCVT (Scalar) encoding on the table to support the Half-float variants
* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes
* Use old memory access methods, made a start on SIMD memory insts support, some fixes
* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes
* Fix arguments count with struct return values, other fixes
* More instructions
* Misc. fixes and integrate LDj3SNuD fixes
* Update tests
* Add a faster linear scan allocator, unwinding support on windows, and other changes
* Update Ryujinx.HLE
* Update Ryujinx.Graphics
* Fix V128 return pointer passing, RCX is clobbered
* Update Ryujinx.Tests
* Update ITimeZoneService
* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks
* Use generic GetFunctionPointerForDelegate method and other tweaks
* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics
* Remove some unused code on the assembler
* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler
* Add hardware capability detection
* Fix regression on Sha1h and revert Fcm** changes
* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator
* Fix silly mistake introduced on last commit on CpuId
* Generate inline stack probes when the stack allocation is too large
* Initial support for the System-V ABI
* Support multiple destination operands
* Fix SSE2 VectorInsert8 path, and other fixes
* Change placement of XMM callee save and restore code to match other compilers
* Rename Dest to Destination and Inst to Instruction
* Fix a regression related to calls and the V128 type
* Add an extra space on comments to match code style
* Some refactoring
* Fix vector insert FP32 SSE2 path
* Port over the ARM32 instructions
* Avoid memory protection races on JIT Cache
* Another fix on VectorInsert FP32 (thanks to LDj3SNuD
* Float operands don't need to use the same register when VEX is supported
* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks
* Some nits, small improvements on the pre allocator
* CpuThreadState is gone
* Allow changing CPU emulators with a config entry
* Add runtime identifiers on the ARMeilleure project
* Allow switching between CPUs through a config entry (pt. 2)
* Change win10-x64 to win-x64 on projects
* Update the Ryujinx project to use ARMeilleure
* Ensure that the selected register is valid on the hybrid allocator
* Allow exiting on returns to 0 (should fix test regression)
* Remove register assignments for most used variables on the hybrid allocator
* Do not use fixed registers as spill temp
* Add missing namespace and remove unneeded using
* Address PR feedback
* Fix types, etc
* Enable AssumeStrictAbiCompliance by default
* Ensure that Spill and Fill don't load or store any more than necessary
2019-08-08 11:56:22 -07:00
|
|
|
Add(Intrinsic.X86Cmppd, new IntrinsicInfo(X86Instruction.Cmppd, IntrinsicType.TernaryImm));
|
|
|
|
Add(Intrinsic.X86Cmpps, new IntrinsicInfo(X86Instruction.Cmpps, IntrinsicType.TernaryImm));
|
|
|
|
Add(Intrinsic.X86Cmpsd, new IntrinsicInfo(X86Instruction.Cmpsd, IntrinsicType.TernaryImm));
|
|
|
|
Add(Intrinsic.X86Cmpss, new IntrinsicInfo(X86Instruction.Cmpss, IntrinsicType.TernaryImm));
|
|
|
|
Add(Intrinsic.X86Comisdeq, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_));
|
|
|
|
Add(Intrinsic.X86Comisdge, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_));
|
|
|
|
Add(Intrinsic.X86Comisdlt, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_));
|
|
|
|
Add(Intrinsic.X86Comisseq, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
|
|
|
|
Add(Intrinsic.X86Comissge, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
|
|
|
|
Add(Intrinsic.X86Comisslt, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
|
2020-07-13 03:48:14 -07:00
|
|
|
Add(Intrinsic.X86Crc32, new IntrinsicInfo(X86Instruction.Crc32, IntrinsicType.Crc32));
|
|
|
|
Add(Intrinsic.X86Crc32_16, new IntrinsicInfo(X86Instruction.Crc32_16, IntrinsicType.Crc32));
|
|
|
|
Add(Intrinsic.X86Crc32_8, new IntrinsicInfo(X86Instruction.Crc32_8, IntrinsicType.Crc32));
|
Add a new JIT compiler for CPU code (#693)
* Start of the ARMeilleure project
* Refactoring around the old IRAdapter, now renamed to PreAllocator
* Optimize the LowestBitSet method
* Add CLZ support and fix CLS implementation
* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks
* Implement the ByteSwap IR instruction, and some refactoring on the assembler
* Implement the DivideUI IR instruction and fix 64-bits IDIV
* Correct constant operand type on CSINC
* Move division instructions implementation to InstEmitDiv
* Fix destination type for the ConditionalSelect IR instruction
* Implement UMULH and SMULH, with new IR instructions
* Fix some issues with shift instructions
* Fix constant types for BFM instructions
* Fix up new tests using the new V128 struct
* Update tests
* Move DIV tests to a separate file
* Add support for calls, and some instructions that depends on them
* Start adding support for SIMD & FP types, along with some of the related ARM instructions
* Fix some typos and the divide instruction with FP operands
* Fix wrong method call on Clz_V
* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes
* Implement SIMD logical instructions and more misc. fixes
* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations
* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes
* Implement SIMD shift instruction and fix Dup_V
* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table
* Fix check with tolerance on tester
* Implement FP & SIMD comparison instructions, and some fixes
* Update FCVT (Scalar) encoding on the table to support the Half-float variants
* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes
* Use old memory access methods, made a start on SIMD memory insts support, some fixes
* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes
* Fix arguments count with struct return values, other fixes
* More instructions
* Misc. fixes and integrate LDj3SNuD fixes
* Update tests
* Add a faster linear scan allocator, unwinding support on windows, and other changes
* Update Ryujinx.HLE
* Update Ryujinx.Graphics
* Fix V128 return pointer passing, RCX is clobbered
* Update Ryujinx.Tests
* Update ITimeZoneService
* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks
* Use generic GetFunctionPointerForDelegate method and other tweaks
* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics
* Remove some unused code on the assembler
* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler
* Add hardware capability detection
* Fix regression on Sha1h and revert Fcm** changes
* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator
* Fix silly mistake introduced on last commit on CpuId
* Generate inline stack probes when the stack allocation is too large
* Initial support for the System-V ABI
* Support multiple destination operands
* Fix SSE2 VectorInsert8 path, and other fixes
* Change placement of XMM callee save and restore code to match other compilers
* Rename Dest to Destination and Inst to Instruction
* Fix a regression related to calls and the V128 type
* Add an extra space on comments to match code style
* Some refactoring
* Fix vector insert FP32 SSE2 path
* Port over the ARM32 instructions
* Avoid memory protection races on JIT Cache
* Another fix on VectorInsert FP32 (thanks to LDj3SNuD
* Float operands don't need to use the same register when VEX is supported
* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks
* Some nits, small improvements on the pre allocator
* CpuThreadState is gone
* Allow changing CPU emulators with a config entry
* Add runtime identifiers on the ARMeilleure project
* Allow switching between CPUs through a config entry (pt. 2)
* Change win10-x64 to win-x64 on projects
* Update the Ryujinx project to use ARMeilleure
* Ensure that the selected register is valid on the hybrid allocator
* Allow exiting on returns to 0 (should fix test regression)
* Remove register assignments for most used variables on the hybrid allocator
* Do not use fixed registers as spill temp
* Add missing namespace and remove unneeded using
* Address PR feedback
* Fix types, etc
* Enable AssumeStrictAbiCompliance by default
* Ensure that Spill and Fill don't load or store any more than necessary
2019-08-08 11:56:22 -07:00
|
|
|
Add(Intrinsic.X86Cvtdq2pd, new IntrinsicInfo(X86Instruction.Cvtdq2pd, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Cvtdq2ps, new IntrinsicInfo(X86Instruction.Cvtdq2ps, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Cvtpd2dq, new IntrinsicInfo(X86Instruction.Cvtpd2dq, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Cvtpd2ps, new IntrinsicInfo(X86Instruction.Cvtpd2ps, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Cvtps2dq, new IntrinsicInfo(X86Instruction.Cvtps2dq, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Cvtps2pd, new IntrinsicInfo(X86Instruction.Cvtps2pd, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Cvtsd2si, new IntrinsicInfo(X86Instruction.Cvtsd2si, IntrinsicType.UnaryToGpr));
|
|
|
|
Add(Intrinsic.X86Cvtsd2ss, new IntrinsicInfo(X86Instruction.Cvtsd2ss, IntrinsicType.Binary));
|
2019-12-29 17:22:47 -08:00
|
|
|
Add(Intrinsic.X86Cvtsi2sd, new IntrinsicInfo(X86Instruction.Cvtsi2sd, IntrinsicType.BinaryGpr));
|
2019-11-09 18:21:03 -08:00
|
|
|
Add(Intrinsic.X86Cvtsi2si, new IntrinsicInfo(X86Instruction.Movd, IntrinsicType.UnaryToGpr));
|
2019-12-29 17:22:47 -08:00
|
|
|
Add(Intrinsic.X86Cvtsi2ss, new IntrinsicInfo(X86Instruction.Cvtsi2ss, IntrinsicType.BinaryGpr));
|
Add a new JIT compiler for CPU code (#693)
* Start of the ARMeilleure project
* Refactoring around the old IRAdapter, now renamed to PreAllocator
* Optimize the LowestBitSet method
* Add CLZ support and fix CLS implementation
* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks
* Implement the ByteSwap IR instruction, and some refactoring on the assembler
* Implement the DivideUI IR instruction and fix 64-bits IDIV
* Correct constant operand type on CSINC
* Move division instructions implementation to InstEmitDiv
* Fix destination type for the ConditionalSelect IR instruction
* Implement UMULH and SMULH, with new IR instructions
* Fix some issues with shift instructions
* Fix constant types for BFM instructions
* Fix up new tests using the new V128 struct
* Update tests
* Move DIV tests to a separate file
* Add support for calls, and some instructions that depends on them
* Start adding support for SIMD & FP types, along with some of the related ARM instructions
* Fix some typos and the divide instruction with FP operands
* Fix wrong method call on Clz_V
* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes
* Implement SIMD logical instructions and more misc. fixes
* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations
* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes
* Implement SIMD shift instruction and fix Dup_V
* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table
* Fix check with tolerance on tester
* Implement FP & SIMD comparison instructions, and some fixes
* Update FCVT (Scalar) encoding on the table to support the Half-float variants
* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes
* Use old memory access methods, made a start on SIMD memory insts support, some fixes
* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes
* Fix arguments count with struct return values, other fixes
* More instructions
* Misc. fixes and integrate LDj3SNuD fixes
* Update tests
* Add a faster linear scan allocator, unwinding support on windows, and other changes
* Update Ryujinx.HLE
* Update Ryujinx.Graphics
* Fix V128 return pointer passing, RCX is clobbered
* Update Ryujinx.Tests
* Update ITimeZoneService
* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks
* Use generic GetFunctionPointerForDelegate method and other tweaks
* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics
* Remove some unused code on the assembler
* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler
* Add hardware capability detection
* Fix regression on Sha1h and revert Fcm** changes
* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator
* Fix silly mistake introduced on last commit on CpuId
* Generate inline stack probes when the stack allocation is too large
* Initial support for the System-V ABI
* Support multiple destination operands
* Fix SSE2 VectorInsert8 path, and other fixes
* Change placement of XMM callee save and restore code to match other compilers
* Rename Dest to Destination and Inst to Instruction
* Fix a regression related to calls and the V128 type
* Add an extra space on comments to match code style
* Some refactoring
* Fix vector insert FP32 SSE2 path
* Port over the ARM32 instructions
* Avoid memory protection races on JIT Cache
* Another fix on VectorInsert FP32 (thanks to LDj3SNuD
* Float operands don't need to use the same register when VEX is supported
* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks
* Some nits, small improvements on the pre allocator
* CpuThreadState is gone
* Allow changing CPU emulators with a config entry
* Add runtime identifiers on the ARMeilleure project
* Allow switching between CPUs through a config entry (pt. 2)
* Change win10-x64 to win-x64 on projects
* Update the Ryujinx project to use ARMeilleure
* Ensure that the selected register is valid on the hybrid allocator
* Allow exiting on returns to 0 (should fix test regression)
* Remove register assignments for most used variables on the hybrid allocator
* Do not use fixed registers as spill temp
* Add missing namespace and remove unneeded using
* Address PR feedback
* Fix types, etc
* Enable AssumeStrictAbiCompliance by default
* Ensure that Spill and Fill don't load or store any more than necessary
2019-08-08 11:56:22 -07:00
|
|
|
Add(Intrinsic.X86Cvtss2sd, new IntrinsicInfo(X86Instruction.Cvtss2sd, IntrinsicType.Binary));
|
2019-12-29 17:22:47 -08:00
|
|
|
Add(Intrinsic.X86Cvtss2si, new IntrinsicInfo(X86Instruction.Cvtss2si, IntrinsicType.UnaryToGpr));
|
Add a new JIT compiler for CPU code (#693)
* Start of the ARMeilleure project
* Refactoring around the old IRAdapter, now renamed to PreAllocator
* Optimize the LowestBitSet method
* Add CLZ support and fix CLS implementation
* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks
* Implement the ByteSwap IR instruction, and some refactoring on the assembler
* Implement the DivideUI IR instruction and fix 64-bits IDIV
* Correct constant operand type on CSINC
* Move division instructions implementation to InstEmitDiv
* Fix destination type for the ConditionalSelect IR instruction
* Implement UMULH and SMULH, with new IR instructions
* Fix some issues with shift instructions
* Fix constant types for BFM instructions
* Fix up new tests using the new V128 struct
* Update tests
* Move DIV tests to a separate file
* Add support for calls, and some instructions that depends on them
* Start adding support for SIMD & FP types, along with some of the related ARM instructions
* Fix some typos and the divide instruction with FP operands
* Fix wrong method call on Clz_V
* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes
* Implement SIMD logical instructions and more misc. fixes
* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations
* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes
* Implement SIMD shift instruction and fix Dup_V
* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table
* Fix check with tolerance on tester
* Implement FP & SIMD comparison instructions, and some fixes
* Update FCVT (Scalar) encoding on the table to support the Half-float variants
* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes
* Use old memory access methods, made a start on SIMD memory insts support, some fixes
* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes
* Fix arguments count with struct return values, other fixes
* More instructions
* Misc. fixes and integrate LDj3SNuD fixes
* Update tests
* Add a faster linear scan allocator, unwinding support on windows, and other changes
* Update Ryujinx.HLE
* Update Ryujinx.Graphics
* Fix V128 return pointer passing, RCX is clobbered
* Update Ryujinx.Tests
* Update ITimeZoneService
* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks
* Use generic GetFunctionPointerForDelegate method and other tweaks
* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics
* Remove some unused code on the assembler
* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler
* Add hardware capability detection
* Fix regression on Sha1h and revert Fcm** changes
* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator
* Fix silly mistake introduced on last commit on CpuId
* Generate inline stack probes when the stack allocation is too large
* Initial support for the System-V ABI
* Support multiple destination operands
* Fix SSE2 VectorInsert8 path, and other fixes
* Change placement of XMM callee save and restore code to match other compilers
* Rename Dest to Destination and Inst to Instruction
* Fix a regression related to calls and the V128 type
* Add an extra space on comments to match code style
* Some refactoring
* Fix vector insert FP32 SSE2 path
* Port over the ARM32 instructions
* Avoid memory protection races on JIT Cache
* Another fix on VectorInsert FP32 (thanks to LDj3SNuD
* Float operands don't need to use the same register when VEX is supported
* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks
* Some nits, small improvements on the pre allocator
* CpuThreadState is gone
* Allow changing CPU emulators with a config entry
* Add runtime identifiers on the ARMeilleure project
* Allow switching between CPUs through a config entry (pt. 2)
* Change win10-x64 to win-x64 on projects
* Update the Ryujinx project to use ARMeilleure
* Ensure that the selected register is valid on the hybrid allocator
* Allow exiting on returns to 0 (should fix test regression)
* Remove register assignments for most used variables on the hybrid allocator
* Do not use fixed registers as spill temp
* Add missing namespace and remove unneeded using
* Address PR feedback
* Fix types, etc
* Enable AssumeStrictAbiCompliance by default
* Ensure that Spill and Fill don't load or store any more than necessary
2019-08-08 11:56:22 -07:00
|
|
|
Add(Intrinsic.X86Divpd, new IntrinsicInfo(X86Instruction.Divpd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Divps, new IntrinsicInfo(X86Instruction.Divps, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Divsd, new IntrinsicInfo(X86Instruction.Divsd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Divss, new IntrinsicInfo(X86Instruction.Divss, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Haddpd, new IntrinsicInfo(X86Instruction.Haddpd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Haddps, new IntrinsicInfo(X86Instruction.Haddps, IntrinsicType.Binary));
|
2020-03-04 16:41:33 -08:00
|
|
|
Add(Intrinsic.X86Insertps, new IntrinsicInfo(X86Instruction.Insertps, IntrinsicType.TernaryImm));
|
Add a new JIT compiler for CPU code (#693)
* Start of the ARMeilleure project
* Refactoring around the old IRAdapter, now renamed to PreAllocator
* Optimize the LowestBitSet method
* Add CLZ support and fix CLS implementation
* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks
* Implement the ByteSwap IR instruction, and some refactoring on the assembler
* Implement the DivideUI IR instruction and fix 64-bits IDIV
* Correct constant operand type on CSINC
* Move division instructions implementation to InstEmitDiv
* Fix destination type for the ConditionalSelect IR instruction
* Implement UMULH and SMULH, with new IR instructions
* Fix some issues with shift instructions
* Fix constant types for BFM instructions
* Fix up new tests using the new V128 struct
* Update tests
* Move DIV tests to a separate file
* Add support for calls, and some instructions that depends on them
* Start adding support for SIMD & FP types, along with some of the related ARM instructions
* Fix some typos and the divide instruction with FP operands
* Fix wrong method call on Clz_V
* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes
* Implement SIMD logical instructions and more misc. fixes
* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations
* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes
* Implement SIMD shift instruction and fix Dup_V
* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table
* Fix check with tolerance on tester
* Implement FP & SIMD comparison instructions, and some fixes
* Update FCVT (Scalar) encoding on the table to support the Half-float variants
* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes
* Use old memory access methods, made a start on SIMD memory insts support, some fixes
* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes
* Fix arguments count with struct return values, other fixes
* More instructions
* Misc. fixes and integrate LDj3SNuD fixes
* Update tests
* Add a faster linear scan allocator, unwinding support on windows, and other changes
* Update Ryujinx.HLE
* Update Ryujinx.Graphics
* Fix V128 return pointer passing, RCX is clobbered
* Update Ryujinx.Tests
* Update ITimeZoneService
* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks
* Use generic GetFunctionPointerForDelegate method and other tweaks
* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics
* Remove some unused code on the assembler
* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler
* Add hardware capability detection
* Fix regression on Sha1h and revert Fcm** changes
* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator
* Fix silly mistake introduced on last commit on CpuId
* Generate inline stack probes when the stack allocation is too large
* Initial support for the System-V ABI
* Support multiple destination operands
* Fix SSE2 VectorInsert8 path, and other fixes
* Change placement of XMM callee save and restore code to match other compilers
* Rename Dest to Destination and Inst to Instruction
* Fix a regression related to calls and the V128 type
* Add an extra space on comments to match code style
* Some refactoring
* Fix vector insert FP32 SSE2 path
* Port over the ARM32 instructions
* Avoid memory protection races on JIT Cache
* Another fix on VectorInsert FP32 (thanks to LDj3SNuD
* Float operands don't need to use the same register when VEX is supported
* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks
* Some nits, small improvements on the pre allocator
* CpuThreadState is gone
* Allow changing CPU emulators with a config entry
* Add runtime identifiers on the ARMeilleure project
* Allow switching between CPUs through a config entry (pt. 2)
* Change win10-x64 to win-x64 on projects
* Update the Ryujinx project to use ARMeilleure
* Ensure that the selected register is valid on the hybrid allocator
* Allow exiting on returns to 0 (should fix test regression)
* Remove register assignments for most used variables on the hybrid allocator
* Do not use fixed registers as spill temp
* Add missing namespace and remove unneeded using
* Address PR feedback
* Fix types, etc
* Enable AssumeStrictAbiCompliance by default
* Ensure that Spill and Fill don't load or store any more than necessary
2019-08-08 11:56:22 -07:00
|
|
|
Add(Intrinsic.X86Maxpd, new IntrinsicInfo(X86Instruction.Maxpd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Maxps, new IntrinsicInfo(X86Instruction.Maxps, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Maxsd, new IntrinsicInfo(X86Instruction.Maxsd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Maxss, new IntrinsicInfo(X86Instruction.Maxss, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Minpd, new IntrinsicInfo(X86Instruction.Minpd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Minps, new IntrinsicInfo(X86Instruction.Minps, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Minsd, new IntrinsicInfo(X86Instruction.Minsd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Minss, new IntrinsicInfo(X86Instruction.Minss, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Movhlps, new IntrinsicInfo(X86Instruction.Movhlps, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Movlhps, new IntrinsicInfo(X86Instruction.Movlhps, IntrinsicType.Binary));
|
2020-03-04 16:41:33 -08:00
|
|
|
Add(Intrinsic.X86Movss, new IntrinsicInfo(X86Instruction.Movss, IntrinsicType.Binary));
|
Add a new JIT compiler for CPU code (#693)
* Start of the ARMeilleure project
* Refactoring around the old IRAdapter, now renamed to PreAllocator
* Optimize the LowestBitSet method
* Add CLZ support and fix CLS implementation
* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks
* Implement the ByteSwap IR instruction, and some refactoring on the assembler
* Implement the DivideUI IR instruction and fix 64-bits IDIV
* Correct constant operand type on CSINC
* Move division instructions implementation to InstEmitDiv
* Fix destination type for the ConditionalSelect IR instruction
* Implement UMULH and SMULH, with new IR instructions
* Fix some issues with shift instructions
* Fix constant types for BFM instructions
* Fix up new tests using the new V128 struct
* Update tests
* Move DIV tests to a separate file
* Add support for calls, and some instructions that depends on them
* Start adding support for SIMD & FP types, along with some of the related ARM instructions
* Fix some typos and the divide instruction with FP operands
* Fix wrong method call on Clz_V
* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes
* Implement SIMD logical instructions and more misc. fixes
* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations
* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes
* Implement SIMD shift instruction and fix Dup_V
* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table
* Fix check with tolerance on tester
* Implement FP & SIMD comparison instructions, and some fixes
* Update FCVT (Scalar) encoding on the table to support the Half-float variants
* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes
* Use old memory access methods, made a start on SIMD memory insts support, some fixes
* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes
* Fix arguments count with struct return values, other fixes
* More instructions
* Misc. fixes and integrate LDj3SNuD fixes
* Update tests
* Add a faster linear scan allocator, unwinding support on windows, and other changes
* Update Ryujinx.HLE
* Update Ryujinx.Graphics
* Fix V128 return pointer passing, RCX is clobbered
* Update Ryujinx.Tests
* Update ITimeZoneService
* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks
* Use generic GetFunctionPointerForDelegate method and other tweaks
* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics
* Remove some unused code on the assembler
* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler
* Add hardware capability detection
* Fix regression on Sha1h and revert Fcm** changes
* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator
* Fix silly mistake introduced on last commit on CpuId
* Generate inline stack probes when the stack allocation is too large
* Initial support for the System-V ABI
* Support multiple destination operands
* Fix SSE2 VectorInsert8 path, and other fixes
* Change placement of XMM callee save and restore code to match other compilers
* Rename Dest to Destination and Inst to Instruction
* Fix a regression related to calls and the V128 type
* Add an extra space on comments to match code style
* Some refactoring
* Fix vector insert FP32 SSE2 path
* Port over the ARM32 instructions
* Avoid memory protection races on JIT Cache
* Another fix on VectorInsert FP32 (thanks to LDj3SNuD
* Float operands don't need to use the same register when VEX is supported
* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks
* Some nits, small improvements on the pre allocator
* CpuThreadState is gone
* Allow changing CPU emulators with a config entry
* Add runtime identifiers on the ARMeilleure project
* Allow switching between CPUs through a config entry (pt. 2)
* Change win10-x64 to win-x64 on projects
* Update the Ryujinx project to use ARMeilleure
* Ensure that the selected register is valid on the hybrid allocator
* Allow exiting on returns to 0 (should fix test regression)
* Remove register assignments for most used variables on the hybrid allocator
* Do not use fixed registers as spill temp
* Add missing namespace and remove unneeded using
* Address PR feedback
* Fix types, etc
* Enable AssumeStrictAbiCompliance by default
* Ensure that Spill and Fill don't load or store any more than necessary
2019-08-08 11:56:22 -07:00
|
|
|
Add(Intrinsic.X86Mulpd, new IntrinsicInfo(X86Instruction.Mulpd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Mulps, new IntrinsicInfo(X86Instruction.Mulps, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Mulsd, new IntrinsicInfo(X86Instruction.Mulsd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Mulss, new IntrinsicInfo(X86Instruction.Mulss, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Paddb, new IntrinsicInfo(X86Instruction.Paddb, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Paddw, new IntrinsicInfo(X86Instruction.Paddw, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pand, new IntrinsicInfo(X86Instruction.Pand, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pandn, new IntrinsicInfo(X86Instruction.Pandn, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pavgb, new IntrinsicInfo(X86Instruction.Pavgb, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pavgw, new IntrinsicInfo(X86Instruction.Pavgw, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pblendvb, new IntrinsicInfo(X86Instruction.Pblendvb, IntrinsicType.Ternary));
|
2020-06-05 03:58:27 -07:00
|
|
|
Add(Intrinsic.X86Pclmulqdq, new IntrinsicInfo(X86Instruction.Pclmulqdq, IntrinsicType.TernaryImm));
|
Add a new JIT compiler for CPU code (#693)
* Start of the ARMeilleure project
* Refactoring around the old IRAdapter, now renamed to PreAllocator
* Optimize the LowestBitSet method
* Add CLZ support and fix CLS implementation
* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks
* Implement the ByteSwap IR instruction, and some refactoring on the assembler
* Implement the DivideUI IR instruction and fix 64-bits IDIV
* Correct constant operand type on CSINC
* Move division instructions implementation to InstEmitDiv
* Fix destination type for the ConditionalSelect IR instruction
* Implement UMULH and SMULH, with new IR instructions
* Fix some issues with shift instructions
* Fix constant types for BFM instructions
* Fix up new tests using the new V128 struct
* Update tests
* Move DIV tests to a separate file
* Add support for calls, and some instructions that depends on them
* Start adding support for SIMD & FP types, along with some of the related ARM instructions
* Fix some typos and the divide instruction with FP operands
* Fix wrong method call on Clz_V
* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes
* Implement SIMD logical instructions and more misc. fixes
* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations
* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes
* Implement SIMD shift instruction and fix Dup_V
* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table
* Fix check with tolerance on tester
* Implement FP & SIMD comparison instructions, and some fixes
* Update FCVT (Scalar) encoding on the table to support the Half-float variants
* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes
* Use old memory access methods, made a start on SIMD memory insts support, some fixes
* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes
* Fix arguments count with struct return values, other fixes
* More instructions
* Misc. fixes and integrate LDj3SNuD fixes
* Update tests
* Add a faster linear scan allocator, unwinding support on windows, and other changes
* Update Ryujinx.HLE
* Update Ryujinx.Graphics
* Fix V128 return pointer passing, RCX is clobbered
* Update Ryujinx.Tests
* Update ITimeZoneService
* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks
* Use generic GetFunctionPointerForDelegate method and other tweaks
* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics
* Remove some unused code on the assembler
* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler
* Add hardware capability detection
* Fix regression on Sha1h and revert Fcm** changes
* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator
* Fix silly mistake introduced on last commit on CpuId
* Generate inline stack probes when the stack allocation is too large
* Initial support for the System-V ABI
* Support multiple destination operands
* Fix SSE2 VectorInsert8 path, and other fixes
* Change placement of XMM callee save and restore code to match other compilers
* Rename Dest to Destination and Inst to Instruction
* Fix a regression related to calls and the V128 type
* Add an extra space on comments to match code style
* Some refactoring
* Fix vector insert FP32 SSE2 path
* Port over the ARM32 instructions
* Avoid memory protection races on JIT Cache
* Another fix on VectorInsert FP32 (thanks to LDj3SNuD
* Float operands don't need to use the same register when VEX is supported
* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks
* Some nits, small improvements on the pre allocator
* CpuThreadState is gone
* Allow changing CPU emulators with a config entry
* Add runtime identifiers on the ARMeilleure project
* Allow switching between CPUs through a config entry (pt. 2)
* Change win10-x64 to win-x64 on projects
* Update the Ryujinx project to use ARMeilleure
* Ensure that the selected register is valid on the hybrid allocator
* Allow exiting on returns to 0 (should fix test regression)
* Remove register assignments for most used variables on the hybrid allocator
* Do not use fixed registers as spill temp
* Add missing namespace and remove unneeded using
* Address PR feedback
* Fix types, etc
* Enable AssumeStrictAbiCompliance by default
* Ensure that Spill and Fill don't load or store any more than necessary
2019-08-08 11:56:22 -07:00
|
|
|
Add(Intrinsic.X86Pcmpeqb, new IntrinsicInfo(X86Instruction.Pcmpeqb, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pcmpeqd, new IntrinsicInfo(X86Instruction.Pcmpeqd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pcmpeqq, new IntrinsicInfo(X86Instruction.Pcmpeqq, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pcmpeqw, new IntrinsicInfo(X86Instruction.Pcmpeqw, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pcmpgtb, new IntrinsicInfo(X86Instruction.Pcmpgtb, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pcmpgtd, new IntrinsicInfo(X86Instruction.Pcmpgtd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pcmpgtq, new IntrinsicInfo(X86Instruction.Pcmpgtq, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pcmpgtw, new IntrinsicInfo(X86Instruction.Pcmpgtw, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pmaxsb, new IntrinsicInfo(X86Instruction.Pmaxsb, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pmaxsd, new IntrinsicInfo(X86Instruction.Pmaxsd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pmaxsw, new IntrinsicInfo(X86Instruction.Pmaxsw, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pmaxub, new IntrinsicInfo(X86Instruction.Pmaxub, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pmaxud, new IntrinsicInfo(X86Instruction.Pmaxud, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pmaxuw, new IntrinsicInfo(X86Instruction.Pmaxuw, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pminsb, new IntrinsicInfo(X86Instruction.Pminsb, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pminsd, new IntrinsicInfo(X86Instruction.Pminsd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pminsw, new IntrinsicInfo(X86Instruction.Pminsw, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pminub, new IntrinsicInfo(X86Instruction.Pminub, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pminud, new IntrinsicInfo(X86Instruction.Pminud, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pminuw, new IntrinsicInfo(X86Instruction.Pminuw, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pmovsxbw, new IntrinsicInfo(X86Instruction.Pmovsxbw, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Pmovsxdq, new IntrinsicInfo(X86Instruction.Pmovsxdq, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Pmovsxwd, new IntrinsicInfo(X86Instruction.Pmovsxwd, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Pmovzxbw, new IntrinsicInfo(X86Instruction.Pmovzxbw, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Pmovzxdq, new IntrinsicInfo(X86Instruction.Pmovzxdq, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Pmovzxwd, new IntrinsicInfo(X86Instruction.Pmovzxwd, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Pmulld, new IntrinsicInfo(X86Instruction.Pmulld, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pmullw, new IntrinsicInfo(X86Instruction.Pmullw, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Popcnt, new IntrinsicInfo(X86Instruction.Popcnt, IntrinsicType.PopCount));
|
|
|
|
Add(Intrinsic.X86Por, new IntrinsicInfo(X86Instruction.Por, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pshufb, new IntrinsicInfo(X86Instruction.Pshufb, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pslld, new IntrinsicInfo(X86Instruction.Pslld, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pslldq, new IntrinsicInfo(X86Instruction.Pslldq, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Psllq, new IntrinsicInfo(X86Instruction.Psllq, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Psllw, new IntrinsicInfo(X86Instruction.Psllw, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Psrad, new IntrinsicInfo(X86Instruction.Psrad, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Psraw, new IntrinsicInfo(X86Instruction.Psraw, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Psrld, new IntrinsicInfo(X86Instruction.Psrld, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Psrlq, new IntrinsicInfo(X86Instruction.Psrlq, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Psrldq, new IntrinsicInfo(X86Instruction.Psrldq, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Psrlw, new IntrinsicInfo(X86Instruction.Psrlw, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Psubb, new IntrinsicInfo(X86Instruction.Psubb, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Psubd, new IntrinsicInfo(X86Instruction.Psubd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Psubq, new IntrinsicInfo(X86Instruction.Psubq, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Psubw, new IntrinsicInfo(X86Instruction.Psubw, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Punpckhbw, new IntrinsicInfo(X86Instruction.Punpckhbw, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Punpckhdq, new IntrinsicInfo(X86Instruction.Punpckhdq, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Punpckhqdq, new IntrinsicInfo(X86Instruction.Punpckhqdq, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Punpckhwd, new IntrinsicInfo(X86Instruction.Punpckhwd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Punpcklbw, new IntrinsicInfo(X86Instruction.Punpcklbw, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Punpckldq, new IntrinsicInfo(X86Instruction.Punpckldq, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Punpcklqdq, new IntrinsicInfo(X86Instruction.Punpcklqdq, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Punpcklwd, new IntrinsicInfo(X86Instruction.Punpcklwd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Pxor, new IntrinsicInfo(X86Instruction.Pxor, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Rcpps, new IntrinsicInfo(X86Instruction.Rcpps, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Rcpss, new IntrinsicInfo(X86Instruction.Rcpss, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Roundpd, new IntrinsicInfo(X86Instruction.Roundpd, IntrinsicType.BinaryImm));
|
|
|
|
Add(Intrinsic.X86Roundps, new IntrinsicInfo(X86Instruction.Roundps, IntrinsicType.BinaryImm));
|
|
|
|
Add(Intrinsic.X86Roundsd, new IntrinsicInfo(X86Instruction.Roundsd, IntrinsicType.BinaryImm));
|
|
|
|
Add(Intrinsic.X86Roundss, new IntrinsicInfo(X86Instruction.Roundss, IntrinsicType.BinaryImm));
|
|
|
|
Add(Intrinsic.X86Rsqrtps, new IntrinsicInfo(X86Instruction.Rsqrtps, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Rsqrtss, new IntrinsicInfo(X86Instruction.Rsqrtss, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Shufpd, new IntrinsicInfo(X86Instruction.Shufpd, IntrinsicType.TernaryImm));
|
|
|
|
Add(Intrinsic.X86Shufps, new IntrinsicInfo(X86Instruction.Shufps, IntrinsicType.TernaryImm));
|
|
|
|
Add(Intrinsic.X86Sqrtpd, new IntrinsicInfo(X86Instruction.Sqrtpd, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Sqrtps, new IntrinsicInfo(X86Instruction.Sqrtps, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Sqrtsd, new IntrinsicInfo(X86Instruction.Sqrtsd, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Sqrtss, new IntrinsicInfo(X86Instruction.Sqrtss, IntrinsicType.Unary));
|
|
|
|
Add(Intrinsic.X86Subpd, new IntrinsicInfo(X86Instruction.Subpd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Subps, new IntrinsicInfo(X86Instruction.Subps, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Subsd, new IntrinsicInfo(X86Instruction.Subsd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Subss, new IntrinsicInfo(X86Instruction.Subss, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Unpckhpd, new IntrinsicInfo(X86Instruction.Unpckhpd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Unpckhps, new IntrinsicInfo(X86Instruction.Unpckhps, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Unpcklpd, new IntrinsicInfo(X86Instruction.Unpcklpd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary));
|
|
|
|
Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary));
|
|
|
|
}
|
|
|
|
|
|
|
|
private static void Add(Intrinsic intrin, IntrinsicInfo info)
|
|
|
|
{
|
|
|
|
_intrinTable[(int)intrin] = info;
|
|
|
|
}
|
|
|
|
|
|
|
|
public static IntrinsicInfo GetInfo(Intrinsic intrin)
|
|
|
|
{
|
|
|
|
return _intrinTable[(int)intrin];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|