Add a new JIT compiler for CPU code (#693)
* Start of the ARMeilleure project
* Refactoring around the old IRAdapter, now renamed to PreAllocator
* Optimize the LowestBitSet method
* Add CLZ support and fix CLS implementation
* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks
* Implement the ByteSwap IR instruction, and some refactoring on the assembler
* Implement the DivideUI IR instruction and fix 64-bits IDIV
* Correct constant operand type on CSINC
* Move division instructions implementation to InstEmitDiv
* Fix destination type for the ConditionalSelect IR instruction
* Implement UMULH and SMULH, with new IR instructions
* Fix some issues with shift instructions
* Fix constant types for BFM instructions
* Fix up new tests using the new V128 struct
* Update tests
* Move DIV tests to a separate file
* Add support for calls, and some instructions that depend on them
* Start adding support for SIMD & FP types, along with some of the related ARM instructions
* Fix some typos and the divide instruction with FP operands
* Fix wrong method call on Clz_V
* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes
* Implement SIMD logical instructions and more misc. fixes
* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations
* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes
* Implement SIMD shift instruction and fix Dup_V
* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table
* Fix check with tolerance on tester
* Implement FP & SIMD comparison instructions, and some fixes
* Update FCVT (Scalar) encoding on the table to support the Half-float variants
* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes
* Use old memory access methods, made a start on SIMD memory insts support, some fixes
* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes
* Fix arguments count with struct return values, other fixes
* More instructions
* Misc. fixes and integrate LDj3SNuD fixes
* Update tests
* Add a faster linear scan allocator, unwinding support on windows, and other changes
* Update Ryujinx.HLE
* Update Ryujinx.Graphics
* Fix V128 return pointer passing, RCX is clobbered
* Update Ryujinx.Tests
* Update ITimeZoneService
* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks
* Use generic GetFunctionPointerForDelegate method and other tweaks
* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics
* Remove some unused code on the assembler
* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler
* Add hardware capability detection
* Fix regression on Sha1h and revert Fcm** changes
* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator
* Fix silly mistake introduced on last commit on CpuId
* Generate inline stack probes when the stack allocation is too large
* Initial support for the System-V ABI
* Support multiple destination operands
* Fix SSE2 VectorInsert8 path, and other fixes
* Change placement of XMM callee save and restore code to match other compilers
* Rename Dest to Destination and Inst to Instruction
* Fix a regression related to calls and the V128 type
* Add an extra space on comments to match code style
* Some refactoring
* Fix vector insert FP32 SSE2 path
* Port over the ARM32 instructions
* Avoid memory protection races on JIT Cache
* Another fix on VectorInsert FP32 (thanks to LDj3SNuD)
* Float operands don't need to use the same register when VEX is supported
* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks
* Some nits, small improvements on the pre allocator
* CpuThreadState is gone
* Allow changing CPU emulators with a config entry
* Add runtime identifiers on the ARMeilleure project
* Allow switching between CPUs through a config entry (pt. 2)
* Change win10-x64 to win-x64 on projects
* Update the Ryujinx project to use ARMeilleure
* Ensure that the selected register is valid on the hybrid allocator
* Allow exiting on returns to 0 (should fix test regression)
* Remove register assignments for most used variables on the hybrid allocator
* Do not use fixed registers as spill temp
* Add missing namespace and remove unneeded using
* Address PR feedback
* Fix types, etc
* Enable AssumeStrictAbiCompliance by default
* Ensure that Spill and Fill don't load or store any more than necessary
2019-08-08 11:56:22 -07:00
|
|
|
namespace ARMeilleure.Instructions
{
    /// <summary>
    /// Names of the guest instructions known to this project, one member per instruction
    /// (or instruction variant).
    /// </summary>
    /// <remarks>
    /// Members carry no explicit values, so each one takes the next sequential ordinal
    /// starting at zero. Inserting or reordering members therefore changes the numeric
    /// value of every member that follows; append new names at the end of their group
    /// only if nothing depends on the ordinals.
    /// The section comments below group the members into the AArch64 base set, the
    /// AArch64 FP and SIMD set, and the AArch32 base set.
    /// </remarks>
    enum InstName
    {
        // Base (AArch64)
        Adc,
        Adcs,
        Add,
        Adds,
        Adr,
        Adrp,
        And,
        Ands,
        Asrv,
        B,
        B_Cond,
        Bfm,
        Bic,
        Bics,
        Bl,
        Blr,
        Br,
        Brk,
        Cbnz,
        Cbz,
        Ccmn,
        Ccmp,
        Clrex,
        Cls,
        Clz,
        Crc32b,
        Crc32h,
        Crc32w,
        Crc32x,
        Crc32cb,
        Crc32ch,
        Crc32cw,
        Crc32cx,
        Csel,
        Csinc,
        Csinv,
        Csneg,
        Dmb,
        Dsb,
        Eon,
        Eor,
        Extr,
        Hint,
        Isb,
        Ldar,
        Ldaxp,
        Ldaxr,
        Ldp,
        Ldr,
        Ldr_Literal,
        Ldrs,
        Ldxr,
        Ldxp,
        Lslv,
        Lsrv,
        Madd,
        Movk,
        Movn,
        Movz,
        Mrs,
        Msr,
        Msub,
        Nop,
        Orn,
        Orr,
        // NOTE(review): this looks like a transposition of the A64 "PRFM" mnemonic.
        // Kept unchanged because code outside this file may reference the identifier.
        Pfrm,
        Rbit,
        Ret,
        Rev16,
        Rev32,
        Rev64,
        Rorv,
        Sbc,
        Sbcs,
        Sbfm,
        Sdiv,
        Smaddl,
        Smsubl,
        Smulh,
        Stlr,
        Stlxp,
        Stlxr,
        Stp,
        Str,
        Stxp,
        Stxr,
        Sub,
        Subs,
        Svc,
        Sys,
        Tbnz,
        Tbz,
        Ubfm,
        Udiv,
        Umaddl,
        Umsubl,
        Umulh,
        Und,

        // FP & SIMD (AArch64)
        Abs_S,
        Abs_V,
        Add_S,
        Add_V,
        Addhn_V,
        Addp_S,
        Addp_V,
        Addv_V,
        Aesd_V,
        Aese_V,
        Aesimc_V,
        Aesmc_V,
        And_V,
        Bic_V,
        Bic_Vi,
        Bif_V,
        Bit_V,
        Bsl_V,
        Cls_V,
        Clz_V,
        Cmeq_S,
        Cmeq_V,
        Cmge_S,
        Cmge_V,
        Cmgt_S,
        Cmgt_V,
        Cmhi_S,
        Cmhi_V,
        Cmhs_S,
        Cmhs_V,
        Cmle_S,
        Cmle_V,
        Cmlt_S,
        Cmlt_V,
        Cmtst_S,
        Cmtst_V,
        Cnt_V,
        Dup_Gp,
        Dup_S,
        Dup_V,
        Eor_V,
        Ext_V,
        Fabd_S,
        Fabd_V,
        Fabs_S,
        Fabs_V,
        Fadd_S,
        Fadd_V,
        Faddp_S,
        Faddp_V,
        Fccmp_S,
        Fccmpe_S,
        Fcmeq_S,
        Fcmeq_V,
        Fcmge_S,
        Fcmge_V,
        Fcmgt_S,
        Fcmgt_V,
        Fcmle_S,
        Fcmle_V,
        Fcmlt_S,
        Fcmlt_V,
        Fcmp_S,
        Fcmpe_S,
        Fcsel_S,
        Fcvt_S,
        Fcvtas_Gp,
        Fcvtau_Gp,
        Fcvtl_V,
        Fcvtms_Gp,
        Fcvtmu_Gp,
        Fcvtn_V,
        Fcvtns_S,
        Fcvtns_V,
        Fcvtnu_S,
        Fcvtnu_V,
        Fcvtps_Gp,
        Fcvtpu_Gp,
        Fcvtzs_Gp,
        Fcvtzs_Gp_Fixed,
        Fcvtzs_S,
        Fcvtzs_V,
        Fcvtzs_V_Fixed,
        Fcvtzu_Gp,
        Fcvtzu_Gp_Fixed,
        Fcvtzu_S,
        Fcvtzu_V,
        Fcvtzu_V_Fixed,
        Fdiv_S,
        Fdiv_V,
        Fmadd_S,
        Fmax_S,
        Fmax_V,
        Fmaxnm_S,
        Fmaxnm_V,
        Fmaxp_V,
        Fmin_S,
        Fmin_V,
        Fminnm_S,
        Fminnm_V,
        Fminp_V,
        Fmla_Se,
        Fmla_V,
        Fmla_Ve,
        Fmls_Se,
        Fmls_V,
        Fmls_Ve,
        Fmov_S,
        Fmov_Si,
        Fmov_Vi,
        Fmov_Ftoi,
        Fmov_Itof,
        Fmov_Ftoi1,
        Fmov_Itof1,
        Fmsub_S,
        Fmul_S,
        Fmul_Se,
        Fmul_V,
        Fmul_Ve,
        Fmulx_S,
        Fmulx_Se,
        Fmulx_V,
        Fmulx_Ve,
        Fneg_S,
        Fneg_V,
        Fnmadd_S,
        Fnmsub_S,
        Fnmul_S,
        Frecpe_S,
        Frecpe_V,
        Frecps_S,
        Frecps_V,
        Frecpx_S,
        Frinta_S,
        Frinta_V,
        Frinti_S,
        Frinti_V,
        Frintm_S,
        Frintm_V,
        Frintn_S,
        Frintn_V,
        Frintp_S,
        Frintp_V,
        Frintx_S,
        Frintx_V,
        Frintz_S,
        Frintz_V,
        Frsqrte_S,
        Frsqrte_V,
        Frsqrts_S,
        Frsqrts_V,
        Fsqrt_S,
        Fsqrt_V,
        Fsub_S,
        Fsub_V,
        Ins_Gp,
        Ins_V,
        Ld__Vms,
        Ld__Vss,
        Mla_V,
        Mla_Ve,
        Mls_V,
        Mls_Ve,
        Movi_V,
        Mul_V,
        Mul_Ve,
        Mvni_V,
        Neg_S,
        Neg_V,
        Not_V,
        Orn_V,
        Orr_V,
        Orr_Vi,
        Raddhn_V,
        Rbit_V,
        Rev16_V,
        Rev32_V,
        Rev64_V,
        Rshrn_V,
        Rsubhn_V,
        Saba_V,
        Sabal_V,
        Sabd_V,
        Sabdl_V,
        Sadalp_V,
        Saddl_V,
        Saddlp_V,
        Saddlv_V,
        Saddw_V,
        Scvtf_Gp,
        Scvtf_Gp_Fixed,
        Scvtf_S,
        Scvtf_V,
        Scvtf_V_Fixed,
        Sha1c_V,
        Sha1h_V,
        Sha1m_V,
        Sha1p_V,
        Sha1su0_V,
        Sha1su1_V,
        Sha256h_V,
        Sha256h2_V,
        Sha256su0_V,
        Sha256su1_V,
        Shadd_V,
        Shl_S,
        Shl_V,
        Shll_V,
        Shrn_V,
        Shsub_V,
        Sli_S,
        Sli_V,
        Smax_V,
        Smaxp_V,
        Smaxv_V,
        Smin_V,
        Sminp_V,
        Sminv_V,
        Smlal_V,
        Smlal_Ve,
        Smlsl_V,
        Smlsl_Ve,
        Smov_S,
        Smull_V,
        Smull_Ve,
        Sqabs_S,
        Sqabs_V,
        Sqadd_S,
        Sqadd_V,
        Sqdmulh_S,
        Sqdmulh_V,
        Sqneg_S,
        Sqneg_V,
        Sqrdmulh_S,
        Sqrdmulh_V,
        Sqrshl_V,
        Sqrshrn_S,
        Sqrshrn_V,
        Sqrshrun_S,
        Sqrshrun_V,
        Sqshl_V,
        Sqshrn_S,
        Sqshrn_V,
        Sqshrun_S,
        Sqshrun_V,
        Sqsub_S,
        Sqsub_V,
        Sqxtn_S,
        Sqxtn_V,
        Sqxtun_S,
        Sqxtun_V,
        Srhadd_V,
        Sri_S,
        Sri_V,
        Srshl_V,
        Srshr_S,
        Srshr_V,
        Srsra_S,
        Srsra_V,
        Sshl_V,
        Sshll_V,
        Sshr_S,
        Sshr_V,
        Ssra_S,
        Ssra_V,
        Ssubl_V,
        Ssubw_V,
        St__Vms,
        St__Vss,
        Sub_S,
        Sub_V,
        Subhn_V,
        Suqadd_S,
        Suqadd_V,
        Tbl_V,
        Tbx_V,
        Trn1_V,
        Trn2_V,
        Uaba_V,
        Uabal_V,
        Uabd_V,
        Uabdl_V,
        Uadalp_V,
        Uaddl_V,
        Uaddlp_V,
        Uaddlv_V,
        Uaddw_V,
        Ucvtf_Gp,
        Ucvtf_Gp_Fixed,
        Ucvtf_S,
        Ucvtf_V,
        Ucvtf_V_Fixed,
        Uhadd_V,
        Uhsub_V,
        Umax_V,
        Umaxp_V,
        Umaxv_V,
        Umin_V,
        Uminp_V,
        Uminv_V,
        Umlal_V,
        Umlal_Ve,
        Umlsl_V,
        Umlsl_Ve,
        Umov_S,
        Umull_V,
        Umull_Ve,
        Uqadd_S,
        Uqadd_V,
        Uqrshl_V,
        Uqrshrn_S,
        Uqrshrn_V,
        Uqshl_V,
        Uqshrn_S,
        Uqshrn_V,
        Uqsub_S,
        Uqsub_V,
        Uqxtn_S,
        Uqxtn_V,
        Urhadd_V,
        Urshl_V,
        Urshr_S,
        Urshr_V,
        Ursra_S,
        Ursra_V,
        Ushl_V,
        Ushll_V,
        Ushr_S,
        Ushr_V,
        Usqadd_S,
        Usqadd_V,
        Usra_S,
        Usra_V,
        Usubl_V,
        Usubw_V,
        Uzp1_V,
        Uzp2_V,
        Xtn_V,
        Zip1_V,
        Zip2_V,

        // Base (AArch32)
        Blx,
        Bx,
        Cmp,
        Ldm,
        Ldrb,
        Ldrd,
        Ldrh,
        Ldrsb,
        Ldrsh,
        Mov,
        Stm,
        Strb,
        Strd,
        Strh
    }
}
|