mirror of
				https://github.com/Ryujinx/Ryujinx.git
				synced 2025-10-24 15:20:32 -07:00 
			
		
		
		
	Misc. CPU optimizations (#575)
* Add optimizations related to caller/callee saved registers, thread synchronization and disable tier 0 * Refactoring * Add a config entry to enable or disable the reg load/store opt. * Remove unnecessary register state stores for calls when the callee is known * Rename IoType to VarType * Enable tier 0 while fixing some perf issues related to tier 0 * Small tweak -- Compile before adding to the cache, to avoid lags * Add required config entry
This commit is contained in:
		
							
								
								
									
										9
									
								
								ChocolArm64/Translation/CallType.cs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								ChocolArm64/Translation/CallType.cs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | ||||
| namespace ChocolArm64.Translation | ||||
| { | ||||
|     enum CallType | ||||
|     { | ||||
|         Call, | ||||
|         VirtualCall, | ||||
|         VirtualJump | ||||
|     } | ||||
| } | ||||
| @@ -4,13 +4,13 @@ namespace ChocolArm64.Translation | ||||
| { | ||||
|     class ILBlock : IILEmit | ||||
|     { | ||||
|         public long IntInputs    { get; private set; } | ||||
|         public long IntOutputs   { get; private set; } | ||||
|         public long IntAwOutputs { get; private set; } | ||||
|         public  long IntInputs  { get; private set; } | ||||
|         public  long IntOutputs { get; private set; } | ||||
|         private long _intAwOutputs; | ||||
|  | ||||
|         public long VecInputs    { get; private set; } | ||||
|         public long VecOutputs   { get; private set; } | ||||
|         public long VecAwOutputs { get; private set; } | ||||
|         public  long VecInputs  { get; private set; } | ||||
|         public  long VecOutputs { get; private set; } | ||||
|         private long _vecAwOutputs; | ||||
|  | ||||
|         public bool HasStateStore { get; private set; } | ||||
|  | ||||
| @@ -34,25 +34,25 @@ namespace ChocolArm64.Translation | ||||
|                 //opcodes emitted by each ARM instruction. | ||||
|                 //We can only consider the new outputs for doing input elimination | ||||
|                 //after all the CIL opcodes used by the instruction being emitted. | ||||
|                 IntAwOutputs = IntOutputs; | ||||
|                 VecAwOutputs = VecOutputs; | ||||
|                 _intAwOutputs = IntOutputs; | ||||
|                 _vecAwOutputs = VecOutputs; | ||||
|             } | ||||
|             else if (emitter is ILOpCodeLoad ld && ILMethodBuilder.IsRegIndex(ld.Index)) | ||||
|             { | ||||
|                 switch (ld.IoType) | ||||
|                 switch (ld.VarType) | ||||
|                 { | ||||
|                     case IoType.Flag:   IntInputs |= ((1L << ld.Index) << 32) & ~IntAwOutputs; break; | ||||
|                     case IoType.Int:    IntInputs |=  (1L << ld.Index)        & ~IntAwOutputs; break; | ||||
|                     case IoType.Vector: VecInputs |=  (1L << ld.Index)        & ~VecAwOutputs; break; | ||||
|                     case VarType.Flag:   IntInputs |= ((1L << ld.Index) << 32) & ~_intAwOutputs; break; | ||||
|                     case VarType.Int:    IntInputs |=  (1L << ld.Index)        & ~_intAwOutputs; break; | ||||
|                     case VarType.Vector: VecInputs |=  (1L << ld.Index)        & ~_vecAwOutputs; break; | ||||
|                 } | ||||
|             } | ||||
|             else if (emitter is ILOpCodeStore st && ILMethodBuilder.IsRegIndex(st.Index)) | ||||
|             { | ||||
|                 switch (st.IoType) | ||||
|                 switch (st.VarType) | ||||
|                 { | ||||
|                     case IoType.Flag:   IntOutputs |= (1L << st.Index) << 32; break; | ||||
|                     case IoType.Int:    IntOutputs |=  1L << st.Index;        break; | ||||
|                     case IoType.Vector: VecOutputs |=  1L << st.Index;        break; | ||||
|                     case VarType.Flag:   IntOutputs |= (1L << st.Index) << 32; break; | ||||
|                     case VarType.Int:    IntOutputs |=  1L << st.Index;        break; | ||||
|                     case VarType.Vector: VecOutputs |=  1L << st.Index;        break; | ||||
|                 } | ||||
|             } | ||||
|             else if (emitter is ILOpCodeStoreState) | ||||
|   | ||||
| @@ -31,6 +31,10 @@ namespace ChocolArm64.Translation | ||||
|  | ||||
|         public Aarch32Mode Mode { get; } = Aarch32Mode.User; //TODO | ||||
|  | ||||
|         public bool HasIndirectJump { get; set; } | ||||
|  | ||||
|         public bool HasSlowCall { get; set; } | ||||
|  | ||||
|         private Dictionary<Block, ILBlock> _visitedBlocks; | ||||
|  | ||||
|         private Queue<Block> _branchTargets; | ||||
| @@ -91,7 +95,12 @@ namespace ChocolArm64.Translation | ||||
|  | ||||
|             ResetBlockState(); | ||||
|  | ||||
|             AdvanceOpCode(); | ||||
|             if (AdvanceOpCode()) | ||||
|             { | ||||
|                 EmitSynchronization(); | ||||
|  | ||||
|                 _ilBlock.Add(new ILOpCodeLoadState(_ilBlock, isSubEntry: true)); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         public static int GetIntTempIndex() | ||||
| @@ -127,10 +136,18 @@ namespace ChocolArm64.Translation | ||||
|                 return; | ||||
|             } | ||||
|  | ||||
|             if (_opcIndex == 0) | ||||
|             int opcIndex = _opcIndex; | ||||
|  | ||||
|             if (opcIndex == 0) | ||||
|             { | ||||
|                 MarkLabel(GetLabel(_currBlock.Position)); | ||||
|             } | ||||
|  | ||||
|             bool isLastOp = opcIndex == CurrBlock.OpCodes.Count - 1; | ||||
|  | ||||
|             if (isLastOp && CurrBlock.Branch != null && | ||||
|                      (ulong)CurrBlock.Branch.Position <= (ulong)CurrBlock.Position) | ||||
|             { | ||||
|                 EmitSynchronization(); | ||||
|             } | ||||
|  | ||||
| @@ -161,7 +178,7 @@ namespace ChocolArm64.Translation | ||||
|                 //of the next instruction to be executed (in the case that the condition | ||||
|                 //is false, and the branch was not taken, as all basic blocks should end with | ||||
|                 //some kind of branch). | ||||
|                 if (CurrOp == CurrBlock.GetLastOp() && CurrBlock.Next == null) | ||||
|                 if (isLastOp && CurrBlock.Next == null) | ||||
|                 { | ||||
|                     EmitStoreState(); | ||||
|                     EmitLdc_I8(CurrOp.Position + CurrOp.OpCodeSizeInBytes); | ||||
| @@ -285,32 +302,43 @@ namespace ChocolArm64.Translation | ||||
|                 return; | ||||
|             } | ||||
|  | ||||
|             _queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1)); | ||||
|             _queue.Enqueue(position, mode, TranslationTier.Tier1, isComplete: true); | ||||
|         } | ||||
|  | ||||
|         public bool TryOptEmitSubroutineCall() | ||||
|         { | ||||
|             //Calls should always have a next block, unless | ||||
|             //we're translating a single basic block. | ||||
|             if (_currBlock.Next == null) | ||||
|             { | ||||
|                 return false; | ||||
|             } | ||||
|  | ||||
|             if (CurrOp.Emitter != InstEmit.Bl) | ||||
|             if (!(CurrOp is IOpCodeBImm op)) | ||||
|             { | ||||
|                 return false; | ||||
|             } | ||||
|  | ||||
|             if (!_cache.TryGetSubroutine(((OpCodeBImmAl64)CurrOp).Imm, out TranslatedSub subroutine)) | ||||
|             if (!_cache.TryGetSubroutine(op.Imm, out TranslatedSub sub)) | ||||
|             { | ||||
|                 return false; | ||||
|             } | ||||
|  | ||||
|             //It's not worth calling a Tier0 method, because | ||||
|             //it contains slow code, rather than the entire function. | ||||
|             if (sub.Tier == TranslationTier.Tier0) | ||||
|             { | ||||
|                 return false; | ||||
|             } | ||||
|  | ||||
|             EmitStoreState(sub); | ||||
|  | ||||
|             for (int index = 0; index < TranslatedSub.FixedArgTypes.Length; index++) | ||||
|             { | ||||
|                 EmitLdarg(index); | ||||
|             } | ||||
|  | ||||
|             EmitCall(subroutine.Method); | ||||
|             EmitCall(sub.Method); | ||||
|  | ||||
|             return true; | ||||
|         } | ||||
| @@ -321,8 +349,8 @@ namespace ChocolArm64.Translation | ||||
|  | ||||
|             InstEmitAluHelper.EmitAluLoadOpers(this); | ||||
|  | ||||
|             Stloc(CmpOptTmp2Index, IoType.Int); | ||||
|             Stloc(CmpOptTmp1Index, IoType.Int); | ||||
|             Stloc(CmpOptTmp2Index, VarType.Int); | ||||
|             Stloc(CmpOptTmp1Index, VarType.Int); | ||||
|         } | ||||
|  | ||||
|         private Dictionary<Condition, OpCode> _branchOps = new Dictionary<Condition, OpCode>() | ||||
| @@ -346,8 +374,8 @@ namespace ChocolArm64.Translation | ||||
|             { | ||||
|                 if (_optOpLastCompare.Emitter == InstEmit.Subs) | ||||
|                 { | ||||
|                     Ldloc(CmpOptTmp1Index, IoType.Int, _optOpLastCompare.RegisterSize); | ||||
|                     Ldloc(CmpOptTmp2Index, IoType.Int, _optOpLastCompare.RegisterSize); | ||||
|                     Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize); | ||||
|                     Ldloc(CmpOptTmp2Index, VarType.Int, _optOpLastCompare.RegisterSize); | ||||
|  | ||||
|                     Emit(_branchOps[cond], target); | ||||
|  | ||||
| @@ -369,7 +397,7 @@ namespace ChocolArm64.Translation | ||||
|                     //Such invalid values can't be encoded on the immediate encodings. | ||||
|                     if (_optOpLastCompare is IOpCodeAluImm64 op) | ||||
|                     { | ||||
|                         Ldloc(CmpOptTmp1Index, IoType.Int, _optOpLastCompare.RegisterSize); | ||||
|                         Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize); | ||||
|  | ||||
|                         if (_optOpLastCompare.RegisterSize == RegisterSize.Int32) | ||||
|                         { | ||||
| @@ -491,14 +519,14 @@ namespace ChocolArm64.Translation | ||||
|         { | ||||
|             if (amount > 0) | ||||
|             { | ||||
|                 Stloc(RorTmpIndex, IoType.Int); | ||||
|                 Ldloc(RorTmpIndex, IoType.Int); | ||||
|                 Stloc(RorTmpIndex, VarType.Int); | ||||
|                 Ldloc(RorTmpIndex, VarType.Int); | ||||
|  | ||||
|                 EmitLdc_I4(amount); | ||||
|  | ||||
|                 Emit(OpCodes.Shr_Un); | ||||
|  | ||||
|                 Ldloc(RorTmpIndex, IoType.Int); | ||||
|                 Ldloc(RorTmpIndex, VarType.Int); | ||||
|  | ||||
|                 EmitLdc_I4(CurrOp.GetBitsCount() - amount); | ||||
|  | ||||
| @@ -546,7 +574,7 @@ namespace ChocolArm64.Translation | ||||
|  | ||||
|         public void EmitLdarg(int index) | ||||
|         { | ||||
|             _ilBlock.Add(new ILOpCodeLoad(index, IoType.Arg)); | ||||
|             _ilBlock.Add(new ILOpCodeLoad(index, VarType.Arg)); | ||||
|         } | ||||
|  | ||||
|         public void EmitLdintzr(int index) | ||||
| @@ -588,6 +616,11 @@ namespace ChocolArm64.Translation | ||||
|             _ilBlock.Add(new ILOpCodeStoreState(_ilBlock)); | ||||
|         } | ||||
|  | ||||
|         private void EmitStoreState(TranslatedSub callSub) | ||||
|         { | ||||
|             _ilBlock.Add(new ILOpCodeStoreState(_ilBlock, callSub)); | ||||
|         } | ||||
|  | ||||
|         public void EmitLdtmp() => EmitLdint(IntGpTmp1Index); | ||||
|         public void EmitSttmp() => EmitStint(IntGpTmp1Index); | ||||
|  | ||||
| @@ -600,13 +633,13 @@ namespace ChocolArm64.Translation | ||||
|         public void EmitLdvectmp2() => EmitLdvec(VecGpTmp2Index); | ||||
|         public void EmitStvectmp2() => EmitStvec(VecGpTmp2Index); | ||||
|  | ||||
|         public void EmitLdint(int index) => Ldloc(index, IoType.Int); | ||||
|         public void EmitStint(int index) => Stloc(index, IoType.Int); | ||||
|         public void EmitLdint(int index) => Ldloc(index, VarType.Int); | ||||
|         public void EmitStint(int index) => Stloc(index, VarType.Int); | ||||
|  | ||||
|         public void EmitLdvec(int index) => Ldloc(index, IoType.Vector); | ||||
|         public void EmitStvec(int index) => Stloc(index, IoType.Vector); | ||||
|         public void EmitLdvec(int index) => Ldloc(index, VarType.Vector); | ||||
|         public void EmitStvec(int index) => Stloc(index, VarType.Vector); | ||||
|  | ||||
|         public void EmitLdflg(int index) => Ldloc(index, IoType.Flag); | ||||
|         public void EmitLdflg(int index) => Ldloc(index, VarType.Flag); | ||||
|         public void EmitStflg(int index) | ||||
|         { | ||||
|             //Set this only if any of the NZCV flag bits were modified. | ||||
| @@ -619,22 +652,22 @@ namespace ChocolArm64.Translation | ||||
|                 _optOpLastFlagSet = CurrOp; | ||||
|             } | ||||
|  | ||||
|             Stloc(index, IoType.Flag); | ||||
|             Stloc(index, VarType.Flag); | ||||
|         } | ||||
|  | ||||
|         private void Ldloc(int index, IoType ioType) | ||||
|         private void Ldloc(int index, VarType varType) | ||||
|         { | ||||
|             _ilBlock.Add(new ILOpCodeLoad(index, ioType, CurrOp.RegisterSize)); | ||||
|             _ilBlock.Add(new ILOpCodeLoad(index, varType, CurrOp.RegisterSize)); | ||||
|         } | ||||
|  | ||||
|         private void Ldloc(int index, IoType ioType, RegisterSize registerSize) | ||||
|         private void Ldloc(int index, VarType varType, RegisterSize registerSize) | ||||
|         { | ||||
|             _ilBlock.Add(new ILOpCodeLoad(index, ioType, registerSize)); | ||||
|             _ilBlock.Add(new ILOpCodeLoad(index, varType, registerSize)); | ||||
|         } | ||||
|  | ||||
|         private void Stloc(int index, IoType ioType) | ||||
|         private void Stloc(int index, VarType varType) | ||||
|         { | ||||
|             _ilBlock.Add(new ILOpCodeStore(index, ioType, CurrOp.RegisterSize)); | ||||
|             _ilBlock.Add(new ILOpCodeStore(index, varType, CurrOp.RegisterSize)); | ||||
|         } | ||||
|  | ||||
|         public void EmitCallPropGet(Type objType, string propName) | ||||
|   | ||||
| @@ -6,7 +6,7 @@ namespace ChocolArm64.Translation | ||||
|     { | ||||
|         private bool _hasLabel; | ||||
|  | ||||
|         private Label _lbl; | ||||
|         private Label _label; | ||||
|  | ||||
|         public void Emit(ILMethodBuilder context) | ||||
|         { | ||||
| @@ -17,12 +17,12 @@ namespace ChocolArm64.Translation | ||||
|         { | ||||
|             if (!_hasLabel) | ||||
|             { | ||||
|                 _lbl = context.Generator.DefineLabel(); | ||||
|                 _label = context.Generator.DefineLabel(); | ||||
|  | ||||
|                 _hasLabel = true; | ||||
|             } | ||||
|  | ||||
|             return _lbl; | ||||
|             return _label; | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -8,7 +8,10 @@ namespace ChocolArm64.Translation | ||||
| { | ||||
|     class ILMethodBuilder | ||||
|     { | ||||
|         public LocalAlloc LocalAlloc { get; private set; } | ||||
|         private const int RegsCount = 32; | ||||
|         private const int RegsMask  = RegsCount - 1; | ||||
|  | ||||
|         public RegisterUsage RegUsage { get; private set; } | ||||
|  | ||||
|         public ILGenerator Generator { get; private set; } | ||||
|  | ||||
| @@ -18,29 +21,47 @@ namespace ChocolArm64.Translation | ||||
|  | ||||
|         private string _subName; | ||||
|  | ||||
|         public bool IsAarch64 { get; } | ||||
|  | ||||
|         public bool IsSubComplete { get; } | ||||
|  | ||||
|         private int _localsCount; | ||||
|  | ||||
|         public ILMethodBuilder(ILBlock[] ilBlocks, string subName) | ||||
|         public ILMethodBuilder( | ||||
|             ILBlock[] ilBlocks, | ||||
|             string    subName, | ||||
|             bool      isAarch64, | ||||
|             bool      isSubComplete = false) | ||||
|         { | ||||
|             _ilBlocks = ilBlocks; | ||||
|             _subName  = subName; | ||||
|             _ilBlocks     = ilBlocks; | ||||
|             _subName      = subName; | ||||
|             IsAarch64     = isAarch64; | ||||
|             IsSubComplete = isSubComplete; | ||||
|         } | ||||
|  | ||||
|         public TranslatedSub GetSubroutine(TranslationTier tier) | ||||
|         public TranslatedSub GetSubroutine(TranslationTier tier, bool isWorthOptimizing) | ||||
|         { | ||||
|             LocalAlloc = new LocalAlloc(_ilBlocks, _ilBlocks[0]); | ||||
|             RegUsage = new RegisterUsage(); | ||||
|  | ||||
|             RegUsage.BuildUses(_ilBlocks[0]); | ||||
|  | ||||
|             DynamicMethod method = new DynamicMethod(_subName, typeof(long), TranslatedSub.FixedArgTypes); | ||||
|  | ||||
|             Generator = method.GetILGenerator(); | ||||
|             long intNiRegsMask = RegUsage.GetIntNotInputs(_ilBlocks[0]); | ||||
|             long vecNiRegsMask = RegUsage.GetVecNotInputs(_ilBlocks[0]); | ||||
|  | ||||
|             TranslatedSub subroutine = new TranslatedSub(method, tier); | ||||
|             TranslatedSub subroutine = new TranslatedSub( | ||||
|                 method, | ||||
|                 intNiRegsMask, | ||||
|                 vecNiRegsMask, | ||||
|                 tier, | ||||
|                 isWorthOptimizing); | ||||
|  | ||||
|             _locals = new Dictionary<Register, int>(); | ||||
|  | ||||
|             _localsCount = 0; | ||||
|  | ||||
|             new ILOpCodeLoadState(_ilBlocks[0]).Emit(this); | ||||
|             Generator = method.GetILGenerator(); | ||||
|  | ||||
|             foreach (ILBlock ilBlock in _ilBlocks) | ||||
|             { | ||||
| @@ -80,13 +101,13 @@ namespace ChocolArm64.Translation | ||||
|  | ||||
|         public static Register GetRegFromBit(int bit, RegisterType baseType) | ||||
|         { | ||||
|             if (bit < 32) | ||||
|             if (bit < RegsCount) | ||||
|             { | ||||
|                 return new Register(bit, baseType); | ||||
|             } | ||||
|             else if (baseType == RegisterType.Int) | ||||
|             { | ||||
|                 return new Register(bit & 0x1f, RegisterType.Flag); | ||||
|                 return new Register(bit & RegsMask, RegisterType.Flag); | ||||
|             } | ||||
|             else | ||||
|             { | ||||
| @@ -96,7 +117,7 @@ namespace ChocolArm64.Translation | ||||
|  | ||||
|         public static bool IsRegIndex(int index) | ||||
|         { | ||||
|             return (uint)index < 32; | ||||
|             return (uint)index < RegsCount; | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -4,16 +4,16 @@ namespace ChocolArm64.Translation | ||||
| { | ||||
|     struct ILOpCode : IILEmit | ||||
|     { | ||||
|         private OpCode _ilOp; | ||||
|         public OpCode ILOp { get; } | ||||
|  | ||||
|         public ILOpCode(OpCode ilOp) | ||||
|         { | ||||
|             _ilOp = ilOp; | ||||
|             ILOp = ilOp; | ||||
|         } | ||||
|  | ||||
|         public void Emit(ILMethodBuilder context) | ||||
|         { | ||||
|             context.Generator.Emit(_ilOp); | ||||
|             context.Generator.Emit(ILOp); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -4,18 +4,18 @@ namespace ChocolArm64.Translation | ||||
| { | ||||
|     struct ILOpCodeBranch : IILEmit | ||||
|     { | ||||
|         private OpCode   _ilOp; | ||||
|         private ILLabel _label; | ||||
|         public OpCode  ILOp  { get; } | ||||
|         public ILLabel Label { get; } | ||||
|  | ||||
|         public ILOpCodeBranch(OpCode ilOp, ILLabel label) | ||||
|         { | ||||
|             _ilOp  = ilOp; | ||||
|             _label = label; | ||||
|             ILOp  = ilOp; | ||||
|             Label = label; | ||||
|         } | ||||
|  | ||||
|         public void Emit(ILMethodBuilder context) | ||||
|         { | ||||
|             context.Generator.Emit(_ilOp, _label.GetLabel(context)); | ||||
|             context.Generator.Emit(ILOp, Label.GetLabel(context)); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -5,9 +5,9 @@ namespace ChocolArm64.Translation | ||||
| { | ||||
|     struct ILOpCodeCall : IILEmit | ||||
|     { | ||||
|         public MethodInfo Info { get; private set; } | ||||
|         public MethodInfo Info { get; } | ||||
|  | ||||
|         public bool IsVirtual { get; private set; } | ||||
|         public bool IsVirtual { get; } | ||||
|  | ||||
|         public ILOpCodeCall(MethodInfo info, bool isVirtual) | ||||
|         { | ||||
|   | ||||
| @@ -16,6 +16,8 @@ namespace ChocolArm64.Translation | ||||
|  | ||||
|         private ImmVal _value; | ||||
|  | ||||
|         public long Value => _value.I8; | ||||
|  | ||||
|         private enum ConstType | ||||
|         { | ||||
|             Int32, | ||||
|   | ||||
| @@ -5,28 +5,28 @@ namespace ChocolArm64.Translation | ||||
| { | ||||
|     struct ILOpCodeLoad : IILEmit | ||||
|     { | ||||
|         public int Index { get; private set; } | ||||
|         public int Index { get; } | ||||
|  | ||||
|         public IoType IoType { get; private set; } | ||||
|         public VarType VarType { get; } | ||||
|  | ||||
|         public RegisterSize RegisterSize { get; private set; } | ||||
|         public RegisterSize RegisterSize { get; } | ||||
|  | ||||
|         public ILOpCodeLoad(int index, IoType ioType, RegisterSize registerSize = 0) | ||||
|         public ILOpCodeLoad(int index, VarType varType, RegisterSize registerSize = 0) | ||||
|         { | ||||
|             Index        = index; | ||||
|             IoType       = ioType; | ||||
|             VarType      = varType; | ||||
|             RegisterSize = registerSize; | ||||
|         } | ||||
|  | ||||
|         public void Emit(ILMethodBuilder context) | ||||
|         { | ||||
|             switch (IoType) | ||||
|             switch (VarType) | ||||
|             { | ||||
|                 case IoType.Arg: context.Generator.EmitLdarg(Index); break; | ||||
|                 case VarType.Arg: context.Generator.EmitLdarg(Index); break; | ||||
|  | ||||
|                 case IoType.Flag:   EmitLdloc(context, Index, RegisterType.Flag);   break; | ||||
|                 case IoType.Int:    EmitLdloc(context, Index, RegisterType.Int);    break; | ||||
|                 case IoType.Vector: EmitLdloc(context, Index, RegisterType.Vector); break; | ||||
|                 case VarType.Flag:   EmitLdloc(context, Index, RegisterType.Flag);   break; | ||||
|                 case VarType.Int:    EmitLdloc(context, Index, RegisterType.Int);    break; | ||||
|                 case VarType.Vector: EmitLdloc(context, Index, RegisterType.Vector); break; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|   | ||||
| @@ -5,7 +5,7 @@ namespace ChocolArm64.Translation | ||||
| { | ||||
|     struct ILOpCodeLoadField : IILEmit | ||||
|     { | ||||
|         public FieldInfo Info { get; private set; } | ||||
|         public FieldInfo Info { get; } | ||||
|  | ||||
|         public ILOpCodeLoadField(FieldInfo info) | ||||
|         { | ||||
|   | ||||
| @@ -7,15 +7,24 @@ namespace ChocolArm64.Translation | ||||
|     { | ||||
|         private ILBlock _block; | ||||
|  | ||||
|         public ILOpCodeLoadState(ILBlock block) | ||||
|         private bool _isSubEntry; | ||||
|  | ||||
|         public ILOpCodeLoadState(ILBlock block, bool isSubEntry = false) | ||||
|         { | ||||
|             _block = block; | ||||
|             _block      = block; | ||||
|             _isSubEntry = isSubEntry; | ||||
|         } | ||||
|  | ||||
|         public void Emit(ILMethodBuilder context) | ||||
|         { | ||||
|             long intInputs = context.LocalAlloc.GetIntInputs(_block); | ||||
|             long vecInputs = context.LocalAlloc.GetVecInputs(_block); | ||||
|             long intInputs = context.RegUsage.GetIntInputs(_block); | ||||
|             long vecInputs = context.RegUsage.GetVecInputs(_block); | ||||
|  | ||||
|             if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete) | ||||
|             { | ||||
|                 intInputs = RegisterUsage.ClearCallerSavedIntRegs(intInputs, context.IsAarch64); | ||||
|                 vecInputs = RegisterUsage.ClearCallerSavedVecRegs(vecInputs, context.IsAarch64); | ||||
|             } | ||||
|  | ||||
|             LoadLocals(context, intInputs, RegisterType.Int); | ||||
|             LoadLocals(context, vecInputs, RegisterType.Vector); | ||||
|   | ||||
| @@ -2,16 +2,16 @@ namespace ChocolArm64.Translation | ||||
| { | ||||
|     struct ILOpCodeLog : IILEmit | ||||
|     { | ||||
|         private string _text; | ||||
|         public string Text { get; } | ||||
|  | ||||
|         public ILOpCodeLog(string text) | ||||
|         { | ||||
|             _text = text; | ||||
|             Text = text; | ||||
|         } | ||||
|  | ||||
|         public void Emit(ILMethodBuilder context) | ||||
|         { | ||||
|             context.Generator.EmitWriteLine(_text); | ||||
|             context.Generator.EmitWriteLine(Text); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -5,28 +5,28 @@ namespace ChocolArm64.Translation | ||||
| { | ||||
|     struct ILOpCodeStore : IILEmit | ||||
|     { | ||||
|         public int Index { get; private set; } | ||||
|         public int Index { get; } | ||||
|  | ||||
|         public IoType IoType { get; private set; } | ||||
|         public VarType VarType { get; } | ||||
|  | ||||
|         public RegisterSize RegisterSize { get; private set; } | ||||
|         public RegisterSize RegisterSize { get; } | ||||
|  | ||||
|         public ILOpCodeStore(int index, IoType ioType, RegisterSize registerSize = 0) | ||||
|         public ILOpCodeStore(int index, VarType varType, RegisterSize registerSize = 0) | ||||
|         { | ||||
|             Index        = index; | ||||
|             IoType       = ioType; | ||||
|             VarType      = varType; | ||||
|             RegisterSize = registerSize; | ||||
|         } | ||||
|  | ||||
|         public void Emit(ILMethodBuilder context) | ||||
|         { | ||||
|             switch (IoType) | ||||
|             switch (VarType) | ||||
|             { | ||||
|                 case IoType.Arg: context.Generator.EmitStarg(Index); break; | ||||
|                 case VarType.Arg: context.Generator.EmitStarg(Index); break; | ||||
|  | ||||
|                 case IoType.Flag:   EmitStloc(context, Index, RegisterType.Flag);   break; | ||||
|                 case IoType.Int:    EmitStloc(context, Index, RegisterType.Int);    break; | ||||
|                 case IoType.Vector: EmitStloc(context, Index, RegisterType.Vector); break; | ||||
|                 case VarType.Flag:   EmitStloc(context, Index, RegisterType.Flag);   break; | ||||
|                 case VarType.Int:    EmitStloc(context, Index, RegisterType.Int);    break; | ||||
|                 case VarType.Vector: EmitStloc(context, Index, RegisterType.Vector); break; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|   | ||||
| @@ -7,15 +7,33 @@ namespace ChocolArm64.Translation | ||||
|     { | ||||
|         private ILBlock _block; | ||||
|  | ||||
|         public ILOpCodeStoreState(ILBlock block) | ||||
|         private TranslatedSub _callSub; | ||||
|  | ||||
|         public ILOpCodeStoreState(ILBlock block, TranslatedSub callSub = null) | ||||
|         { | ||||
|             _block = block; | ||||
|             _block   = block; | ||||
|             _callSub = callSub; | ||||
|         } | ||||
|  | ||||
|         public void Emit(ILMethodBuilder context) | ||||
|         { | ||||
|             long intOutputs = context.LocalAlloc.GetIntOutputs(_block); | ||||
|             long vecOutputs = context.LocalAlloc.GetVecOutputs(_block); | ||||
|             long intOutputs = context.RegUsage.GetIntOutputs(_block); | ||||
|             long vecOutputs = context.RegUsage.GetVecOutputs(_block); | ||||
|  | ||||
|             if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete) | ||||
|             { | ||||
|                 intOutputs = RegisterUsage.ClearCallerSavedIntRegs(intOutputs, context.IsAarch64); | ||||
|                 vecOutputs = RegisterUsage.ClearCallerSavedVecRegs(vecOutputs, context.IsAarch64); | ||||
|             } | ||||
|  | ||||
|             if (_callSub != null) | ||||
|             { | ||||
|                 //Those registers are assigned in the callee function without | ||||
|                 //reading their values first. We don't need to write them because | ||||
|                 //they are not going to be read by the callee. | ||||
|                 intOutputs &= ~_callSub.IntNiRegsMask; | ||||
|                 vecOutputs &= ~_callSub.VecNiRegsMask; | ||||
|             } | ||||
|  | ||||
|             StoreLocals(context, intOutputs, RegisterType.Int); | ||||
|             StoreLocals(context, vecOutputs, RegisterType.Vector); | ||||
|   | ||||
| @@ -3,8 +3,13 @@ using System.Collections.Generic; | ||||
| 
 | ||||
| namespace ChocolArm64.Translation | ||||
| { | ||||
|     class LocalAlloc | ||||
|     class RegisterUsage | ||||
|     { | ||||
|         public const long CallerSavedIntRegistersMask = 0x7fL  << 9; | ||||
|         public const long PStateNzcvFlagsMask         = 0xfL   << 60; | ||||
| 
 | ||||
|         public const long CallerSavedVecRegistersMask = 0xffffL << 16; | ||||
| 
 | ||||
|         private class PathIo | ||||
|         { | ||||
|             private Dictionary<ILBlock, long> _allInputs; | ||||
| @@ -18,31 +23,30 @@ namespace ChocolArm64.Translation | ||||
|                 _cmnOutputs = new Dictionary<ILBlock, long>(); | ||||
|             } | ||||
| 
 | ||||
|             public PathIo(ILBlock root, long inputs, long outputs) : this() | ||||
|             public void Set(ILBlock entry, long inputs, long outputs) | ||||
|             { | ||||
|                 Set(root, inputs, outputs); | ||||
|             } | ||||
| 
 | ||||
|             public void Set(ILBlock root, long inputs, long outputs) | ||||
|             { | ||||
|                 if (!_allInputs.TryAdd(root, inputs)) | ||||
|                 if (!_allInputs.TryAdd(entry, inputs)) | ||||
|                 { | ||||
|                     _allInputs[root] |= inputs; | ||||
|                     _allInputs[entry] |= inputs; | ||||
|                 } | ||||
| 
 | ||||
|                 if (!_cmnOutputs.TryAdd(root, outputs)) | ||||
|                 if (!_cmnOutputs.TryAdd(entry, outputs)) | ||||
|                 { | ||||
|                     _cmnOutputs[root] &= outputs; | ||||
|                     _cmnOutputs[entry] &= outputs; | ||||
|                 } | ||||
| 
 | ||||
|                 _allOutputs |= outputs; | ||||
|             } | ||||
| 
 | ||||
|             public long GetInputs(ILBlock root) | ||||
|             public long GetInputs(ILBlock entry) | ||||
|             { | ||||
|                 if (_allInputs.TryGetValue(root, out long inputs)) | ||||
|                 if (_allInputs.TryGetValue(entry, out long inputs)) | ||||
|                 { | ||||
|                     return inputs | (_allOutputs & ~_cmnOutputs[root]); | ||||
|                     //We also need to read the registers that may not be written | ||||
|                     //by all paths that can reach an exit point, to ensure that | ||||
|                     //the local variable will not remain uninitialized depending | ||||
|                     //on the flow path taken. | ||||
|                     return inputs | (_allOutputs & ~_cmnOutputs[entry]); | ||||
|                 } | ||||
| 
 | ||||
|                 return 0; | ||||
| @@ -57,15 +61,38 @@ namespace ChocolArm64.Translation | ||||
|         private Dictionary<ILBlock, PathIo> _intPaths; | ||||
|         private Dictionary<ILBlock, PathIo> _vecPaths; | ||||
| 
 | ||||
|         private struct BlockIo | ||||
|         private struct BlockIo : IEquatable<BlockIo> | ||||
|         { | ||||
|             public ILBlock Block; | ||||
|             public ILBlock Entry; | ||||
|             public ILBlock Block { get; } | ||||
|             public ILBlock Entry { get; } | ||||
| 
 | ||||
|             public long IntInputs; | ||||
|             public long VecInputs; | ||||
|             public long IntOutputs; | ||||
|             public long VecOutputs; | ||||
|             public long IntInputs  { get; set; } | ||||
|             public long VecInputs  { get; set; } | ||||
|             public long IntOutputs { get; set; } | ||||
|             public long VecOutputs { get; set; } | ||||
| 
 | ||||
|             public BlockIo(ILBlock block, ILBlock entry) | ||||
|             { | ||||
|                 Block = block; | ||||
|                 Entry = entry; | ||||
| 
 | ||||
|                 IntInputs = IntOutputs = 0; | ||||
|                 VecInputs = VecOutputs = 0; | ||||
|             } | ||||
| 
 | ||||
|             public BlockIo( | ||||
|                 ILBlock block, | ||||
|                 ILBlock entry, | ||||
|                 long    intInputs, | ||||
|                 long    vecInputs, | ||||
|                 long    intOutputs, | ||||
|                 long    vecOutputs) : this(block, entry) | ||||
|             { | ||||
|                 IntInputs  = intInputs; | ||||
|                 VecInputs  = vecInputs; | ||||
|                 IntOutputs = intOutputs; | ||||
|                 VecOutputs = vecOutputs; | ||||
|             } | ||||
| 
 | ||||
|             public override bool Equals(object obj) | ||||
|             { | ||||
| @@ -74,6 +101,11 @@ namespace ChocolArm64.Translation | ||||
|                     return false; | ||||
|                 } | ||||
| 
 | ||||
|                 return Equals(other); | ||||
|             } | ||||
| 
 | ||||
|             public bool Equals(BlockIo other) | ||||
|             { | ||||
|                 return other.Block      == Block      && | ||||
|                        other.Entry      == Entry      && | ||||
|                        other.IntInputs  == IntInputs  && | ||||
| @@ -98,25 +130,13 @@ namespace ChocolArm64.Translation | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         private const int MaxOptGraphLength = 40; | ||||
| 
 | ||||
|         public LocalAlloc(ILBlock[] graph, ILBlock entry) | ||||
|         public RegisterUsage() | ||||
|         { | ||||
|             _intPaths = new Dictionary<ILBlock, PathIo>(); | ||||
|             _vecPaths = new Dictionary<ILBlock, PathIo>(); | ||||
| 
 | ||||
|             if (graph.Length > 1 && | ||||
|                 graph.Length < MaxOptGraphLength) | ||||
|             { | ||||
|                 InitializeOptimal(graph, entry); | ||||
|             } | ||||
|             else | ||||
|             { | ||||
|                 InitializeFast(graph); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         private void InitializeOptimal(ILBlock[] graph, ILBlock entry) | ||||
|         public void BuildUses(ILBlock entry) | ||||
|         { | ||||
|             //This will go through all possible paths on the graph, | ||||
|             //and store all inputs/outputs for each block. A register | ||||
| @@ -124,7 +144,7 @@ namespace ChocolArm64.Translation | ||||
|             //When a block can be reached by more than one path, then the | ||||
|             //output from all paths needs to be set for this block, and | ||||
|             //only outputs present in all of the parent blocks can be considered | ||||
|             //when doing input elimination. Each block chain have a entry, that's where | ||||
|             //when doing input elimination. Each block chain has an entry, that's where | ||||
|             //the code starts executing. They are present on the subroutine start point, | ||||
|             //and on call return points too (address written to X30 by BL). | ||||
|             HashSet<BlockIo> visited = new HashSet<BlockIo>(); | ||||
| @@ -133,19 +153,13 @@ namespace ChocolArm64.Translation | ||||
| 
 | ||||
|             void Enqueue(BlockIo block) | ||||
|             { | ||||
|                 if (!visited.Contains(block)) | ||||
|                 if (visited.Add(block)) | ||||
|                 { | ||||
|                     unvisited.Enqueue(block); | ||||
| 
 | ||||
|                     visited.Add(block); | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             Enqueue(new BlockIo() | ||||
|             { | ||||
|                 Block = entry, | ||||
|                 Entry = entry | ||||
|             }); | ||||
|             Enqueue(new BlockIo(entry, entry)); | ||||
| 
 | ||||
|             while (unvisited.Count > 0) | ||||
|             { | ||||
| @@ -177,19 +191,21 @@ namespace ChocolArm64.Translation | ||||
| 
 | ||||
|                 void EnqueueFromCurrent(ILBlock block, bool retTarget) | ||||
|                 { | ||||
|                     BlockIo blockIo = new BlockIo() { Block = block }; | ||||
|                     BlockIo blockIo; | ||||
| 
 | ||||
|                     if (retTarget) | ||||
|                     { | ||||
|                         blockIo.Entry = block; | ||||
|                         blockIo = new BlockIo(block, block); | ||||
|                     } | ||||
|                     else | ||||
|                     { | ||||
|                         blockIo.Entry      = current.Entry; | ||||
|                         blockIo.IntInputs  = current.IntInputs; | ||||
|                         blockIo.VecInputs  = current.VecInputs; | ||||
|                         blockIo.IntOutputs = current.IntOutputs; | ||||
|                         blockIo.VecOutputs = current.VecOutputs; | ||||
|                         blockIo = new BlockIo( | ||||
|                             block, | ||||
|                             current.Entry, | ||||
|                             current.IntInputs, | ||||
|                             current.VecInputs, | ||||
|                             current.IntOutputs, | ||||
|                             current.VecOutputs); | ||||
|                     } | ||||
| 
 | ||||
|                     Enqueue(blockIo); | ||||
| @@ -207,54 +223,63 @@ namespace ChocolArm64.Translation | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         private void InitializeFast(ILBlock[] graph) | ||||
|         { | ||||
|             //This is WAY faster than InitializeOptimal, but results in | ||||
|             //unneeded loads and stores, so the resulting code will be slower. | ||||
|             long intInputs = 0, intOutputs = 0; | ||||
|             long vecInputs = 0, vecOutputs = 0; | ||||
|         public long GetIntInputs(ILBlock entry) => GetInputsImpl(entry, _intPaths.Values); | ||||
|         public long GetVecInputs(ILBlock entry) => GetInputsImpl(entry, _vecPaths.Values); | ||||
| 
 | ||||
|             foreach (ILBlock block in graph) | ||||
|             { | ||||
|                 intInputs  |= block.IntInputs; | ||||
|                 intOutputs |= block.IntOutputs; | ||||
|                 vecInputs  |= block.VecInputs; | ||||
|                 vecOutputs |= block.VecOutputs; | ||||
|             } | ||||
| 
 | ||||
|             //It's possible that not all code paths writes to those output registers, | ||||
|             //in those cases if we attempt to write an output registers that was | ||||
|             //not written, we will be just writing zero and messing up the old register value. | ||||
|             //So we just need to ensure that all outputs are loaded. | ||||
|             if (graph.Length > 1) | ||||
|             { | ||||
|                 intInputs |= intOutputs; | ||||
|                 vecInputs |= vecOutputs; | ||||
|             } | ||||
| 
 | ||||
|             foreach (ILBlock block in graph) | ||||
|             { | ||||
|                 _intPaths.Add(block, new PathIo(block, intInputs, intOutputs)); | ||||
|                 _vecPaths.Add(block, new PathIo(block, vecInputs, vecOutputs)); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         public long GetIntInputs(ILBlock root) => GetInputsImpl(root, _intPaths.Values); | ||||
|         public long GetVecInputs(ILBlock root) => GetInputsImpl(root, _vecPaths.Values); | ||||
| 
 | ||||
|         private long GetInputsImpl(ILBlock root, IEnumerable<PathIo> values) | ||||
|         private long GetInputsImpl(ILBlock entry, IEnumerable<PathIo> values) | ||||
|         { | ||||
|             long inputs = 0; | ||||
| 
 | ||||
|             foreach (PathIo path in values) | ||||
|             { | ||||
|                 inputs |= path.GetInputs(root); | ||||
|                 inputs |= path.GetInputs(entry); | ||||
|             } | ||||
| 
 | ||||
|             return inputs; | ||||
|         } | ||||
| 
 | ||||
|         public long GetIntNotInputs(ILBlock entry) => GetNotInputsImpl(entry, _intPaths.Values); | ||||
|         public long GetVecNotInputs(ILBlock entry) => GetNotInputsImpl(entry, _vecPaths.Values); | ||||
| 
 | ||||
|         private long GetNotInputsImpl(ILBlock entry, IEnumerable<PathIo> values) | ||||
|         { | ||||
|             //Returns a mask with the registers that are written to | ||||
|             //before being read. Only registers that are written on | ||||
|             //all paths, and are not read before being written on | ||||
|             //those paths, should be set on the mask. | ||||
|             long mask = -1L; | ||||
| 
 | ||||
|             foreach (PathIo path in values) | ||||
|             { | ||||
|                 mask &= path.GetOutputs() & ~path.GetInputs(entry); | ||||
|             } | ||||
| 
 | ||||
|             return mask; | ||||
|         } | ||||
| 
 | ||||
|         public long GetIntOutputs(ILBlock block) => _intPaths[block].GetOutputs(); | ||||
|         public long GetVecOutputs(ILBlock block) => _vecPaths[block].GetOutputs(); | ||||
| 
 | ||||
|         public static long ClearCallerSavedIntRegs(long mask, bool isAarch64) | ||||
|         { | ||||
|             //TODO: ARM32 support. Non-AArch64 masks are currently returned unchanged. | ||||
|             if (isAarch64) | ||||
|             { | ||||
|                 mask &= ~(CallerSavedIntRegistersMask | PStateNzcvFlagsMask); | ||||
|             } | ||||
| 
 | ||||
|             return mask; | ||||
|         } | ||||
| 
 | ||||
|         public static long ClearCallerSavedVecRegs(long mask, bool isAarch64) | ||||
|         { | ||||
|             //TODO: ARM32 support. Non-AArch64 masks are currently returned unchanged. | ||||
|             if (isAarch64) | ||||
|             { | ||||
|                 mask &= ~CallerSavedVecRegistersMask; | ||||
|             } | ||||
| 
 | ||||
|             return mask; | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -10,21 +10,41 @@ namespace ChocolArm64.Translation | ||||
|  | ||||
|     class TranslatedSub | ||||
|     { | ||||
|         //This is the minimum number of calls needed for the method | ||||
|         //to be retranslated with higher quality code. It's only worth | ||||
|         //doing that for hot code. | ||||
|         private const int MinCallCountForOpt = 30; | ||||
|  | ||||
|         public ArmSubroutine Delegate { get; private set; } | ||||
|  | ||||
|         public static int StateArgIdx  { get; private set; } | ||||
|         public static int MemoryArgIdx { get; private set; } | ||||
|         public static int StateArgIdx  { get; } | ||||
|         public static int MemoryArgIdx { get; } | ||||
|  | ||||
|         public static Type[] FixedArgTypes { get; private set; } | ||||
|         public static Type[] FixedArgTypes { get; } | ||||
|  | ||||
|         public DynamicMethod Method { get; private set; } | ||||
|         public DynamicMethod Method { get; } | ||||
|  | ||||
|         public TranslationTier Tier { get; private set; } | ||||
|         public TranslationTier Tier { get; } | ||||
|  | ||||
|         public TranslatedSub(DynamicMethod method, TranslationTier tier) | ||||
|         public long IntNiRegsMask { get; } | ||||
|         public long VecNiRegsMask { get; } | ||||
|  | ||||
|         private bool _isWorthOptimizing; | ||||
|  | ||||
|         private int _callCount; | ||||
|  | ||||
|         public TranslatedSub( | ||||
|             DynamicMethod   method, | ||||
|             long            intNiRegsMask, | ||||
|             long            vecNiRegsMask, | ||||
|             TranslationTier tier, | ||||
|             bool            isWorthOptimizing) | ||||
|         { | ||||
|             Method = method ?? throw new ArgumentNullException(nameof(method));; | ||||
|             Tier   = tier; | ||||
|             Method             = method ?? throw new ArgumentNullException(nameof(method));; | ||||
|             IntNiRegsMask      = intNiRegsMask; | ||||
|             VecNiRegsMask      = vecNiRegsMask; | ||||
|             _isWorthOptimizing = isWorthOptimizing; | ||||
|             Tier               = tier; | ||||
|         } | ||||
|  | ||||
|         static TranslatedSub() | ||||
| @@ -61,5 +81,24 @@ namespace ChocolArm64.Translation | ||||
|         { | ||||
|             return Delegate(threadState, memory); | ||||
|         } | ||||
|  | ||||
|         public bool IsWorthOptimizing() | ||||
|         { | ||||
|            if (!_isWorthOptimizing) | ||||
|             { | ||||
|                 return false; | ||||
|             } | ||||
|  | ||||
|             if (_callCount++ < MinCallCountForOpt) | ||||
|             { | ||||
|                 return false; | ||||
|             } | ||||
|  | ||||
|             //Only return true once (the flag is cleared below), so that | ||||
|             //the subroutine is added to the translation queue only once. | ||||
|             _isWorthOptimizing = false; | ||||
|  | ||||
|             return true; | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -63,48 +63,36 @@ namespace ChocolArm64.Translation | ||||
|                     CpuTrace?.Invoke(this, new CpuTraceEventArgs(position)); | ||||
|                 } | ||||
|  | ||||
|                 TranslatedSub subroutine = GetOrTranslateSubroutine(state, position); | ||||
|                 if (!_cache.TryGetSubroutine(position, out TranslatedSub sub)) | ||||
|                 { | ||||
|                     sub = TranslateLowCq(position, state.GetExecutionMode()); | ||||
|                 } | ||||
|  | ||||
|                 position = subroutine.Execute(state, _memory); | ||||
|                 position = sub.Execute(state, _memory); | ||||
|             } | ||||
|             while (position != 0 && state.Running); | ||||
|  | ||||
|             state.CurrentTranslator = null; | ||||
|         } | ||||
|  | ||||
|         internal void TranslateVirtualSubroutine(CpuThreadState state, long position) | ||||
|         { | ||||
|             if (!_cache.TryGetSubroutine(position, out TranslatedSub sub) || sub.Tier == TranslationTier.Tier0) | ||||
|             { | ||||
|                 _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1)); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position) | ||||
|         internal ArmSubroutine GetOrTranslateSubroutine(CpuThreadState state, long position, CallType cs) | ||||
|         { | ||||
|             if (!_cache.TryGetSubroutine(position, out TranslatedSub sub)) | ||||
|             { | ||||
|                 sub = TranslateLowCq(position, state.GetExecutionMode()); | ||||
|             } | ||||
|  | ||||
|             if (sub.Tier == TranslationTier.Tier0) | ||||
|             if (sub.IsWorthOptimizing()) | ||||
|             { | ||||
|                 _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1)); | ||||
|                 bool isComplete = cs == CallType.Call || | ||||
|                                   cs == CallType.VirtualCall; | ||||
|  | ||||
|                 _queue.Enqueue(position, state.GetExecutionMode(), TranslationTier.Tier1, isComplete); | ||||
|             } | ||||
|  | ||||
|             return sub.Delegate; | ||||
|         } | ||||
|  | ||||
|         internal TranslatedSub GetOrTranslateSubroutine(CpuThreadState state, long position) | ||||
|         { | ||||
|             if (!_cache.TryGetSubroutine(position, out TranslatedSub subroutine)) | ||||
|             { | ||||
|                 subroutine = TranslateLowCq(position, state.GetExecutionMode()); | ||||
|             } | ||||
|  | ||||
|             return subroutine; | ||||
|         } | ||||
|  | ||||
|         private void TranslateQueuedSubs() | ||||
|         { | ||||
|             while (_threadCount != 0) | ||||
| @@ -124,7 +112,7 @@ namespace ChocolArm64.Translation | ||||
|                     } | ||||
|                     else | ||||
|                     { | ||||
|                         TranslateHighCq(item.Position, item.Mode); | ||||
|                         TranslateHighCq(item.Position, item.Mode, item.IsComplete); | ||||
|                     } | ||||
|                 } | ||||
|                 else | ||||
| @@ -142,14 +130,16 @@ namespace ChocolArm64.Translation | ||||
|  | ||||
|             string subName = GetSubroutineName(position); | ||||
|  | ||||
|             ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName); | ||||
|             bool isAarch64 = mode == ExecutionMode.Aarch64; | ||||
|  | ||||
|             TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0); | ||||
|             ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName, isAarch64); | ||||
|  | ||||
|             TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0, isWorthOptimizing: true); | ||||
|  | ||||
|             return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count); | ||||
|         } | ||||
|  | ||||
|         private void TranslateHighCq(long position, ExecutionMode mode) | ||||
|         private TranslatedSub TranslateHighCq(long position, ExecutionMode mode, bool isComplete) | ||||
|         { | ||||
|             Block graph = Decoder.DecodeSubroutine(_memory, position, mode); | ||||
|  | ||||
| @@ -159,9 +149,13 @@ namespace ChocolArm64.Translation | ||||
|  | ||||
|             string subName = GetSubroutineName(position); | ||||
|  | ||||
|             ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName); | ||||
|             bool isAarch64 = mode == ExecutionMode.Aarch64; | ||||
|  | ||||
|             TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1); | ||||
|             isComplete &= !context.HasIndirectJump; | ||||
|  | ||||
|             ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName, isAarch64, isComplete); | ||||
|  | ||||
|             TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1, context.HasSlowCall); | ||||
|  | ||||
|             int ilOpCount = 0; | ||||
|  | ||||
| @@ -170,9 +164,11 @@ namespace ChocolArm64.Translation | ||||
|                 ilOpCount += ilBlock.Count; | ||||
|             } | ||||
|  | ||||
|             ForceAheadOfTimeCompilation(subroutine); | ||||
|  | ||||
|             _cache.AddOrUpdate(position, subroutine, ilOpCount); | ||||
|  | ||||
|             ForceAheadOfTimeCompilation(subroutine); | ||||
|             return subroutine; | ||||
|         } | ||||
|  | ||||
|         private string GetSubroutineName(long position) | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| using ChocolArm64.State; | ||||
| using System.Collections.Concurrent; | ||||
| using System.Threading; | ||||
|  | ||||
| @@ -5,10 +6,6 @@ namespace ChocolArm64.Translation | ||||
| { | ||||
|     class TranslatorQueue | ||||
|     { | ||||
|         //This is the maximum number of functions to be translated that the queue can hold. | ||||
|         //The value may need some tuning to find the sweet spot. | ||||
|         private const int MaxQueueSize = 1024; | ||||
|  | ||||
|         private ConcurrentStack<TranslatorQueueItem>[] _translationQueue; | ||||
|  | ||||
|         private ManualResetEvent _queueDataReceivedEvent; | ||||
| @@ -27,14 +24,11 @@ namespace ChocolArm64.Translation | ||||
|             _queueDataReceivedEvent = new ManualResetEvent(false); | ||||
|         } | ||||
|  | ||||
|         public void Enqueue(TranslatorQueueItem item) | ||||
|         public void Enqueue(long position, ExecutionMode mode, TranslationTier tier, bool isComplete) | ||||
|         { | ||||
|             ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)item.Tier]; | ||||
|             TranslatorQueueItem item = new TranslatorQueueItem(position, mode, tier, isComplete); | ||||
|  | ||||
|             if (queue.Count >= MaxQueueSize) | ||||
|             { | ||||
|                 queue.TryPop(out _); | ||||
|             } | ||||
|             ConcurrentStack<TranslatorQueueItem> queue = _translationQueue[(int)tier]; | ||||
|  | ||||
|             queue.Push(item); | ||||
|  | ||||
|   | ||||
| @@ -10,11 +10,18 @@ namespace ChocolArm64.Translation | ||||
|  | ||||
|         public TranslationTier Tier { get; } | ||||
|  | ||||
|         public TranslatorQueueItem(long position, ExecutionMode mode, TranslationTier tier) | ||||
|         public bool IsComplete { get; } | ||||
|  | ||||
|         public TranslatorQueueItem( | ||||
|             long            position, | ||||
|             ExecutionMode   mode, | ||||
|             TranslationTier tier, | ||||
|             bool            isComplete = false) | ||||
|         { | ||||
|             Position = position; | ||||
|             Mode     = mode; | ||||
|             Tier     = tier; | ||||
|             Position   = position; | ||||
|             Mode       = mode; | ||||
|             Tier       = tier; | ||||
|             IsComplete = isComplete; | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -1,6 +1,6 @@ | ||||
| namespace ChocolArm64.Translation | ||||
| { | ||||
|     enum IoType | ||||
|     enum VarType | ||||
|     { | ||||
|         Arg, | ||||
|         Flag, | ||||
		Reference in New Issue
	
	Block a user