Add an early TailMerge pass (#2721)

* Add an early `TailMerge` pass

Some translations can contain a lot of guest calls, and each guest call
is followed by a call guard which may return. This can produce a lot of
duplicated epilogue code for those returns. This pass merges the
epilogues into a single block.

```
Using filter 'hcq'.
Using metric 'code size'.

Total diff: -1648111 (-7.19 %) (bytes):
  Base: 22913847
  Diff: 21265736

Improved: 4567, regressed: 14, unchanged: 144
```

* Set PTC version

* Address feedback

* Handle `void` returning functions

* Actually handle `void` returning functions

* Fix `RegisterToLocal` logging
This commit is contained in:
FICTURE7 2021-10-19 02:51:22 +04:00 committed by GitHub
parent d512ce122c
commit fbf40424f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 148 additions and 26 deletions

View File

@ -17,7 +17,7 @@ namespace ARMeilleure.CodeGen.Optimizations
BasicBlock lastBlock = cfg.Blocks.Last;
// Move cold blocks at the end of the list, so that they are emitted away from hot code.
for (block = cfg.Blocks.First; block != lastBlock; block = nextBlock)
for (block = cfg.Blocks.First; block != null; block = nextBlock)
{
nextBlock = block.ListNext;
@ -26,6 +26,11 @@ namespace ARMeilleure.CodeGen.Optimizations
cfg.Blocks.Remove(block);
cfg.Blocks.AddLast(block);
}
if (block == lastBlock)
{
break;
}
}
for (block = cfg.Blocks.First; block != null; block = nextBlock)

View File

@ -0,0 +1,83 @@
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
namespace ARMeilleure.CodeGen.Optimizations
{
static class TailMerge
{
    /// <summary>
    /// Redirects every block that ends with a <c>Return</c> into one shared return block that is
    /// appended to the end of the control flow graph.
    /// </summary>
    /// <remarks>
    /// The trailing <c>Return</c> of each such block is removed. When the function returns a value,
    /// the returned source is first copied into a single local that the shared <c>Return</c> reads.
    /// </remarks>
    /// <param name="cctx">Compiler context supplying the graph and the function return type.</param>
    public static void RunPass(in CompilerContext cctx)
    {
        ControlFlowGraph cfg = cctx.Cfg;

        BasicBlock sharedExit = new(cfg.Blocks.Count);

        bool returnsNothing = cctx.FuncReturnType == OperandType.None;

        // Functions with a return value funnel it through one local; void functions need none.
        Operand sharedResult = returnsNothing ? default : cfg.AllocateLocal(cctx.FuncReturnType);

        Operation exitOp = returnsNothing
            ? Operation(Instruction.Return, default)
            : Operation(Instruction.Return, default, sharedResult);

        sharedExit.Frequency = BasicBlockFrequency.Cold;
        sharedExit.Operations.AddLast(exitOp);

        // The shared block is only linked into the list after this walk, so it is never visited here.
        for (BasicBlock current = cfg.Blocks.First; current != null; current = current.ListNext)
        {
            Operation tail = current.Operations.Last;

            if (tail == default || tail.Instruction != Instruction.Return)
            {
                continue;
            }

            current.Operations.Remove(tail);

            if (returnsNothing)
            {
                PrepareMerge(current, sharedExit);

                continue;
            }

            // Move the value that was being returned into the shared local.
            Operation moveResult = Operation(Instruction.Copy, sharedResult, tail.GetSource(0));

            BasicBlock host = PrepareMerge(current, sharedExit);

            host.Append(moveResult);
        }

        cfg.Blocks.AddLast(sharedExit);
        cfg.Update();
    }

    /// <summary>
    /// Connects <paramref name="from"/> to <paramref name="to"/> and returns the block that should
    /// receive any operations emitted ahead of the merged return.
    /// </summary>
    /// <param name="from">Block whose trailing return has already been removed.</param>
    /// <param name="to">Shared return block.</param>
    /// <returns>
    /// The only predecessor of <paramref name="from"/> when <paramref name="from"/> has no operations
    /// left and exactly one predecessor; otherwise <paramref name="from"/> itself.
    /// </returns>
    private static BasicBlock PrepareMerge(BasicBlock from, BasicBlock to)
    {
        BasicBlock solePred = null;

        if (from.Predecessors.Count == 1)
        {
            solePred = from.Predecessors[0];
        }

        if (from.Operations.Count != 0 || solePred == null)
        {
            from.AddSuccessor(to);

            return from;
        }

        // `from` is empty: retarget the predecessor's edges directly at `to` to avoid an extra jump.
        for (int slot = 0; slot < solePred.SuccessorsCount; slot++)
        {
            if (solePred.GetSuccessor(slot) == from)
            {
                solePred.SetSuccessor(slot, to);
            }
        }

        // NOTE: `from` is now unreachable; the later `cfg.Update()` call removes it.
        return solePred;
    }
}
}

View File

@ -5,8 +5,10 @@ namespace ARMeilleure.Diagnostics
Decoding,
Translation,
RegisterUsage,
TailMerge,
Dominance,
SsaConstruction,
RegisterToLocal,
Optimization,
PreAllocation,
RegisterAllocation,

View File

@ -1,4 +1,5 @@
using ARMeilleure.CodeGen;
using ARMeilleure.CodeGen.Optimizations;
using ARMeilleure.CodeGen.X86;
using ARMeilleure.Diagnostics;
using ARMeilleure.IntermediateRepresentation;
@ -12,32 +13,42 @@ namespace ARMeilleure.Translation
OperandType[] argTypes,
OperandType retType,
CompilerOptions options)
{
CompilerContext cctx = new(cfg, argTypes, retType, options);
if (options.HasFlag(CompilerOptions.Optimize))
{
Logger.StartPass(PassName.TailMerge);
TailMerge.RunPass(cctx);
Logger.EndPass(PassName.TailMerge, cfg);
}
if (options.HasFlag(CompilerOptions.SsaForm))
{
Logger.StartPass(PassName.Dominance);
if ((options & CompilerOptions.SsaForm) != 0)
{
Dominance.FindDominators(cfg);
Dominance.FindDominanceFrontiers(cfg);
}
Logger.EndPass(PassName.Dominance);
Logger.StartPass(PassName.SsaConstruction);
if ((options & CompilerOptions.SsaForm) != 0)
{
Ssa.Construct(cfg);
Logger.EndPass(PassName.SsaConstruction, cfg);
}
else
{
Logger.StartPass(PassName.RegisterToLocal);
RegisterToLocal.Rename(cfg);
Logger.EndPass(PassName.RegisterToLocal, cfg);
}
Logger.EndPass(PassName.SsaConstruction, cfg);
CompilerContext cctx = new(cfg, argTypes, retType, options);
return CodeGenerator.Generate(cctx);
}
}

View File

@ -10,7 +10,7 @@ namespace ARMeilleure.Translation
private BasicBlock[] _postOrderBlocks;
private int[] _postOrderMap;
public int LocalsCount { get; }
public int LocalsCount { get; private set; }
public BasicBlock Entry { get; }
public IntrusiveList<BasicBlock> Blocks { get; }
public BasicBlock[] PostOrderBlocks => _postOrderBlocks;
@ -25,6 +25,15 @@ namespace ARMeilleure.Translation
Update();
}
/// <summary>
/// Creates a new local operand of the given type and numbers it with the next local index.
/// </summary>
/// <param name="type">Type of the local to create.</param>
/// <returns>The newly created local, numbered with the incremented <see cref="LocalsCount"/>.</returns>
public Operand AllocateLocal(OperandType type)
{
    Operand local = Operand.Factory.Local(type);

    // Bump the count first; the new local takes the updated value as its number.
    int number = LocalsCount + 1;
    LocalsCount = number;

    local.NumberLocal(number);

    return local;
}
public void Update()
{
RemoveUnreachableBlocks(Blocks);

View File

@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC
private const string OuterHeaderMagicString = "PTCohd\0\0";
private const string InnerHeaderMagicString = "PTCihd\0\0";
private const uint InternalVersion = 2680; //! To be incremented manually for each change to the ARMeilleure project.
private const uint InternalVersion = 2721; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0";
private const string BackupDir = "1";

View File

@ -202,13 +202,19 @@ namespace ARMeilleure.Translation
// The only block without any predecessor should be the entry block.
// It always needs a context load as it is the first block to run.
if (block.Predecessors.Count == 0 || hasContextLoad)
{
long vecMask = globalInputs[block.Index].VecMask;
long intMask = globalInputs[block.Index].IntMask;
if (vecMask != 0 || intMask != 0)
{
arg = Local(OperandType.I64);
Operation loadArg = block.Operations.AddFirst(Operation(Instruction.LoadArgument, arg, Const(0)));
LoadLocals(block, globalInputs[block.Index].VecMask, RegisterType.Vector, mode, loadArg, arg);
LoadLocals(block, globalInputs[block.Index].IntMask, RegisterType.Integer, mode, loadArg, arg);
LoadLocals(block, vecMask, RegisterType.Vector, mode, loadArg, arg);
LoadLocals(block, intMask, RegisterType.Integer, mode, loadArg, arg);
}
}
bool hasContextStore = HasContextStore(block);
@ -219,6 +225,11 @@ namespace ARMeilleure.Translation
}
if (EndsWithReturn(block) || hasContextStore)
{
long vecMask = globalOutputs[block.Index].VecMask;
long intMask = globalOutputs[block.Index].IntMask;
if (vecMask != 0 || intMask != 0)
{
if (arg == default)
{
@ -227,8 +238,9 @@ namespace ARMeilleure.Translation
block.Append(Operation(Instruction.LoadArgument, arg, Const(0)));
}
StoreLocals(block, globalOutputs[block.Index].IntMask, RegisterType.Integer, mode, arg);
StoreLocals(block, globalOutputs[block.Index].VecMask, RegisterType.Vector, mode, arg);
StoreLocals(block, intMask, RegisterType.Integer, mode, arg);
StoreLocals(block, vecMask, RegisterType.Vector, mode, arg);
}
}
}
}