Speed up buffer -> texture copies.

No longer copies byte by byte. Fast path when formats are identical.
This commit is contained in:
riperiperi
2020-05-22 00:17:25 +01:00
parent fc2d5086e7
commit 8a7e25de71
5 changed files with 147 additions and 21 deletions

View File

@ -1,11 +1,13 @@
using Ryujinx.Graphics.Gpu.State;
using Ryujinx.Graphics.Texture;
using System;
using System.Runtime.Intrinsics;
namespace Ryujinx.Graphics.Gpu.Engine
{
partial class Methods
{
/// <summary>
/// Performs a buffer to buffer, or buffer to texture copy.
/// </summary>
@ -56,19 +58,55 @@ namespace Ryujinx.Graphics.Gpu.Engine
ulong srcBaseAddress = _context.MemoryManager.Translate(cbp.SrcAddress.Pack());
ulong dstBaseAddress = _context.MemoryManager.Translate(cbp.DstAddress.Pack());
for (int y = 0; y < cbp.YCount; y++)
for (int x = 0; x < cbp.XCount; x++)
(int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(src.RegionX, src.RegionY, cbp.XCount, cbp.YCount);
(int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dst.RegionX, dst.RegionY, cbp.XCount, cbp.YCount);
ReadOnlySpan<byte> srcSpan = _context.PhysicalMemory.GetSpan(srcBaseAddress + (ulong)srcBaseOffset, srcSize);
Span<byte> dstSpan = new Span<byte>(_context.PhysicalMemory.GetSpan(dstBaseAddress + (ulong)dstBaseOffset, dstSize).ToArray());
bool completeSource = src.RegionX == 0 && src.RegionY == 0 && src.Width == cbp.XCount && src.Height == cbp.YCount;
bool completeDest = dst.RegionX == 0 && dst.RegionY == 0 && dst.Width == cbp.XCount && dst.Height == cbp.YCount;
if (completeSource && completeDest && srcCalculator.LayoutMatches(dstCalculator))
{
int srcOffset = srcCalculator.GetOffset(src.RegionX + x, src.RegionY + y);
int dstOffset = dstCalculator.GetOffset(dst.RegionX + x, dst.RegionY + y);
ulong srcAddress = srcBaseAddress + (ulong)srcOffset;
ulong dstAddress = dstBaseAddress + (ulong)dstOffset;
ReadOnlySpan<byte> pixel = _context.PhysicalMemory.GetSpan(srcAddress, srcBpp);
_context.PhysicalMemory.Write(dstAddress, pixel);
srcSpan.CopyTo(dstSpan); // No layout conversion has to be performed, just copy the data entirely.
}
else
{
// Copies the cbp.XCount x cbp.YCount region one pixel at a time, translating between
// the source and destination layouts. T is an unmanaged type whose size is the
// pixel size in bytes, so each pixel is moved with a single load/store.
// Always returns 1; the value only exists so the call can be used as an arm
// of a switch expression.
unsafe int Convert<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan) where T : unmanaged
{
    fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
    {
        for (int y = 0; y < cbp.YCount; y++)
        {
            // Select the row once so the inner loop only has to resolve the X coordinate.
            srcCalculator.SetY(src.RegionY + y);
            dstCalculator.SetY(dst.RegionY + y);

            for (int x = 0; x < cbp.XCount; x++)
            {
                // The spans were fetched starting at (base address + base offset), while the
                // layout offset is relative to the base address (the previous per-pixel
                // implementation added it directly to the base address). The span's base
                // offset must therefore be subtracted, not added, to index into the span.
                // NOTE(review): assumes GetOffset(x) after SetY(y) matches GetOffset(x, y) - confirm.
                int srcOffset = srcCalculator.GetOffset(src.RegionX + x) - srcBaseOffset;
                int dstOffset = dstCalculator.GetOffset(dst.RegionX + x) - dstBaseOffset;

                *(T*)(dstPtr + dstOffset) = *(T*)(srcPtr + srcOffset);
            }
        }
    }

    return 1;
}
// Dispatch on the pixel size in bytes so that each pixel is copied with a single
// load/store of a matching unmanaged type. The result is discarded; it is only
// needed because a switch expression must produce a value.
int _ = srcBpp switch
{
    1 => Convert<byte>(dstSpan, srcSpan),
    2 => Convert<ushort>(dstSpan, srcSpan),
    4 => Convert<uint>(dstSpan, srcSpan),
    8 => Convert<ulong>(dstSpan, srcSpan),
    12 => Convert<Bpp12Pixel>(dstSpan, srcSpan), // presumably a 12-byte struct declared elsewhere - confirm
    16 => Convert<Vector128<byte>>(dstSpan, srcSpan),
    // C# interpolation holes are written {expr}; the previous "${srcBpp}" put a stray '$' in the message.
    _ => throw new NotSupportedException($"Unable to copy {srcBpp} bpp pixel format.")
};
}
// dstSpan is a copy of the memory starting at (dstBaseAddress + dstBaseOffset), so it must be
// written back to that same address; writing at dstBaseAddress would shift the data whenever
// the copied rectangle does not start at the very beginning of the destination.
_context.PhysicalMemory.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan);
}
else
{