mirror of
				https://github.com/Ryujinx/Ryujinx.git
				synced 2025-10-25 16:23:56 -07:00 
			
		
		
		
	Fix a few things, 64 byte block fast copy.
This commit is contained in:
		| @@ -73,7 +73,7 @@ namespace Ryujinx.Graphics.Gpu.Engine | ||||
|                 } | ||||
|                 else  | ||||
|                 { | ||||
|                     unsafe int Convert<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan) where T : unmanaged | ||||
|                     unsafe bool Convert<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan) where T : unmanaged | ||||
|                     { | ||||
|                         fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan) | ||||
|                         { | ||||
| @@ -91,10 +91,10 @@ namespace Ryujinx.Graphics.Gpu.Engine | ||||
|                                 } | ||||
|                             } | ||||
|                         } | ||||
|                         return 1; | ||||
|                         return true; | ||||
|                     } | ||||
|  | ||||
|                     int _ = srcBpp switch | ||||
|                     bool _ = srcBpp switch | ||||
|                     { | ||||
|                         1 => Convert<byte>(dstSpan, srcSpan), | ||||
|                         2 => Convert<ushort>(dstSpan, srcSpan), | ||||
|   | ||||
| @@ -35,6 +35,7 @@ namespace Ryujinx.Graphics.Texture | ||||
|  | ||||
|         // Variables for built in iteration. | ||||
|         private int _yPart; | ||||
|         private int _yzPart; | ||||
|         private int _zPart; | ||||
|  | ||||
|         public BlockLinearLayout( | ||||
| @@ -105,13 +106,14 @@ namespace Ryujinx.Graphics.Texture | ||||
|         public (int offset, int size) GetRectangleRange(int x, int y, int width, int height) | ||||
|         { | ||||
|             // Justification: | ||||
|             // The offset is a combination of separate x and y parts. | ||||
|             // The 2D offset is a combination of separate x and y parts. | ||||
|             // Both components increase with input and never overlap bits. | ||||
|             // Therefore for each component, the minimum input value is the lowest that component can go. Opposite goes for maximum. | ||||
|             // Therefore for each component, the minimum input value is the lowest that component can go. | ||||
|             // Minimum total value is minimum X component + minimum Y component. Similar goes for maximum. | ||||
|  | ||||
|             int start = GetOffset(x, y, 0); | ||||
|             int end = GetOffset(x + width, y + height, 0); | ||||
|             return (start, (end - start) + _texBpp); | ||||
|             int end = GetOffset(x + width - 1, y + height - 1, 0) + _texBpp; // Cover the last pixel. | ||||
|             return (start, end - start); | ||||
|         } | ||||
|  | ||||
|         public bool LayoutMatches(BlockLinearLayout other) | ||||
| @@ -134,6 +136,7 @@ namespace Ryujinx.Graphics.Texture | ||||
|             offset += ((y & 0x01) >> 0) << 4; | ||||
|  | ||||
|             _yPart = offset; | ||||
|             _yzPart = offset + _zPart; | ||||
|         } | ||||
|  | ||||
|         [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||||
| @@ -144,24 +147,45 @@ namespace Ryujinx.Graphics.Texture | ||||
|             offset += ((z & _bdMask) * GobSize) << _bhShift; | ||||
|  | ||||
|             _zPart = offset; | ||||
|             _yzPart = offset + _yPart; | ||||
|         } | ||||
|  | ||||
|         /// <summary> | ||||
|         /// Optimized conversion for line offset in bytes to an absolute offset. Input x must be divisible by 16. | ||||
|         /// </summary> | ||||
|         [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||||
|         public int GetOffsetWithLineOffset16(int x) | ||||
|         { | ||||
|             int offset = (x / GobStride) << _xShift; | ||||
|  | ||||
|             offset += ((x & 0x3f) >> 5) << 8; | ||||
|             offset += ((x & 0x1f) >> 4) << 5; | ||||
|  | ||||
|             return offset + _yzPart; | ||||
|         } | ||||
|  | ||||
|         /// <summary> | ||||
|         /// Optimized conversion for line offset in bytes to an absolute offset. Input x must be divisible by 64. | ||||
|         /// </summary> | ||||
|         [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||||
|         public int GetOffsetWithLineOffset64(int x) | ||||
|         { | ||||
|             int offset = (x / GobStride) << _xShift; | ||||
|  | ||||
|             return offset + _yzPart; | ||||
|         } | ||||
|  | ||||
|         [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||||
|         public int GetOffsetWithLineOffset(int x) | ||||
|         public int GetOffset(int x) | ||||
|         { | ||||
|             x <<= _bppShift; | ||||
|             int offset = (x / GobStride) << _xShift; | ||||
|  | ||||
|             offset += ((x & 0x3f) >> 5) << 8; | ||||
|             offset += ((x & 0x1f) >> 4) << 5; | ||||
|             offset += (x & 0x0f); | ||||
|  | ||||
|             return offset + _yPart + _zPart; | ||||
|         } | ||||
|  | ||||
|         [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||||
|         public int GetOffset(int x) | ||||
|         { | ||||
|             return GetOffsetWithLineOffset(x << _bppShift); | ||||
|             return offset + _yzPart; | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -65,10 +65,14 @@ namespace Ryujinx.Graphics.Texture | ||||
|  | ||||
|                 int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16); | ||||
|  | ||||
|                 int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64); | ||||
|  | ||||
|                 int xStart = strideTrunc / bytesPerPixel; | ||||
|  | ||||
|                 int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment); | ||||
|  | ||||
|                 int outStrideGap = stride - w * bytesPerPixel; | ||||
|  | ||||
|                 int alignment = gobWidth; | ||||
|  | ||||
|                 if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight) | ||||
| @@ -86,13 +90,14 @@ namespace Ryujinx.Graphics.Texture | ||||
|                     mipGobBlocksInZ, | ||||
|                     bytesPerPixel); | ||||
|  | ||||
|                 unsafe void Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged | ||||
|                 unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged | ||||
|                 { | ||||
|                     fixed (byte* outputBPtr = output, dataBPtr = data) | ||||
|                     fixed (byte* outputPtr = output, dataPtr = data) | ||||
|                     { | ||||
|                         byte* outPtr = outputPtr + outOffs; | ||||
|                         for (int layer = 0; layer < layers; layer++) | ||||
|                         { | ||||
|                             int inBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level); | ||||
|                             byte* inBaseOffset = dataPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level)); | ||||
|  | ||||
|                             for (int z = 0; z < d; z++) | ||||
|                             { | ||||
| @@ -100,51 +105,58 @@ namespace Ryujinx.Graphics.Texture | ||||
|                                 for (int y = 0; y < h; y++) | ||||
|                                 { | ||||
|                                     layoutConverter.SetY(y); | ||||
|                                     for (int x = 0; x < strideTrunc; x += 16) | ||||
|                                     { | ||||
|                                         int offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset(x); | ||||
|  | ||||
|                                         *(Vector128<byte>*)(outputBPtr + outOffs + x) = *(Vector128<byte>*)(dataBPtr + offset); | ||||
|                                     for (int x = 0; x < strideTrunc64; x += 64, outPtr += 64) | ||||
|                                     { | ||||
|                                         byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x); | ||||
|                                         byte* offset2 = offset + 0x20; | ||||
|                                         byte* offset3 = offset + 0x100; | ||||
|                                         byte* offset4 = offset + 0x120; | ||||
|  | ||||
|                                         Vector128<byte> value = *(Vector128<byte>*)offset; | ||||
|                                         Vector128<byte> value2 = *(Vector128<byte>*)offset2; | ||||
|                                         Vector128<byte> value3 = *(Vector128<byte>*)offset3; | ||||
|                                         Vector128<byte> value4 = *(Vector128<byte>*)offset4; | ||||
|  | ||||
|                                         *(Vector128<byte>*)outPtr = value; | ||||
|                                         *(Vector128<byte>*)(outPtr + 16) = value2; | ||||
|                                         *(Vector128<byte>*)(outPtr + 32) = value3; | ||||
|                                         *(Vector128<byte>*)(outPtr + 48) = value4; | ||||
|                                     } | ||||
|  | ||||
|                                     for (int x = xStart; x < w; x++) | ||||
|                                     for (int x = strideTrunc64; x < strideTrunc; x += 16, outPtr += 16) | ||||
|                                     { | ||||
|                                         int offset = inBaseOffset + layoutConverter.GetOffset(x); | ||||
|                                         byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x); | ||||
|  | ||||
|                                         ((T*)(outputBPtr + outOffs))[x] = *(T*)(dataBPtr + offset); | ||||
|                                         *(Vector128<byte>*)outPtr = *(Vector128<byte>*)offset; | ||||
|                                     } | ||||
|  | ||||
|                                     outOffs += stride; | ||||
|                                     for (int x = xStart; x < w; x++, outPtr += bytesPerPixel) | ||||
|                                     { | ||||
|                                         byte* offset = inBaseOffset + layoutConverter.GetOffset(x); | ||||
|  | ||||
|                                         *(T*)outPtr = *(T*)offset; | ||||
|                                     } | ||||
|  | ||||
|                                     outPtr += outStrideGap; | ||||
|                                 } | ||||
|                             } | ||||
|                         } | ||||
|                         outOffs += stride * h * d * layers; | ||||
|                     } | ||||
|                     return true; | ||||
|                 } | ||||
|  | ||||
|                 switch (bytesPerPixel) | ||||
|                 bool _ = bytesPerPixel switch | ||||
|                 { | ||||
|                     case 1: | ||||
|                         Convert<byte>(output, data); | ||||
|                         break; | ||||
|                     case 2: | ||||
|                         Convert<ushort>(output, data); | ||||
|                         break; | ||||
|                     case 4: | ||||
|                         Convert<uint>(output, data); | ||||
|                         break; | ||||
|                     case 8: | ||||
|                         Convert<ulong>(output, data); | ||||
|                         break; | ||||
|                     case 12: | ||||
|                         Convert<Bpp12Pixel>(output, data); | ||||
|                         break; | ||||
|                     case 16: | ||||
|                         Convert<Vector128<byte>>(output, data); | ||||
|                         break; | ||||
|  | ||||
|                     default: | ||||
|                         throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format."); | ||||
|                 } | ||||
|                     1 => Convert<byte>(output, data), | ||||
|                     2 => Convert<ushort>(output, data), | ||||
|                     4 => Convert<uint>(output, data), | ||||
|                     8 => Convert<ulong>(output, data), | ||||
|                     12 => Convert<Bpp12Pixel>(output, data), | ||||
|                     16 => Convert<Vector128<byte>>(output, data), | ||||
|                     _ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.") | ||||
|                 }; | ||||
|             } | ||||
|             return output; | ||||
|         } | ||||
| @@ -162,52 +174,19 @@ namespace Ryujinx.Graphics.Texture | ||||
|             int h = BitUtils.DivRoundUp(height, blockHeight); | ||||
|  | ||||
|             int outStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment); | ||||
|             int lineSize = w * bytesPerPixel; | ||||
|  | ||||
|             Span<byte> output = new byte[h * outStride]; | ||||
|  | ||||
|             int outOffs = 0; | ||||
|             int inOffs = 0; | ||||
|  | ||||
|             unsafe void Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged | ||||
|             for (int y = 0; y < h; y++) | ||||
|             { | ||||
|                 fixed (byte* outputBPtr = output, dataBPtr = data) | ||||
|                 { | ||||
|                     for (int y = 0; y < h; y++) | ||||
|                     { | ||||
|                         for (int x = 0; x < w; x++) | ||||
|                         { | ||||
|                             int offset = y * stride + x * bytesPerPixel; | ||||
|                 data.Slice(inOffs, lineSize).CopyTo(output.Slice(outOffs, lineSize)); | ||||
|  | ||||
|                             ((T*)(outputBPtr + outOffs))[x] = *(T*)(dataBPtr + offset); | ||||
|                         } | ||||
|  | ||||
|                         outOffs += outStride; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             switch (bytesPerPixel) | ||||
|             { | ||||
|                 case 1: | ||||
|                     Convert<byte>(output, data); | ||||
|                     break; | ||||
|                 case 2: | ||||
|                     Convert<ushort>(output, data); | ||||
|                     break; | ||||
|                 case 4: | ||||
|                     Convert<uint>(output, data); | ||||
|                     break; | ||||
|                 case 8: | ||||
|                     Convert<ulong>(output, data); | ||||
|                     break; | ||||
|                 case 12: | ||||
|                     Convert<Bpp12Pixel>(output, data); | ||||
|                     break; | ||||
|                 case 16: | ||||
|                     Convert<Vector128<byte>>(output, data); | ||||
|                     break; | ||||
|  | ||||
|                 default: | ||||
|                     throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format."); | ||||
|                 inOffs += stride; | ||||
|                 outOffs += outStride; | ||||
|             } | ||||
|  | ||||
|             return output; | ||||
| @@ -257,8 +236,16 @@ namespace Ryujinx.Graphics.Texture | ||||
|                     mipGobBlocksInZ >>= 1; | ||||
|                 } | ||||
|  | ||||
|                 int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16); | ||||
|  | ||||
|                 int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64); | ||||
|  | ||||
|                 int xStart = strideTrunc / bytesPerPixel; | ||||
|  | ||||
|                 int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment); | ||||
|  | ||||
|                 int inStrideGap = stride - w * bytesPerPixel; | ||||
|  | ||||
|                 int alignment = gobWidth; | ||||
|  | ||||
|                 if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight) | ||||
| @@ -276,13 +263,14 @@ namespace Ryujinx.Graphics.Texture | ||||
|                     mipGobBlocksInZ, | ||||
|                     bytesPerPixel); | ||||
|  | ||||
|                 unsafe void Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged | ||||
|                 unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged | ||||
|                 { | ||||
|                     fixed (byte* outputBPtr = output, dataBPtr = data) | ||||
|                     fixed (byte* outputPtr = output, dataPtr = data) | ||||
|                     { | ||||
|                         byte* inPtr = dataPtr + inOffs; | ||||
|                         for (int layer = 0; layer < layers; layer++) | ||||
|                         { | ||||
|                             int outBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level); | ||||
|                             byte* outBaseOffset = outputPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level)); | ||||
|  | ||||
|                             for (int z = 0; z < d; z++) | ||||
|                             { | ||||
| @@ -290,44 +278,58 @@ namespace Ryujinx.Graphics.Texture | ||||
|                                 for (int y = 0; y < h; y++) | ||||
|                                 { | ||||
|                                     layoutConverter.SetY(y); | ||||
|                                     for (int x = 0; x < w; x++) | ||||
|                                     { | ||||
|                                         int offset = outBaseOffset + layoutConverter.GetOffset(x); | ||||
|  | ||||
|                                         *(T*)(outputBPtr + offset) = ((T*)(dataBPtr + inOffs))[x]; | ||||
|                                     for (int x = 0; x < strideTrunc64; x += 64, inPtr += 64) | ||||
|                                     { | ||||
|                                         byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x); | ||||
|                                         byte* offset2 = offset + 0x20; | ||||
|                                         byte* offset3 = offset + 0x100; | ||||
|                                         byte* offset4 = offset + 0x120; | ||||
|  | ||||
|                                         Vector128<byte> value = *(Vector128<byte>*)inPtr; | ||||
|                                         Vector128<byte> value2 = *(Vector128<byte>*)(inPtr + 16); | ||||
|                                         Vector128<byte> value3 = *(Vector128<byte>*)(inPtr + 32); | ||||
|                                         Vector128<byte> value4 = *(Vector128<byte>*)(inPtr + 48); | ||||
|  | ||||
|                                         *(Vector128<byte>*)offset = value; | ||||
|                                         *(Vector128<byte>*)offset2 = value2; | ||||
|                                         *(Vector128<byte>*)offset3 = value3; | ||||
|                                         *(Vector128<byte>*)offset4 = value4; | ||||
|                                     } | ||||
|  | ||||
|                                     inOffs += stride; | ||||
|                                     for (int x = 0; x < strideTrunc; x += 16, inPtr += 16) | ||||
|                                     { | ||||
|                                         byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x); | ||||
|  | ||||
|                                         *(Vector128<byte>*)offset = *(Vector128<byte>*)inPtr; | ||||
|                                     } | ||||
|  | ||||
|                                     for (int x = xStart; x < w; x++, inPtr += bytesPerPixel) | ||||
|                                     { | ||||
|                                         byte* offset = outBaseOffset + layoutConverter.GetOffset(x); | ||||
|  | ||||
|                                         *(T*)offset = *(T*)inPtr; | ||||
|                                     } | ||||
|  | ||||
|                                     inPtr += inStrideGap; | ||||
|                                 } | ||||
|                             } | ||||
|                         } | ||||
|                         inOffs += stride * h * d * layers; | ||||
|                     } | ||||
|                     return true; | ||||
|                 } | ||||
|  | ||||
|                 switch (bytesPerPixel) | ||||
|                 bool _ = bytesPerPixel switch | ||||
|                 { | ||||
|                     case 1: | ||||
|                         Convert<byte>(output, data); | ||||
|                         break; | ||||
|                     case 2: | ||||
|                         Convert<ushort>(output, data); | ||||
|                         break; | ||||
|                     case 4: | ||||
|                         Convert<uint>(output, data); | ||||
|                         break; | ||||
|                     case 8: | ||||
|                         Convert<ulong>(output, data); | ||||
|                         break; | ||||
|                     case 12: | ||||
|                         Convert<Bpp12Pixel>(output, data); | ||||
|                         break; | ||||
|                     case 16: | ||||
|                         Convert<Vector128<byte>>(output, data); | ||||
|                         break; | ||||
|  | ||||
|                     default: | ||||
|                         throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format."); | ||||
|                 } | ||||
|                     1 => Convert<byte>(output, data), | ||||
|                     2 => Convert<ushort>(output, data), | ||||
|                     4 => Convert<uint>(output, data), | ||||
|                     8 => Convert<ulong>(output, data), | ||||
|                     12 => Convert<Bpp12Pixel>(output, data), | ||||
|                     16 => Convert<Vector128<byte>>(output, data), | ||||
|                     _ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.") | ||||
|                 }; | ||||
|             } | ||||
|  | ||||
|             return output; | ||||
| @@ -342,56 +344,23 @@ namespace Ryujinx.Graphics.Texture | ||||
|             int bytesPerPixel, | ||||
|             ReadOnlySpan<byte> data) | ||||
|         { | ||||
|             int w = BitUtils.DivRoundUp(width,  blockWidth); | ||||
|             int w = BitUtils.DivRoundUp(width, blockWidth); | ||||
|             int h = BitUtils.DivRoundUp(height, blockHeight); | ||||
|  | ||||
|             int inStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment); | ||||
|             int lineSize = width * bytesPerPixel; | ||||
|  | ||||
|             Span<byte> output = new byte[h * stride]; | ||||
|  | ||||
|             int inOffs = 0; | ||||
|             int outOffs = 0; | ||||
|  | ||||
|             unsafe void Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged | ||||
|             for (int y = 0; y < h; y++) | ||||
|             { | ||||
|                 fixed (byte* outputBPtr = output, dataBPtr = data) | ||||
|                 { | ||||
|                     for (int y = 0; y < h; y++) | ||||
|                     { | ||||
|                         for (int x = 0; x < w; x++) | ||||
|                         { | ||||
|                             int offset = y * stride + x * bytesPerPixel; | ||||
|                 data.Slice(inOffs, lineSize).CopyTo(output.Slice(outOffs, lineSize)); | ||||
|  | ||||
|                             *(T*)(outputBPtr + offset) = ((T*)(dataBPtr + inOffs))[x]; | ||||
|                         } | ||||
|  | ||||
|                         inOffs += inStride; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             switch (bytesPerPixel) | ||||
|             { | ||||
|                 case 1: | ||||
|                     Convert<byte>(output, data); | ||||
|                     break; | ||||
|                 case 2: | ||||
|                     Convert<ushort>(output, data); | ||||
|                     break; | ||||
|                 case 4: | ||||
|                     Convert<uint>(output, data); | ||||
|                     break; | ||||
|                 case 8: | ||||
|                     Convert<ulong>(output, data); | ||||
|                     break; | ||||
|                 case 12: | ||||
|                     Convert<Bpp12Pixel>(output, data); | ||||
|                     break; | ||||
|                 case 16: | ||||
|                     Convert<Vector128<byte>>(output, data); | ||||
|                     break; | ||||
|  | ||||
|                 default: | ||||
|                     throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format."); | ||||
|                 inOffs += inStride; | ||||
|                 outOffs += stride; | ||||
|             } | ||||
|  | ||||
|             return output; | ||||
|   | ||||
| @@ -84,24 +84,11 @@ namespace Ryujinx.Graphics.Texture | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||||
|         public int GetOffsetWithLineOffset(int x) | ||||
|         { | ||||
|             if (_isLinear) | ||||
|             { | ||||
|                 return x + _yPart; | ||||
|             } | ||||
|             else | ||||
|             { | ||||
|                 return _layoutConverter.GetOffsetWithLineOffset(x); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         public (int offset, int size) GetRectangleRange(int x, int y, int width, int height) | ||||
|         { | ||||
|             if (_isLinear) | ||||
|             { | ||||
|                 return (y * _stride + x, height * _stride); | ||||
|                 return (y * _stride + x * _bytesPerPixel, height * _stride); | ||||
|             } | ||||
|             else | ||||
|             { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user