From eafee34feebd432151809df402f3f696e4d93d08 Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Wed, 18 Nov 2020 15:28:40 -0300
Subject: [PATCH] Improvements with new .NET 5 functions or bugfixes (#1714)

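* Improvements with new .NET 5 functions or bugfixes:
  - XXHash128: use Math.BigMul in Mult64To128 instead of the BMI2 intrinsic
    and the manual 32-bit fallback.
  - Use [SkipLocalsInit] with stackalloc instead of empty fixed-size structs
    to get uninitialized scratch buffers (XXHash128, DecodeFrame, Convolve).
  - Re-enable AggressiveInlining on BitUtils.ClipPixel, which had been
    disabled pending an AVX2 gather bug fix.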

* Mult64To128 no longer needs to be unsafe
---
 Ryujinx.Common/XXHash128.cs                   | 31 ++++---------------
 Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs |  3 +-
 Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs     |  9 ++----
 Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs    |  9 ++----
 4 files changed, 11 insertions(+), 41 deletions(-)

diff --git a/Ryujinx.Common/XXHash128.cs b/Ryujinx.Common/XXHash128.cs
index 827e4cb221..731a624ec5 100644
--- a/Ryujinx.Common/XXHash128.cs
+++ b/Ryujinx.Common/XXHash128.cs
@@ -66,33 +66,13 @@ namespace Ryujinx.Common
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private unsafe static Hash128 Mult64To128(ulong lhs, ulong rhs)
+        private static Hash128 Mult64To128(ulong lhs, ulong rhs)
         {
-            // TODO: Use BigMul once .NET 5 lands.
-            if (Bmi2.X64.IsSupported)
-            {
-                ulong low;
-                ulong high = Bmi2.X64.MultiplyNoFlags(lhs, rhs, &low);
-                return new Hash128
-                {
-                    Low = low,
-                    High = high
-                };
-            }
-
-            ulong loLo = Mult32To64((uint)lhs, (uint)rhs);
-            ulong hiLo = Mult32To64(lhs >> 32, (uint)rhs);
-            ulong loHi = Mult32To64((uint)lhs, rhs >> 32);
-            ulong hiHi = Mult32To64(lhs >> 32, rhs >> 32);
-
-            ulong cross = (loLo >> 32) + (uint)hiLo + loHi;
-            ulong upper = (hiLo >> 32) + (cross >> 32) + hiHi;
-            ulong lower = (cross << 32) | (uint)loLo;
-
+            ulong high = Math.BigMul(lhs, rhs, out ulong low);
             return new Hash128
             {
-                Low = lower,
-                High = upper
+                Low = low,
+                High = high
             };
         }
 
@@ -321,9 +301,10 @@ namespace Ryujinx.Common
             return Xxh3Avalanche(result64);
         }
 
+        [SkipLocalsInit]
         private static Hash128 Xxh3HashLong128bInternal(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret)
         {
-            Span<ulong> acc = stackalloc ulong[AccNb]; // TODO: Use SkipLocalsInit attribute once .NET 5 lands.
+            Span<ulong> acc = stackalloc ulong[AccNb];
             Xxh3InitAcc.CopyTo(acc);
 
             Xxh3HashLongInternalLoop(acc, input, secret);
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs b/Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs
index a7c6d148f8..641188f8a3 100644
--- a/Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs
@@ -7,8 +7,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Common
 {
     internal static class BitUtils
     {
-        // FIXME: Enable inlining here after AVX2 gather bug is fixed.
-        // [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static byte ClipPixel(int val)
         {
             return (byte)((val > 255) ? 255 : (val < 0) ? 0 : val);
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs b/Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs
index 012b0c60ad..9e267376fd 100644
--- a/Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs
+++ b/Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs
@@ -374,11 +374,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
             } while (--bH != 0);
         }
 
-        [StructLayout(LayoutKind.Sequential, Size = 80 * 2 * 80 * 2)]
-        struct McBufHigh
-        {
-        }
-
+        [SkipLocalsInit]
         private static unsafe void ExtendAndPredict(
             byte* bufPtr1,
             int preBufStride,
@@ -402,8 +398,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
             int xs,
             int ys)
         {
-            McBufHigh mcBufHighStruct;
-            ushort* mcBufHigh = (ushort*)Unsafe.AsPointer(ref mcBufHighStruct); // Avoid zero initialization.
+            ushort* mcBufHigh = stackalloc ushort[80 * 2 * 80 * 2];
             if (xd.CurBuf.HighBd)
             {
                 HighBuildMcBorder(bufPtr1, preBufStride, mcBufHigh, bW, x0, y0, bW, bH, frameWidth, frameHeight);
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs
index b74c33dc24..1a2969af44 100644
--- a/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs
@@ -389,11 +389,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
             ConvolveAvgVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
         }
 
-        [StructLayout(LayoutKind.Sequential, Size = 64 * 135)]
-        struct Temp
-        {
-        }
-
+        [SkipLocalsInit]
         public static unsafe void Convolve8(
             byte* src,
             int srcStride,
@@ -422,8 +418,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
             // When calling in frame scaling function, the smallest scaling factor is x1/4
             // ==> yStepQ4 = 64. Since w and h are at most 16, the temp buffer is still
             // big enough.
-            Temp tempStruct;
-            byte* temp = (byte*)Unsafe.AsPointer(ref tempStruct); // Avoid zero initialization.
+            byte* temp = stackalloc byte[64 * 135];
             int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps;
 
             Debug.Assert(w <= 64);