1
0
mirror of https://github.com/Ryujinx/Ryujinx.git synced 2025-01-11 23:11:56 -08:00

New NVDEC and VIC implementation ()

* Initial NVDEC and VIC implementation

* Update FFmpeg.AutoGen to 4.3.0

* Add nvdec dependencies for Windows

* Unify some VP9 structures

* Rename VP9 structure fields

* Improvements to Video API

* XML docs for Common.Memory

* Remove now unused or redundant overloads from MemoryAccessor

* NVDEC UV surface read/write scalar paths

* Add FIXME comments about hacky things/stuff that will need to be fixed in the future

* Cleaned up VP9 memory allocation

* Remove some debug logs

* Rename some VP9 structs

* Remove unused struct

* No need to compile Ryujinx.Graphics.Host1x with unsafe anymore

* Name AsyncWorkQueue threads to make debugging easier

* Make Vp9PictureInfo a ref struct

* LayoutConverter no longer needs the depth argument (broken by rebase)

* Pooling of VP9 buffers, plus fix a memory leak on VP9

* Really wish VS could rename projects properly...

* Address feedback

* Remove using

* Catch OperationCanceledException

* Add licensing informations

* Add THIRDPARTY.md to release too

Co-authored-by: Thog <me@thog.eu>
This commit is contained in:
gdkchan 2020-07-12 00:07:01 -03:00 committed by GitHub
parent 38b26cf424
commit 4d02a2d2c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
202 changed files with 20563 additions and 2567 deletions
README.md
Ryujinx.Common
Ryujinx.Cpu
Ryujinx.Graphics.Device
Ryujinx.Graphics.Gpu
Ryujinx.Graphics.Host1x
Ryujinx.Graphics.Nvdec.H264
Ryujinx.Graphics.Nvdec.Vp9

@ -112,3 +112,9 @@ If you need help with setting up Ryujinx, you can ask questions in the #support
If you have contributions, need support, have suggestions, or just want to get in touch with the team, join our [Discord server](https://discord.gg/N2FmfVc)!
If you'd like to donate, please take a look at our [Patreon](https://www.patreon.com/ryujinx).
## License
This software is licensed under the terms of the MIT license.
This project makes use of code authored by the libvpx project, licensed under BSD and the ffmpeg project, licensed under LGPLv3.
See [LICENSE.txt](LICENSE.txt) and [THIRDPARTY.md](Ryujinx/THIRDPARTY.md) for more details.

@ -0,0 +1,100 @@
using System;
using System.Collections.Concurrent;
using System.Threading;
namespace Ryujinx.Common
{
public sealed class AsyncWorkQueue<T> : IDisposable
{
private readonly Thread _workerThread;
private readonly CancellationTokenSource _cts;
private readonly Action<T> _workerAction;
private readonly BlockingCollection<T> _queue;
public bool IsCancellationRequested => _cts.IsCancellationRequested;
public AsyncWorkQueue(Action<T> callback, string name = null) : this(callback, name, new BlockingCollection<T>())
{
}
public AsyncWorkQueue(Action<T> callback, string name, BlockingCollection<T> collection)
{
_cts = new CancellationTokenSource();
_queue = collection;
_workerAction = callback;
_workerThread = new Thread(DoWork) { Name = name };
_workerThread.IsBackground = true;
_workerThread.Start();
}
private void DoWork()
{
try
{
foreach (var item in _queue.GetConsumingEnumerable(_cts.Token))
{
_workerAction(item);
}
}
catch (OperationCanceledException)
{
}
}
public void Cancel()
{
_cts.Cancel();
}
public void CancelAfter(int millisecondsDelay)
{
_cts.CancelAfter(millisecondsDelay);
}
public void CancelAfter(TimeSpan delay)
{
_cts.CancelAfter(delay);
}
public void Add(T workItem)
{
_queue.Add(workItem);
}
public void Add(T workItem, CancellationToken cancellationToken)
{
_queue.Add(workItem, cancellationToken);
}
public bool TryAdd(T workItem)
{
return _queue.TryAdd(workItem);
}
public bool TryAdd(T workItem, int millisecondsDelay)
{
return _queue.TryAdd(workItem, millisecondsDelay);
}
public bool TryAdd(T workItem, int millisecondsDelay, CancellationToken cancellationToken)
{
return _queue.TryAdd(workItem, millisecondsDelay, cancellationToken);
}
public bool TryAdd(T workItem, TimeSpan timeout)
{
return _queue.TryAdd(workItem, timeout);
}
public void Dispose()
{
_queue.CompleteAdding();
_cts.Cancel();
_workerThread.Join();
_queue.Dispose();
_cts.Dispose();
}
}
}

@ -9,12 +9,14 @@ namespace Ryujinx.Common.Logging
Emulation,
Gpu,
Hid,
Host1x,
Kernel,
KernelIpc,
KernelScheduler,
KernelSvc,
Loader,
ModLoader,
Nvdec,
Ptc,
Service,
ServiceAcc,
@ -50,6 +52,7 @@ namespace Ryujinx.Common.Logging
ServiceSss,
ServiceTime,
ServiceVi,
SurfaceFlinger
SurfaceFlinger,
Vic
}
}

@ -0,0 +1,123 @@
using System;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Ryujinx.Common.Memory
{
/// <summary>
/// Represents an array of unmanaged resources.
/// </summary>
/// <typeparam name="T">Array element type</typeparam>
public unsafe struct ArrayPtr<T> : IEquatable<ArrayPtr<T>>, IArray<T> where T : unmanaged
{
private IntPtr _ptr;
/// <summary>
/// Null pointer.
/// </summary>
public static ArrayPtr<T> Null => new ArrayPtr<T>() { _ptr = IntPtr.Zero };
/// <summary>
/// True if the pointer is null, false otherwise.
/// </summary>
public bool IsNull => _ptr == IntPtr.Zero;
/// <summary>
/// Number of elements on the array.
/// </summary>
public int Length { get; }
/// <summary>
/// Gets a reference to the item at the given index.
/// </summary>
/// <remarks>
/// No bounds checks are performed, this allows negative indexing,
/// but care must be taken if the index may be out of bounds.
/// </remarks>
/// <param name="index">Index of the element</param>
/// <returns>Reference to the element at the given index</returns>
public ref T this[int index] => ref Unsafe.AsRef<T>((T*)_ptr + index);
/// <summary>
/// Creates a new array from a given reference.
/// </summary>
/// <remarks>
/// For data on the heap, proper pinning is necessary during
/// use. Failure to do so will result in memory corruption and crashes.
/// </remarks>
/// <param name="value">Reference of the first array element</param>
/// <param name="length">Number of elements on the array</param>
public ArrayPtr(ref T value, int length)
{
_ptr = (IntPtr)Unsafe.AsPointer(ref value);
Length = length;
}
/// <summary>
/// Creates a new array from a given pointer.
/// </summary>
/// <param name="ptr">Array base pointer</param>
/// <param name="length">Number of elements on the array</param>
public ArrayPtr(T* ptr, int length)
{
_ptr = (IntPtr)ptr;
Length = length;
}
/// <summary>
/// Creates a new array from a given pointer.
/// </summary>
/// <param name="ptr">Array base pointer</param>
/// <param name="length">Number of elements on the array</param>
public ArrayPtr(IntPtr ptr, int length)
{
_ptr = ptr;
Length = length;
}
/// <summary>
/// Splits the array starting at the specified position.
/// </summary>
/// <param name="start">Index where the new array should start</param>
/// <returns>New array starting at the specified position</returns>
public ArrayPtr<T> Slice(int start) => new ArrayPtr<T>(ref this[start], Length - start);
/// <summary>
/// Gets a span from the array.
/// </summary>
/// <returns>Span of the array</returns>
public Span<T> ToSpan() => Length == 0 ? Span<T>.Empty : MemoryMarshal.CreateSpan(ref this[0], Length);
/// <summary>
/// Gets the array base pointer.
/// </summary>
/// <returns>Base pointer</returns>
public T* ToPointer() => (T*)_ptr;
public override bool Equals(object obj)
{
return obj is ArrayPtr<T> other && Equals(other);
}
public bool Equals([AllowNull] ArrayPtr<T> other)
{
return _ptr == other._ptr && Length == other.Length;
}
public override int GetHashCode()
{
return HashCode.Combine(_ptr, Length);
}
public static bool operator ==(ArrayPtr<T> left, ArrayPtr<T> right)
{
return left.Equals(right);
}
public static bool operator !=(ArrayPtr<T> left, ArrayPtr<T> right)
{
return !(left == right);
}
}
}

@ -0,0 +1,21 @@
namespace Ryujinx.Common.Memory
{
/// <summary>
/// Array interface.
/// </summary>
/// <typeparam name="T">Element type</typeparam>
public interface IArray<T> where T : unmanaged
{
/// <summary>
/// Used to index the array.
/// </summary>
/// <param name="index">Element index</param>
/// <returns>Element at the specified index</returns>
ref T this[int index] { get; }
/// <summary>
/// Number of elements on the array.
/// </summary>
int Length { get; }
}
}

@ -0,0 +1,68 @@
using System;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
namespace Ryujinx.Common.Memory
{
/// <summary>
/// Represents a pointer to an unmanaged resource.
/// </summary>
/// <typeparam name="T">Type of the unmanaged resource</typeparam>
public unsafe struct Ptr<T> : IEquatable<Ptr<T>> where T : unmanaged
{
private IntPtr _ptr;
/// <summary>
/// Null pointer.
/// </summary>
public static Ptr<T> Null => new Ptr<T>() { _ptr = IntPtr.Zero };
/// <summary>
/// True if the pointer is null, false otherwise.
/// </summary>
public bool IsNull => _ptr == IntPtr.Zero;
/// <summary>
/// Gets a reference to the value.
/// </summary>
public ref T Value => ref Unsafe.AsRef<T>((void*)_ptr);
/// <summary>
/// Creates a new pointer to an unmanaged resource.
/// </summary>
/// <remarks>
/// For data on the heap, proper pinning is necessary during
/// use. Failure to do so will result in memory corruption and crashes.
/// </remarks>
/// <param name="value">Reference to the unmanaged resource</param>
public Ptr(ref T value)
{
_ptr = (IntPtr)Unsafe.AsPointer(ref value);
}
public override bool Equals(object obj)
{
return obj is Ptr<T> other && Equals(other);
}
public bool Equals([AllowNull] Ptr<T> other)
{
return _ptr == other._ptr;
}
public override int GetHashCode()
{
return _ptr.GetHashCode();
}
public static bool operator ==(Ptr<T> left, Ptr<T> right)
{
return left.Equals(right);
}
public static bool operator !=(Ptr<T> left, Ptr<T> right)
{
return !(left == right);
}
}
}

@ -0,0 +1,518 @@
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Common.Memory
{
public struct Array1<T> : IArray<T> where T : unmanaged
{
T _e0;
public int Length => 1;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 1);
}
public struct Array2<T> : IArray<T> where T : unmanaged
{
T _e0;
Array1<T> _other;
public int Length => 2;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 2);
}
public struct Array3<T> : IArray<T> where T : unmanaged
{
T _e0;
Array2<T> _other;
public int Length => 3;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 3);
}
public struct Array4<T> : IArray<T> where T : unmanaged
{
T _e0;
Array3<T> _other;
public int Length => 4;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 4);
}
public struct Array5<T> : IArray<T> where T : unmanaged
{
T _e0;
Array4<T> _other;
public int Length => 5;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 5);
}
public struct Array6<T> : IArray<T> where T : unmanaged
{
T _e0;
Array5<T> _other;
public int Length => 6;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 6);
}
public struct Array7<T> : IArray<T> where T : unmanaged
{
T _e0;
Array6<T> _other;
public int Length => 7;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 7);
}
public struct Array8<T> : IArray<T> where T : unmanaged
{
T _e0;
Array7<T> _other;
public int Length => 8;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 8);
}
public struct Array9<T> : IArray<T> where T : unmanaged
{
T _e0;
Array8<T> _other;
public int Length => 9;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 9);
}
public struct Array10<T> : IArray<T> where T : unmanaged
{
T _e0;
Array9<T> _other;
public int Length => 10;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 10);
}
public struct Array11<T> : IArray<T> where T : unmanaged
{
T _e0;
Array10<T> _other;
public int Length => 11;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 11);
}
public struct Array12<T> : IArray<T> where T : unmanaged
{
T _e0;
Array11<T> _other;
public int Length => 12;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 12);
}
public struct Array13<T> : IArray<T> where T : unmanaged
{
T _e0;
Array12<T> _other;
public int Length => 13;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 13);
}
public struct Array14<T> : IArray<T> where T : unmanaged
{
T _e0;
Array13<T> _other;
public int Length => 14;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 14);
}
public struct Array15<T> : IArray<T> where T : unmanaged
{
T _e0;
Array14<T> _other;
public int Length => 15;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 15);
}
public struct Array16<T> : IArray<T> where T : unmanaged
{
T _e0;
Array15<T> _other;
public int Length => 16;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 16);
}
public struct Array17<T> : IArray<T> where T : unmanaged
{
T _e0;
Array16<T> _other;
public int Length => 17;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 17);
}
public struct Array18<T> : IArray<T> where T : unmanaged
{
T _e0;
Array17<T> _other;
public int Length => 18;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 18);
}
public struct Array19<T> : IArray<T> where T : unmanaged
{
T _e0;
Array18<T> _other;
public int Length => 19;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 19);
}
public struct Array20<T> : IArray<T> where T : unmanaged
{
T _e0;
Array19<T> _other;
public int Length => 20;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 20);
}
public struct Array21<T> : IArray<T> where T : unmanaged
{
T _e0;
Array20<T> _other;
public int Length => 21;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 21);
}
public struct Array22<T> : IArray<T> where T : unmanaged
{
T _e0;
Array21<T> _other;
public int Length => 22;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 22);
}
public struct Array23<T> : IArray<T> where T : unmanaged
{
T _e0;
Array22<T> _other;
public int Length => 23;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 23);
}
public struct Array24<T> : IArray<T> where T : unmanaged
{
T _e0;
Array23<T> _other;
public int Length => 24;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 24);
}
public struct Array25<T> : IArray<T> where T : unmanaged
{
T _e0;
Array24<T> _other;
public int Length => 25;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 25);
}
public struct Array26<T> : IArray<T> where T : unmanaged
{
T _e0;
Array25<T> _other;
public int Length => 26;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 26);
}
public struct Array27<T> : IArray<T> where T : unmanaged
{
T _e0;
Array26<T> _other;
public int Length => 27;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 27);
}
public struct Array28<T> : IArray<T> where T : unmanaged
{
T _e0;
Array27<T> _other;
public int Length => 28;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 28);
}
public struct Array29<T> : IArray<T> where T : unmanaged
{
T _e0;
Array28<T> _other;
public int Length => 29;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 29);
}
public struct Array30<T> : IArray<T> where T : unmanaged
{
T _e0;
Array29<T> _other;
public int Length => 30;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 30);
}
public struct Array31<T> : IArray<T> where T : unmanaged
{
T _e0;
Array30<T> _other;
public int Length => 31;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 31);
}
public struct Array32<T> : IArray<T> where T : unmanaged
{
T _e0;
Array31<T> _other;
public int Length => 32;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 32);
}
public struct Array33<T> : IArray<T> where T : unmanaged
{
T _e0;
Array32<T> _other;
public int Length => 33;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 33);
}
public struct Array34<T> : IArray<T> where T : unmanaged
{
T _e0;
Array33<T> _other;
public int Length => 34;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 34);
}
public struct Array35<T> : IArray<T> where T : unmanaged
{
T _e0;
Array34<T> _other;
public int Length => 35;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 35);
}
public struct Array36<T> : IArray<T> where T : unmanaged
{
T _e0;
Array35<T> _other;
public int Length => 36;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 36);
}
public struct Array37<T> : IArray<T> where T : unmanaged
{
T _e0;
Array36<T> _other;
public int Length => 37;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 37);
}
public struct Array38<T> : IArray<T> where T : unmanaged
{
T _e0;
Array37<T> _other;
public int Length => 38;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 38);
}
public struct Array39<T> : IArray<T> where T : unmanaged
{
T _e0;
Array38<T> _other;
public int Length => 39;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 39);
}
public struct Array40<T> : IArray<T> where T : unmanaged
{
T _e0;
Array39<T> _other;
public int Length => 40;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 40);
}
public struct Array41<T> : IArray<T> where T : unmanaged
{
T _e0;
Array40<T> _other;
public int Length => 41;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 41);
}
public struct Array42<T> : IArray<T> where T : unmanaged
{
T _e0;
Array41<T> _other;
public int Length => 42;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 42);
}
public struct Array43<T> : IArray<T> where T : unmanaged
{
T _e0;
Array42<T> _other;
public int Length => 43;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 43);
}
public struct Array44<T> : IArray<T> where T : unmanaged
{
T _e0;
Array43<T> _other;
public int Length => 44;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 44);
}
public struct Array45<T> : IArray<T> where T : unmanaged
{
T _e0;
Array44<T> _other;
public int Length => 45;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 45);
}
public struct Array46<T> : IArray<T> where T : unmanaged
{
T _e0;
Array45<T> _other;
public int Length => 46;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 46);
}
public struct Array47<T> : IArray<T> where T : unmanaged
{
T _e0;
Array46<T> _other;
public int Length => 47;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 47);
}
public struct Array48<T> : IArray<T> where T : unmanaged
{
T _e0;
Array47<T> _other;
public int Length => 48;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 48);
}
public struct Array49<T> : IArray<T> where T : unmanaged
{
T _e0;
Array48<T> _other;
public int Length => 49;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 49);
}
public struct Array50<T> : IArray<T> where T : unmanaged
{
T _e0;
Array49<T> _other;
public int Length => 50;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 50);
}
public struct Array51<T> : IArray<T> where T : unmanaged
{
T _e0;
Array50<T> _other;
public int Length => 51;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 51);
}
public struct Array52<T> : IArray<T> where T : unmanaged
{
T _e0;
Array51<T> _other;
public int Length => 52;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 52);
}
public struct Array53<T> : IArray<T> where T : unmanaged
{
T _e0;
Array52<T> _other;
public int Length => 53;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 53);
}
public struct Array54<T> : IArray<T> where T : unmanaged
{
T _e0;
Array53<T> _other;
public int Length => 54;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 54);
}
public struct Array55<T> : IArray<T> where T : unmanaged
{
T _e0;
Array54<T> _other;
public int Length => 55;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 55);
}
public struct Array56<T> : IArray<T> where T : unmanaged
{
T _e0;
Array55<T> _other;
public int Length => 56;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 56);
}
public struct Array57<T> : IArray<T> where T : unmanaged
{
T _e0;
Array56<T> _other;
public int Length => 57;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 57);
}
public struct Array58<T> : IArray<T> where T : unmanaged
{
T _e0;
Array57<T> _other;
public int Length => 58;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 58);
}
public struct Array59<T> : IArray<T> where T : unmanaged
{
T _e0;
Array58<T> _other;
public int Length => 59;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 59);
}
public struct Array60<T> : IArray<T> where T : unmanaged
{
T _e0;
Array59<T> _other;
public int Length => 60;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 60);
}
public struct Array61<T> : IArray<T> where T : unmanaged
{
T _e0;
Array60<T> _other;
public int Length => 61;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 61);
}
public struct Array62<T> : IArray<T> where T : unmanaged
{
T _e0;
Array61<T> _other;
public int Length => 62;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 62);
}
public struct Array63<T> : IArray<T> where T : unmanaged
{
T _e0;
Array62<T> _other;
public int Length => 63;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 63);
}
public struct Array64<T> : IArray<T> where T : unmanaged
{
T _e0;
Array63<T> _other;
public int Length => 64;
public ref T this[int index] => ref ToSpan()[index];
public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 64);
}
}

@ -193,6 +193,38 @@ namespace Ryujinx.Cpu
}
}
/// <summary>
/// Gets a region of memory that can be written to.
/// </summary>
/// <remarks>
/// If the requested region is not contiguous in physical memory,
/// this will perform an allocation, and flush the data (writing it
/// back to guest memory) on disposal.
/// </remarks>
/// <param name="va">Virtual address of the data</param>
/// <param name="size">Size of the data</param>
/// <returns>A writable region of memory containing the data</returns>
public WritableRegion GetWritableRegion(ulong va, int size)
{
if (size == 0)
{
return new WritableRegion(null, va, Memory<byte>.Empty);
}
if (IsContiguous(va, size))
{
return new WritableRegion(null, va, _backingMemory.GetMemory(GetPhysicalAddressInternal(va), size));
}
else
{
Memory<byte> memory = new byte[size];
GetSpan(va, size).CopyTo(memory.Span);
return new WritableRegion(this, va, memory);
}
}
/// <summary>
/// Gets a reference for the given type at the specified virtual memory address.
/// </summary>

@ -0,0 +1,29 @@
using System;
namespace Ryujinx.Cpu
{
public sealed class WritableRegion : IDisposable
{
private readonly MemoryManager _mm;
private readonly ulong _va;
private bool NeedsWriteback => _mm != null;
public Memory<byte> Memory { get; }
internal WritableRegion(MemoryManager mm, ulong va, Memory<byte> memory)
{
_mm = mm;
_va = va;
Memory = memory;
}
public void Dispose()
{
if (NeedsWriteback)
{
_mm.Write(_va, Memory.Span);
}
}
}
}

@ -0,0 +1,10 @@
namespace Ryujinx.Graphics.Device
{
public enum AccessControl
{
None = 0,
ReadOnly = 1 << 0,
WriteOnly = 1 << 1,
ReadWrite = ReadOnly | WriteOnly
}
}

@ -0,0 +1,124 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Reflection;
using System.Runtime.CompilerServices;
namespace Ryujinx.Graphics.Device
{
public class DeviceState<TState> : IDeviceState where TState : unmanaged
{
private const int RegisterSize = sizeof(int);
public TState State;
private readonly BitArray _readableRegisters;
private readonly BitArray _writableRegisters;
private readonly Dictionary<int, Func<int>> _readCallbacks;
private readonly Dictionary<int, Action<int>> _writeCallbacks;
public DeviceState(IReadOnlyDictionary<string, RwCallback> callbacks = null)
{
int size = (Unsafe.SizeOf<TState>() + RegisterSize - 1) / RegisterSize;
_readableRegisters = new BitArray(size);
_writableRegisters = new BitArray(size);
_readCallbacks = new Dictionary<int, Func<int>>();
_writeCallbacks = new Dictionary<int, Action<int>>();
var fields = typeof(TState).GetFields();
int offset = 0;
for (int fieldIndex = 0; fieldIndex < fields.Length; fieldIndex++)
{
var field = fields[fieldIndex];
var regAttr = field.GetCustomAttributes<RegisterAttribute>(false).FirstOrDefault();
int sizeOfField = SizeCalculator.SizeOf(field.FieldType);
for (int i = 0; i < ((sizeOfField + 3) & ~3); i += 4)
{
_readableRegisters[(offset + i) / RegisterSize] = regAttr?.AccessControl.HasFlag(AccessControl.ReadOnly) ?? true;
_writableRegisters[(offset + i) / RegisterSize] = regAttr?.AccessControl.HasFlag(AccessControl.WriteOnly) ?? true;
}
if (callbacks != null && callbacks.TryGetValue(field.Name, out var cb))
{
if (cb.Read != null)
{
_readCallbacks.Add(offset, cb.Read);
}
if (cb.Write != null)
{
_writeCallbacks.Add(offset, cb.Write);
}
}
offset += sizeOfField;
}
Debug.Assert(offset == Unsafe.SizeOf<TState>());
}
public virtual int Read(int offset)
{
if (Check(offset) && _readableRegisters[offset / RegisterSize])
{
int alignedOffset = Align(offset);
if (_readCallbacks.TryGetValue(alignedOffset, out Func<int> read))
{
return read();
}
else
{
return GetRef<int>(alignedOffset);
}
}
return 0;
}
public virtual void Write(int offset, int data)
{
if (Check(offset) && _writableRegisters[offset / RegisterSize])
{
int alignedOffset = Align(offset);
if (_writeCallbacks.TryGetValue(alignedOffset, out Action<int> write))
{
write(data);
}
else
{
GetRef<int>(alignedOffset) = data;
}
}
}
private bool Check(int offset)
{
return (uint)Align(offset) < Unsafe.SizeOf<TState>();
}
public ref T GetRef<T>(int offset) where T : unmanaged
{
if ((uint)(offset + Unsafe.SizeOf<T>()) > Unsafe.SizeOf<TState>())
{
throw new ArgumentOutOfRangeException(nameof(offset));
}
return ref Unsafe.As<TState, T>(ref Unsafe.AddByteOffset(ref State, (IntPtr)offset));
}
private static int Align(int offset)
{
return offset & ~(RegisterSize - 1);
}
}
}

@ -0,0 +1,8 @@
namespace Ryujinx.Graphics.Device
{
public interface IDeviceState
{
int Read(int offset);
void Write(int offset, int data);
}
}

@ -0,0 +1,15 @@
using System;
namespace Ryujinx.Graphics.Device
{
[AttributeUsage(AttributeTargets.Field, AllowMultiple = false)]
public sealed class RegisterAttribute : Attribute
{
public AccessControl AccessControl { get; }
public RegisterAttribute(AccessControl ac)
{
AccessControl = ac;
}
}
}

@ -0,0 +1,16 @@
using System;
namespace Ryujinx.Graphics.Device
{
public struct RwCallback
{
public Action<int> Write { get; }
public Func<int> Read { get; }
public RwCallback(Action<int> write, Func<int> read)
{
Write = write;
Read = read;
}
}
}

@ -0,0 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>
</PropertyGroup>
</Project>

@ -0,0 +1,63 @@
using System;
using System.Reflection;
namespace Ryujinx.Graphics.Device
{
static class SizeCalculator
{
public static int SizeOf(Type type)
{
// Is type a enum type?
if (type.IsEnum)
{
type = type.GetEnumUnderlyingType();
}
// Is type a pointer type?
if (type.IsPointer || type == typeof(IntPtr) || type == typeof(UIntPtr))
{
return IntPtr.Size;
}
// Is type a struct type?
if (type.IsValueType && !type.IsPrimitive)
{
// Check if the struct has a explicit size, if so, return that.
if (type.StructLayoutAttribute.Size != 0)
{
return type.StructLayoutAttribute.Size;
}
// Otherwise we calculate the sum of the sizes of all fields.
int size = 0;
var fields = type.GetFields(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance);
for (int fieldIndex = 0; fieldIndex < fields.Length; fieldIndex++)
{
size += SizeOf(fields[fieldIndex].FieldType);
}
return size;
}
// Primitive types.
return (Type.GetTypeCode(type)) switch
{
TypeCode.SByte => sizeof(sbyte),
TypeCode.Byte => sizeof(byte),
TypeCode.Int16 => sizeof(short),
TypeCode.UInt16 => sizeof(ushort),
TypeCode.Int32 => sizeof(int),
TypeCode.UInt32 => sizeof(uint),
TypeCode.Int64 => sizeof(long),
TypeCode.UInt64 => sizeof(ulong),
TypeCode.Char => sizeof(char),
TypeCode.Single => sizeof(float),
TypeCode.Double => sizeof(double),
TypeCode.Decimal => sizeof(decimal),
TypeCode.Boolean => sizeof(bool),
_ => throw new ArgumentException($"Length for type \"{type.Name}\" is unknown.")
};
}
}
}

@ -67,7 +67,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
TextureManager.SetComputeTextureBufferIndex(state.Get<int>(MethodOffset.TextureBufferIndex));
ShaderProgramInfo info = cs.Shaders[0].Program.Info;
ShaderProgramInfo info = cs.Shaders[0].Program.Info;
for (int index = 0; index < info.CBuffers.Count; index++)
{

@ -63,7 +63,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
else
{
evt.Flush();
return (_context.MemoryAccessor.ReadUInt64(gpuVa) != 0) ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
return (_context.MemoryAccessor.Read<ulong>(gpuVa) != 0) ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
}
}
@ -87,11 +87,11 @@ namespace Ryujinx.Graphics.Gpu.Engine
if (evt != null && evt2 == null)
{
useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt, _context.MemoryAccessor.ReadUInt64(gpuVa + 16), isEqual);
useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt, _context.MemoryAccessor.Read<ulong>(gpuVa + 16), isEqual);
}
else if (evt == null && evt2 != null)
{
useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt2, _context.MemoryAccessor.ReadUInt64(gpuVa), isEqual);
useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt2, _context.MemoryAccessor.Read<ulong>(gpuVa), isEqual);
}
else
{
@ -107,8 +107,8 @@ namespace Ryujinx.Graphics.Gpu.Engine
evt?.Flush();
evt2?.Flush();
ulong x = _context.MemoryAccessor.ReadUInt64(gpuVa);
ulong y = _context.MemoryAccessor.ReadUInt64(gpuVa + 16);
ulong x = _context.MemoryAccessor.Read<ulong>(gpuVa);
ulong y = _context.MemoryAccessor.Read<ulong>(gpuVa + 16);
return (isEqual ? x == y : x != y) ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
}

@ -466,7 +466,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
bool flipY = yControl.HasFlag(YControl.NegateY);
Origin origin = yControl.HasFlag(YControl.TriangleRastFlip) ? Origin.LowerLeft : Origin.UpperLeft;
_context.Renderer.Pipeline.SetOrigin(origin);
// The triangle rast flip flag only affects rasterization, the viewport is not flipped.

@ -77,7 +77,7 @@ namespace Ryujinx.Graphics.Gpu
{
Renderer = renderer;
MemoryManager = new MemoryManager();
MemoryManager = new MemoryManager(this);
MemoryAccessor = new MemoryAccessor(this);

@ -643,6 +643,8 @@ namespace Ryujinx.Graphics.Gpu.Image
overlap.ChangeSize(info.Width, info.Height, info.DepthOrLayers);
}
overlap.SynchronizeMemory();
return overlap;
}
}

@ -58,42 +58,6 @@ namespace Ryujinx.Graphics.Gpu.Memory
return MemoryMarshal.Cast<byte, T>(_context.PhysicalMemory.GetSpan(processVa, Unsafe.SizeOf<T>()))[0];
}
/// <summary>
/// Reads a 32-bits signed integer from GPU mapped memory.
/// </summary>
/// <param name="gpuVa">GPU virtual address where the value is located</param>
/// <returns>The value at the specified memory location</returns>
public int ReadInt32(ulong gpuVa)
{
ulong processVa = _context.MemoryManager.Translate(gpuVa);
return _context.PhysicalMemory.Read<int>(processVa);
}
/// <summary>
/// Reads a 64-bits unsigned integer from GPU mapped memory.
/// </summary>
/// <param name="gpuVa">GPU virtual address where the value is located</param>
/// <returns>The value at the specified memory location</returns>
public ulong ReadUInt64(ulong gpuVa)
{
ulong processVa = _context.MemoryManager.Translate(gpuVa);
return _context.PhysicalMemory.Read<ulong>(processVa);
}
/// <summary>
/// Reads a 8-bits unsigned integer from GPU mapped memory.
/// </summary>
/// <param name="gpuVa">GPU virtual address where the value is located</param>
/// <param name="value">The value to be written</param>
public void WriteByte(ulong gpuVa, byte value)
{
ulong processVa = _context.MemoryManager.Translate(gpuVa);
_context.PhysicalMemory.Write(processVa, MemoryMarshal.CreateSpan(ref value, 1));
}
/// <summary>
/// Writes a 32-bits signed integer to GPU mapped memory.
/// </summary>

@ -1,4 +1,7 @@
using Ryujinx.Cpu;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Gpu.Memory
{
@ -33,14 +36,69 @@ namespace Ryujinx.Graphics.Gpu.Memory
public event EventHandler<UnmapEventArgs> MemoryUnmapped;
private GpuContext _context;
/// <summary>
/// Creates a new instance of the GPU memory manager.
/// </summary>
public MemoryManager()
public MemoryManager(GpuContext context)
{
_context = context;
_pageTable = new ulong[PtLvl0Size][];
}
/// <summary>
/// Reads data from GPU mapped memory.
/// </summary>
/// <typeparam name="T">Type of the data</typeparam>
/// <param name="gpuVa">GPU virtual address where the data is located</param>
/// <returns>The data at the specified memory location</returns>
public T Read<T>(ulong gpuVa) where T : unmanaged
{
ulong processVa = Translate(gpuVa);
return MemoryMarshal.Cast<byte, T>(_context.PhysicalMemory.GetSpan(processVa, Unsafe.SizeOf<T>()))[0];
}
/// <summary>
/// Gets a read-only span of data from GPU mapped memory.
/// This reads as much data as possible, up to the specified maximum size.
/// </summary>
/// <param name="gpuVa">GPU virtual address where the data is located</param>
/// <param name="size">Size of the data</param>
/// <returns>The span of the data at the specified memory location</returns>
public ReadOnlySpan<byte> GetSpan(ulong gpuVa, int size)
{
ulong processVa = Translate(gpuVa);
return _context.PhysicalMemory.GetSpan(processVa, size);
}
/// <summary>
/// Gets a writable region from GPU mapped memory.
/// </summary>
/// <param name="address">Start address of the range</param>
/// <param name="size">Size in bytes to be range</param>
/// <returns>A writable region with the data at the specified memory location</returns>
public WritableRegion GetWritableRegion(ulong gpuVa, int size)
{
ulong processVa = Translate(gpuVa);
return _context.PhysicalMemory.GetWritableRegion(processVa, size);
}
/// <summary>
/// Writes data to GPU mapped memory.
/// </summary>
/// <param name="gpuVa">GPU virtual address to write the data into</param>
/// <param name="data">The data to be written</param>
public void Write(ulong gpuVa, ReadOnlySpan<byte> data)
{
ulong processVa = Translate(gpuVa);
_context.PhysicalMemory.Write(processVa, data);
}
/// <summary>
/// Maps a given range of pages to the specified CPU virtual address.
/// </summary>

@ -1,3 +1,4 @@
using Ryujinx.Cpu;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
@ -34,6 +35,17 @@ namespace Ryujinx.Graphics.Gpu.Memory
return _cpuMemory.GetSpan(address, size);
}
/// <summary>
/// Gets a writable region from the application process.
/// </summary>
/// <param name="address">Start address of the range</param>
/// <param name="size">Size in bytes to be range</param>
/// <returns>A writable region with the data at the specified memory location</returns>
public WritableRegion GetWritableRegion(ulong address, int size)
{
return _cpuMemory.GetWritableRegion(address, size);
}
/// <summary>
/// Reads data from the application process.
/// </summary>

@ -0,0 +1,20 @@
namespace Ryujinx.Graphics.Host1x
{
public enum ClassId
{
Host1x = 0x1,
Mpeg = 0x20,
Nvenc = 0x21,
Vi = 0x30,
Isp = 0x32,
Ispb = 0x34,
Vii2c = 0x36,
Vic = 0x5d,
Gr3d = 0x60,
Gpu = 0x61,
Tsec = 0xe0,
Tsecb = 0xe1,
Nvjpg = 0xc0,
Nvdec = 0xf0
}
}

@ -0,0 +1,32 @@
using Ryujinx.Graphics.Device;
using System;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Host1x
{
class Devices : IDisposable
{
private readonly Dictionary<ClassId, IDeviceState> _devices = new Dictionary<ClassId, IDeviceState>();
public void RegisterDevice(ClassId classId, IDeviceState device)
{
_devices[classId] = device;
}
public IDeviceState GetDevice(ClassId classId)
{
return _devices.TryGetValue(classId, out IDeviceState device) ? device : null;
}
public void Dispose()
{
foreach (var device in _devices.Values)
{
if (device is ThiDevice thi)
{
thi.Dispose();
}
}
}
}
}

@ -0,0 +1,33 @@
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.Gpu.Synchronization;
using System.Collections.Generic;
using System.Threading;
namespace Ryujinx.Graphics.Host1x
{
public class Host1xClass : IDeviceState
{
private readonly SynchronizationManager _syncMgr;
private readonly DeviceState<Host1xClassRegisters> _state;
public Host1xClass(SynchronizationManager syncMgr)
{
_syncMgr = syncMgr;
_state = new DeviceState<Host1xClassRegisters>(new Dictionary<string, RwCallback>
{
{ nameof(Host1xClassRegisters.WaitSyncpt32), new RwCallback(WaitSyncpt32, null) }
});
}
public int Read(int offset) => _state.Read(offset);
public void Write(int offset, int data) => _state.Write(offset, data);
private void WaitSyncpt32(int data)
{
uint syncpointId = (uint)(data & 0xFF);
uint threshold = _state.State.LoadSyncptPayload32;
_syncMgr.WaitOnSyncpoint(syncpointId, threshold, Timeout.InfiniteTimeSpan);
}
}
}

@ -0,0 +1,41 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Host1x
{
struct Host1xClassRegisters
{
public uint IncrSyncpt;
public uint IncrSyncptCntrl;
public uint IncrSyncptError;
public Array5<uint> ReservedC;
public uint WaitSyncpt;
public uint WaitSyncptBase;
public uint WaitSyncptIncr;
public uint LoadSyncptBase;
public uint IncrSyncptBase;
public uint Clear;
public uint Wait;
public uint WaitWithIntr;
public uint DelayUsec;
public uint TickcountHi;
public uint TickcountLo;
public uint Tickctrl;
public Array23<uint> Reserved50;
public uint Indctrl;
public uint Indoff2;
public uint Indoff;
public Array31<uint> Inddata;
public uint Reserved134;
public uint LoadSyncptPayload32;
public uint Stallctrl;
public uint WaitSyncpt32;
public uint WaitSyncptBase32;
public uint LoadSyncptBase32;
public uint IncrSyncptBase32;
public uint StallcountHi;
public uint StallcountLo;
public uint Xrefctrl;
public uint ChannelXrefHi;
public uint ChannelXrefLo;
}
}

@ -0,0 +1,123 @@
using Ryujinx.Common;
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.Gpu.Synchronization;
using System;
using System.Numerics;
namespace Ryujinx.Graphics.Host1x
{
public sealed class Host1xDevice : IDisposable
{
private readonly SyncptIncrManager _syncptIncrMgr;
private readonly AsyncWorkQueue<int[]> _commandQueue;
private readonly Devices _devices = new Devices();
public Host1xClass Class { get; }
private IDeviceState _device;
private int _count;
private int _offset;
private int _mask;
private bool _incrementing;
public Host1xDevice(SynchronizationManager syncMgr)
{
_syncptIncrMgr = new SyncptIncrManager(syncMgr);
_commandQueue = new AsyncWorkQueue<int[]>(Process, "Ryujinx.Host1xProcessor");
Class = new Host1xClass(syncMgr);
_devices.RegisterDevice(ClassId.Host1x, Class);
}
public void RegisterDevice(ClassId classId, IDeviceState device)
{
var thi = new ThiDevice(classId, device ?? throw new ArgumentNullException(nameof(device)), _syncptIncrMgr);
_devices.RegisterDevice(classId, thi);
}
public void Submit(ReadOnlySpan<int> commandBuffer)
{
_commandQueue.Add(commandBuffer.ToArray());
}
private void Process(int[] commandBuffer)
{
for (int index = 0; index < commandBuffer.Length; index++)
{
Step(commandBuffer[index]);
}
}
private void Step(int value)
{
if (_mask != 0)
{
int lbs = BitOperations.TrailingZeroCount(_mask);
_mask &= ~(1 << lbs);
DeviceWrite(_offset + lbs, value);
return;
}
else if (_count != 0)
{
_count--;
DeviceWrite(_offset, value);
if (_incrementing)
{
_offset++;
}
return;
}
OpCode opCode = (OpCode)((value >> 28) & 0xf);
switch (opCode)
{
case OpCode.SetClass:
_mask = value & 0x3f;
ClassId classId = (ClassId)((value >> 6) & 0x3ff);
_offset = (value >> 16) & 0xfff;
_device = _devices.GetDevice(classId);
break;
case OpCode.Incr:
case OpCode.NonIncr:
_count = value & 0xffff;
_offset = (value >> 16) & 0xfff;
_incrementing = opCode == OpCode.Incr;
break;
case OpCode.Mask:
_mask = value & 0xffff;
_offset = (value >> 16) & 0xfff;
break;
case OpCode.Imm:
int data = value & 0xfff;
_offset = (value >> 16) & 0xfff;
DeviceWrite(_offset, data);
break;
default:
Logger.PrintError(LogClass.Host1x, $"Unsupported opcode \"{opCode}\".");
break;
}
}
private void DeviceWrite(int offset, int data)
{
_device?.Write(offset * 4, data);
}
public void Dispose()
{
_commandQueue.Dispose();
_devices.Dispose();
}
}
}

@ -0,0 +1,21 @@
namespace Ryujinx.Graphics.Host1x
{
enum OpCode
{
SetClass,
Incr,
NonIncr,
Mask,
Imm,
Restart,
Gather,
SetStrmId,
SetAppId,
SetPyld,
IncrW,
NonIncrW,
GatherW,
RestartW,
Extend
}
}

@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
<AllowUnsafeBlocks>false</AllowUnsafeBlocks>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
<AllowUnsafeBlocks>false</AllowUnsafeBlocks>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\Ryujinx.Graphics.Device\Ryujinx.Graphics.Device.csproj" />
<ProjectReference Include="..\Ryujinx.Graphics.Gpu\Ryujinx.Graphics.Gpu.csproj" />
</ItemGroup>
</Project>

@ -0,0 +1,99 @@
using Ryujinx.Graphics.Gpu.Synchronization;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Host1x
{
class SyncptIncrManager
{
private readonly SynchronizationManager _syncMgr;
private struct SyncptIncr
{
public uint Id { get; }
public ClassId ClassId { get; }
public uint SyncptId { get; }
public bool Done { get; }
public SyncptIncr(uint id, ClassId classId, uint syncptId, bool done = false)
{
Id = id;
ClassId = classId;
SyncptId = syncptId;
Done = done;
}
}
private readonly List<SyncptIncr> _incrs = new List<SyncptIncr>();
private uint _currentId;
public SyncptIncrManager(SynchronizationManager syncMgr)
{
_syncMgr = syncMgr;
}
public void Increment(uint id)
{
lock (_incrs)
{
_incrs.Add(new SyncptIncr(0, 0, id, true));
IncrementAllDone();
}
}
public uint IncrementWhenDone(ClassId classId, uint id)
{
lock (_incrs)
{
uint handle = _currentId++;
_incrs.Add(new SyncptIncr(handle, classId, id));
return handle;
}
}
public void SignalDone(uint handle)
{
lock (_incrs)
{
// Set pending increment with the given handle to "done".
for (int i = 0; i < _incrs.Count; i++)
{
SyncptIncr incr = _incrs[i];
if (_incrs[i].Id == handle)
{
_incrs[i] = new SyncptIncr(incr.Id, incr.ClassId, incr.SyncptId, true);
break;
}
}
IncrementAllDone();
}
}
private void IncrementAllDone()
{
lock (_incrs)
{
// Increment all sequential pending increments that are already done.
int doneCount = 0;
for (; doneCount < _incrs.Count; doneCount++)
{
if (!_incrs[doneCount].Done)
{
break;
}
_syncMgr.IncrementSyncpoint(_incrs[doneCount].SyncptId);
}
_incrs.RemoveRange(0, doneCount);
}
}
}
}

@ -0,0 +1,96 @@
using Ryujinx.Common;
using Ryujinx.Graphics.Device;
using System;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Host1x
{
class ThiDevice : IDeviceState, IDisposable
{
private readonly ClassId _classId;
private readonly IDeviceState _device;
private readonly SyncptIncrManager _syncptIncrMgr;
private class CommandAction
{
public int Data { get; }
public CommandAction(int data)
{
Data = data;
}
}
private class MethodCallAction : CommandAction
{
public int Method { get; }
public MethodCallAction(int method, int data) : base(data)
{
Method = method;
}
}
private class SyncptIncrAction : CommandAction
{
public SyncptIncrAction(uint syncptIncrHandle) : base((int)syncptIncrHandle)
{
}
}
private readonly AsyncWorkQueue<CommandAction> _commandQueue;
private readonly DeviceState<ThiRegisters> _state;
public ThiDevice(ClassId classId, IDeviceState device, SyncptIncrManager syncptIncrMgr)
{
_classId = classId;
_device = device;
_syncptIncrMgr = syncptIncrMgr;
_commandQueue = new AsyncWorkQueue<CommandAction>(Process, $"Ryujinx.{classId}Processor");
_state = new DeviceState<ThiRegisters>(new Dictionary<string, RwCallback>
{
{ nameof(ThiRegisters.IncrSyncpt), new RwCallback(IncrSyncpt, null) },
{ nameof(ThiRegisters.Method1), new RwCallback(Method1, null) }
});
}
public int Read(int offset) => _state.Read(offset);
public void Write(int offset, int data) => _state.Write(offset, data);
private void IncrSyncpt(int data)
{
uint syncpointId = (uint)(data & 0xFF);
uint cond = (uint)((data >> 8) & 0xFF); // 0 = Immediate, 1 = Done
if (cond == 0)
{
_syncptIncrMgr.Increment(syncpointId);
}
else
{
_commandQueue.Add(new SyncptIncrAction(_syncptIncrMgr.IncrementWhenDone(_classId, syncpointId)));
}
}
private void Method1(int data)
{
_commandQueue.Add(new MethodCallAction((int)_state.State.Method0 * 4, data));
}
private void Process(CommandAction cmdAction)
{
if (cmdAction is SyncptIncrAction syncptIncrAction)
{
_syncptIncrMgr.SignalDone((uint)syncptIncrAction.Data);
}
else if (cmdAction is MethodCallAction methodCallAction)
{
_device.Write(methodCallAction.Method, methodCallAction.Data);
}
}
public void Dispose() => _commandQueue.Dispose();
}
}

@ -0,0 +1,22 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Host1x
{
struct ThiRegisters
{
public uint IncrSyncpt;
public uint Reserved4;
public uint IncrSyncptErr;
public uint CtxswIncrSyncpt;
public Array4<uint> Reserved10;
public uint Ctxsw;
public uint Reserved24;
public uint ContSyncptEof;
public Array5<uint> Reserved2C;
public uint Method0;
public uint Method1;
public Array12<uint> Reserved48;
public uint IntStatus;
public uint IntMask;
}
}

@ -0,0 +1,40 @@
using Ryujinx.Graphics.Video;
using System;
namespace Ryujinx.Graphics.Nvdec.H264
{
public class Decoder : IH264Decoder
{
public bool IsHardwareAccelerated => false;
private const int WorkBufferSize = 0x200;
private readonly byte[] _workBuffer = new byte[WorkBufferSize];
private readonly FFmpegContext _context = new FFmpegContext();
public ISurface CreateSurface(int width, int height)
{
return new Surface();
}
public bool Decode(ref H264PictureInfo pictureInfo, ISurface output, ReadOnlySpan<byte> bitstream)
{
Span<byte> bs = Prepend(bitstream, SpsAndPpsReconstruction.Reconstruct(ref pictureInfo, _workBuffer));
return _context.DecodeFrame((Surface)output, bs) == 0;
}
private static byte[] Prepend(ReadOnlySpan<byte> data, ReadOnlySpan<byte> prep)
{
byte[] output = new byte[data.Length + prep.Length];
prep.CopyTo(output);
data.CopyTo(new Span<byte>(output).Slice(prep.Length));
return output;
}
public void Dispose() => _context.Dispose();
}
}

@ -0,0 +1,51 @@
using FFmpeg.AutoGen;
using System;
namespace Ryujinx.Graphics.Nvdec.H264
{
unsafe class FFmpegContext : IDisposable
{
private readonly AVCodec* _codec;
private AVCodecContext* _context;
public FFmpegContext()
{
_codec = ffmpeg.avcodec_find_decoder(AVCodecID.AV_CODEC_ID_H264);
_context = ffmpeg.avcodec_alloc_context3(_codec);
ffmpeg.avcodec_open2(_context, _codec, null);
}
public int DecodeFrame(Surface output, ReadOnlySpan<byte> bitstream)
{
AVPacket packet;
ffmpeg.av_init_packet(&packet);
fixed (byte* ptr = bitstream)
{
packet.data = ptr;
packet.size = bitstream.Length;
int rc = ffmpeg.avcodec_send_packet(_context, &packet);
if (rc != 0)
{
return rc;
}
}
return ffmpeg.avcodec_receive_frame(_context, output.Frame);
}
public void Dispose()
{
ffmpeg.avcodec_close(_context);
fixed (AVCodecContext** ppContext = &_context)
{
ffmpeg.avcodec_free_context(ppContext);
}
}
}
}

@ -0,0 +1,121 @@
using System;
using System.Numerics;
namespace Ryujinx.Graphics.Nvdec.H264
{
struct H264BitStreamWriter
{
private const int BufferSize = 8;
private readonly byte[] _workBuffer;
private int _offset;
private int _buffer;
private int _bufferPos;
public H264BitStreamWriter(byte[] workBuffer)
{
_workBuffer = workBuffer;
_offset = 0;
_buffer = 0;
_bufferPos = 0;
}
public void WriteBit(bool value)
{
WriteBits(value ? 1 : 0, 1);
}
public void WriteBits(int value, int valueSize)
{
int valuePos = 0;
int remaining = valueSize;
while (remaining > 0)
{
int copySize = remaining;
int free = GetFreeBufferBits();
if (copySize > free)
{
copySize = free;
}
int mask = (1 << copySize) - 1;
int srcShift = (valueSize - valuePos) - copySize;
int dstShift = (BufferSize - _bufferPos) - copySize;
_buffer |= ((value >> srcShift) & mask) << dstShift;
valuePos += copySize;
_bufferPos += copySize;
remaining -= copySize;
}
}
private int GetFreeBufferBits()
{
if (_bufferPos == BufferSize)
{
Flush();
}
return BufferSize - _bufferPos;
}
public void Flush()
{
if (_bufferPos != 0)
{
_workBuffer[_offset++] = (byte)_buffer;
_buffer = 0;
_bufferPos = 0;
}
}
public void End()
{
WriteBit(true);
Flush();
}
public Span<byte> AsSpan()
{
return new Span<byte>(_workBuffer).Slice(0, _offset);
}
public void WriteU(uint value, int valueSize) => WriteBits((int)value, valueSize);
public void WriteSe(int value) => WriteExpGolombCodedInt(value);
public void WriteUe(uint value) => WriteExpGolombCodedUInt(value);
private void WriteExpGolombCodedInt(int value)
{
int sign = value <= 0 ? 0 : 1;
if (value < 0)
{
value = -value;
}
value = (value << 1) - sign;
WriteExpGolombCodedUInt((uint)value);
}
private void WriteExpGolombCodedUInt(uint value)
{
int size = 32 - BitOperations.LeadingZeroCount(value + 1);
WriteBits(1, size);
value -= (1u << (size - 1)) - 1;
WriteBits((int)value, size - 1);
}
}
}

@ -0,0 +1,23 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="FFmpeg.AutoGen" Version="4.3.0" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Ryujinx.Graphics.Video\Ryujinx.Graphics.Video.csproj" />
</ItemGroup>
</Project>

@ -0,0 +1,159 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Video;
using System;
namespace Ryujinx.Graphics.Nvdec.H264
{
static class SpsAndPpsReconstruction
{
public static Span<byte> Reconstruct(ref H264PictureInfo pictureInfo, byte[] workBuffer)
{
H264BitStreamWriter writer = new H264BitStreamWriter(workBuffer);
// Sequence Parameter Set.
writer.WriteU(1, 24);
writer.WriteU(0, 1);
writer.WriteU(3, 2);
writer.WriteU(7, 5);
writer.WriteU(100, 8); // Profile idc
writer.WriteU(0, 8); // Reserved
writer.WriteU(31, 8); // Level idc
writer.WriteUe(0); // Seq parameter set id
writer.WriteUe(pictureInfo.ChromaFormatIdc);
if (pictureInfo.ChromaFormatIdc == 3)
{
writer.WriteBit(false); // Separate colour plane flag
}
writer.WriteUe(0); // Bit depth luma minus 8
writer.WriteUe(0); // Bit depth chroma minus 8
writer.WriteBit(pictureInfo.QpprimeYZeroTransformBypassFlag);
writer.WriteBit(false); // Scaling matrix present flag
writer.WriteUe(pictureInfo.Log2MaxFrameNumMinus4);
writer.WriteUe(pictureInfo.PicOrderCntType);
if (pictureInfo.PicOrderCntType == 0)
{
writer.WriteUe(pictureInfo.Log2MaxPicOrderCntLsbMinus4);
}
else if (pictureInfo.PicOrderCntType == 1)
{
writer.WriteBit(pictureInfo.DeltaPicOrderAlwaysZeroFlag);
writer.WriteSe(0); // Offset for non-ref pic
writer.WriteSe(0); // Offset for top to bottom field
writer.WriteUe(0); // Num ref frames in pic order cnt cycle
}
writer.WriteUe(16); // Max num ref frames
writer.WriteBit(false); // Gaps in frame num value allowed flag
writer.WriteUe(pictureInfo.PicWidthInMbsMinus1);
writer.WriteUe(pictureInfo.PicHeightInMapUnitsMinus1);
writer.WriteBit(pictureInfo.FrameMbsOnlyFlag);
if (!pictureInfo.FrameMbsOnlyFlag)
{
writer.WriteBit(pictureInfo.MbAdaptiveFrameFieldFlag);
}
writer.WriteBit(pictureInfo.Direct8x8InferenceFlag);
writer.WriteBit(false); // Frame cropping flag
writer.WriteBit(false); // VUI parameter present flag
writer.End();
// Picture Parameter Set.
writer.WriteU(1, 24);
writer.WriteU(0, 1);
writer.WriteU(3, 2);
writer.WriteU(8, 5);
writer.WriteUe(0); // Pic parameter set id
writer.WriteUe(0); // Seq parameter set id
writer.WriteBit(pictureInfo.EntropyCodingModeFlag);
writer.WriteBit(false); // Bottom field pic order in frame present flag
writer.WriteUe(0); // Num slice groups minus 1
writer.WriteUe(pictureInfo.NumRefIdxL0ActiveMinus1);
writer.WriteUe(pictureInfo.NumRefIdxL1ActiveMinus1);
writer.WriteBit(pictureInfo.WeightedPredFlag);
writer.WriteU(pictureInfo.WeightedBipredIdc, 2);
writer.WriteSe(pictureInfo.PicInitQpMinus26);
writer.WriteSe(0); // Pic init qs minus 26
writer.WriteSe(pictureInfo.ChromaQpIndexOffset);
writer.WriteBit(pictureInfo.DeblockingFilterControlPresentFlag);
writer.WriteBit(pictureInfo.ConstrainedIntraPredFlag);
writer.WriteBit(pictureInfo.RedundantPicCntPresentFlag);
writer.WriteBit(pictureInfo.Transform8x8ModeFlag);
writer.WriteBit(pictureInfo.ScalingMatrixPresent);
if (pictureInfo.ScalingMatrixPresent)
{
for (int index = 0; index < 6; index++)
{
writer.WriteBit(true);
WriteScalingList(ref writer, pictureInfo.ScalingLists4x4[index]);
}
if (pictureInfo.Transform8x8ModeFlag)
{
for (int index = 0; index < 2; index++)
{
writer.WriteBit(true);
WriteScalingList(ref writer, pictureInfo.ScalingLists8x8[index]);
}
}
}
writer.WriteSe(pictureInfo.SecondChromaQpIndexOffset);
writer.End();
return writer.AsSpan();
}
// ZigZag LUTs from libavcodec.
private static readonly byte[] ZigZagDirect = new byte[]
{
0, 1, 8, 16, 9, 2, 3, 10,
17, 24, 32, 25, 18, 11, 4, 5,
12, 19, 26, 33, 40, 48, 41, 34,
27, 20, 13, 6, 7, 14, 21, 28,
35, 42, 49, 56, 57, 50, 43, 36,
29, 22, 15, 23, 30, 37, 44, 51,
58, 59, 52, 45, 38, 31, 39, 46,
53, 60, 61, 54, 47, 55, 62, 63
};
private static readonly byte[] ZigZagScan = new byte[]
{
0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4,
1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4,
3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4
};
private static void WriteScalingList(ref H264BitStreamWriter writer, IArray<byte> list)
{
byte[] scan = list.Length == 16 ? ZigZagScan : ZigZagDirect;
int lastScale = 8;
for (int index = 0; index < list.Length; index++)
{
byte value = list[scan[index]];
int deltaScale = value - lastScale;
writer.WriteSe(deltaScale);
lastScale = value;
}
}
}
}

@ -0,0 +1,33 @@
using FFmpeg.AutoGen;
using Ryujinx.Graphics.Video;
using System;
namespace Ryujinx.Graphics.Nvdec.H264
{
unsafe class Surface : ISurface
{
public AVFrame* Frame { get; }
public Plane YPlane => new Plane((IntPtr)Frame->data[0], Stride * Height);
public Plane UPlane => new Plane((IntPtr)Frame->data[1], UvStride * UvHeight);
public Plane VPlane => new Plane((IntPtr)Frame->data[2], UvStride * UvHeight);
public int Width => Frame->width;
public int Height => Frame->height;
public int Stride => Frame->linesize[0];
public int UvWidth => (Frame->width + 1) >> 1;
public int UvHeight => (Frame->height + 1) >> 1;
public int UvStride => Frame->linesize[1];
public Surface()
{
Frame = ffmpeg.av_frame_alloc();
}
public void Dispose()
{
ffmpeg.av_frame_unref(Frame);
ffmpeg.av_free(Frame);
}
}
}

@ -0,0 +1,9 @@
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal enum BitDepth
{
Bits8 = 8, /**< 8 bits */
Bits10 = 10, /**< 10 bits */
Bits12 = 12, /**< 12 bits */
}
}

@ -0,0 +1,56 @@
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal enum CodecErr
{
/*!\brief Operation completed without error */
CodecOk,
/*!\brief Unspecified error */
CodecError,
/*!\brief Memory operation failed */
CodecMemError,
/*!\brief ABI version mismatch */
CodecAbiMismatch,
/*!\brief Algorithm does not have required capability */
CodecIncapable,
/*!\brief The given bitstream is not supported.
*
* The bitstream was unable to be parsed at the highest level. The decoder
* is unable to proceed. This error \ref SHOULD be treated as fatal to the
* stream. */
CodecUnsupBitstream,
/*!\brief Encoded bitstream uses an unsupported feature
*
* The decoder does not implement a feature required by the encoder. This
* return code should only be used for features that prevent future
* pictures from being properly decoded. This error \ref MAY be treated as
* fatal to the stream or \ref MAY be treated as fatal to the current GOP.
*/
CodecUnsupFeature,
/*!\brief The coded data for this stream is corrupt or incomplete
*
* There was a problem decoding the current frame. This return code
* should only be used for failures that prevent future pictures from
* being properly decoded. This error \ref MAY be treated as fatal to the
* stream or \ref MAY be treated as fatal to the current GOP. If decoding
* is continued for the current GOP, artifacts may be present.
*/
CodecCorruptFrame,
/*!\brief An application-supplied parameter is not valid.
*
*/
CodecInvalidParam,
/*!\brief An iterator reached the end of list.
*
*/
CodecListEnd
}
}

@ -0,0 +1,59 @@
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
namespace Ryujinx.Graphics.Nvdec.Vp9.Common
{
internal static class BitUtils
{
// FIXME: Enable inlining here after AVX2 gather bug is fixed.
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte ClipPixel(int val)
{
return (byte)((val > 255) ? 255 : (val < 0) ? 0 : val);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ushort ClipPixelHighbd(int val, int bd)
{
return bd switch
{
10 => (ushort)Math.Clamp(val, 0, 1023),
12 => (ushort)Math.Clamp(val, 0, 4095),
_ => (ushort)Math.Clamp(val, 0, 255)
};
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int RoundPowerOfTwo(int value, int n)
{
return (value + (1 << (n - 1))) >> n;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long RoundPowerOfTwo(long value, int n)
{
return (value + (1L << (n - 1))) >> n;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int AlignPowerOfTwo(int value, int n)
{
return (value + ((1 << n) - 1)) & ~((1 << n) - 1);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int GetMsb(uint n)
{
Debug.Assert(n != 0);
return 31 ^ BitOperations.LeadingZeroCount(n);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int GetUnsignedBits(uint numValues)
{
return numValues > 0 ? GetMsb(numValues) + 1 : 0;
}
}
}

@ -0,0 +1,94 @@
using Ryujinx.Common.Memory;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Nvdec.Vp9.Common
{
internal class MemoryAllocator : IDisposable
{
private const int PoolEntries = 10;
private struct PoolItem
{
public IntPtr Pointer;
public int Length;
public bool InUse;
}
private PoolItem[] _pool = new PoolItem[PoolEntries];
public ArrayPtr<T> Allocate<T>(int length) where T : unmanaged
{
int lengthInBytes = Unsafe.SizeOf<T>() * length;
IntPtr ptr = IntPtr.Zero;
for (int i = 0; i < PoolEntries; i++)
{
ref PoolItem item = ref _pool[i];
if (!item.InUse && item.Length == lengthInBytes)
{
item.InUse = true;
ptr = item.Pointer;
break;
}
}
if (ptr == IntPtr.Zero)
{
ptr = Marshal.AllocHGlobal(lengthInBytes);
for (int i = 0; i < PoolEntries; i++)
{
ref PoolItem item = ref _pool[i];
if (!item.InUse)
{
item.InUse = true;
if (item.Pointer != IntPtr.Zero)
{
Marshal.FreeHGlobal(item.Pointer);
}
item.Pointer = ptr;
item.Length = lengthInBytes;
break;
}
}
}
return new ArrayPtr<T>(ptr, length);
}
public unsafe void Free<T>(ArrayPtr<T> arr) where T : unmanaged
{
IntPtr ptr = (IntPtr)arr.ToPointer();
for (int i = 0; i < PoolEntries; i++)
{
ref PoolItem item = ref _pool[i];
if (item.Pointer == ptr)
{
item.InUse = false;
break;
}
}
}
public void Dispose()
{
for (int i = 0; i < PoolEntries; i++)
{
ref PoolItem item = ref _pool[i];
if (item.Pointer != IntPtr.Zero)
{
Marshal.FreeHGlobal(item.Pointer);
item.Pointer = IntPtr.Zero;
}
}
}
}
}

@ -0,0 +1,25 @@
using Ryujinx.Common.Memory;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Nvdec.Vp9.Common
{
internal static class MemoryUtil
{
public static unsafe void Copy<T>(T* dest, T* source, int length) where T : unmanaged
{
new Span<T>(source, length).CopyTo(new Span<T>(dest, length));
}
public static void Copy<T>(ref T dest, ref T source) where T : unmanaged
{
MemoryMarshal.CreateSpan(ref source, 1).CopyTo(MemoryMarshal.CreateSpan(ref dest, 1));
}
public static unsafe void Fill<T>(T* ptr, T value, int length) where T : unmanaged
{
new Span<T>(ptr, length).Fill(value);
}
}
}

@ -0,0 +1,71 @@
using Ryujinx.Graphics.Nvdec.Vp9.Types;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal static class Constants
{
public const int Vp9InterpExtend = 4;
public const int MaxMbPlane = 3;
public const int None = -1;
public const int IntraFrame = 0;
public const int LastFrame = 1;
public const int GoldenFrame = 2;
public const int AltRefFrame = 3;
public const int MaxRefFrames = 4;
public const int MiSizeLog2 = 3;
public const int MiBlockSizeLog2 = 6 - MiSizeLog2; // 64 = 2^6
public const int MiSize = 1 << MiSizeLog2; // pixels per mi-unit
public const int MiBlockSize = 1 << MiBlockSizeLog2; // mi-units per max block
public const int MiMask = MiBlockSize - 1;
public const int PartitionPloffset = 4; // number of probability models per block size
/* Segment Feature Masks */
public const int MaxMvRefCandidates = 2;
public const int CompInterContexts = 5;
public const int RefContexts = 5;
public const int EightTap = 0;
public const int EightTapSmooth = 1;
public const int EightTapSharp = 2;
public const int SwitchableFilters = 3; /* Number of switchable filters */
public const int Bilinear = 3;
public const int Switchable = 4; /* should be the last one */
// Frame
public const int RefsPerFrame = 3;
public const int NumPingPongBuffers = 2;
public const int Class0Bits = 1; /* bits at integer precision for class 0 */
public const int Class0Size = 1 << Class0Bits;
public const int MvInUseBits = 14;
public const int MvUpp = (1 << MvInUseBits) - 1;
public const int MvLow = -(1 << MvInUseBits);
// Coefficient token alphabet
public const int ZeroToken = 0; // 0 Extra Bits 0+0
public const int OneToken = 1; // 1 Extra Bits 0+1
public const int TwoToken = 2; // 2 Extra Bits 0+1
public const int PivotNode = 2;
public const int Cat1MinVal = 5;
public const int Cat2MinVal = 7;
public const int Cat3MinVal = 11;
public const int Cat4MinVal = 19;
public const int Cat5MinVal = 35;
public const int Cat6MinVal = 67;
public const int EobModelToken = 3;
public const int SegmentAbsData = 1;
public const int MaxSegments = 8;
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,164 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using Ryujinx.Graphics.Video;
using System;
using Vp9MvRef = Ryujinx.Graphics.Video.Vp9MvRef;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
public class Decoder : IVp9Decoder
{
public bool IsHardwareAccelerated => false;
private readonly MemoryAllocator _allocator = new MemoryAllocator();
public ISurface CreateSurface(int width, int height) => new Surface(width, height);
private static readonly byte[] LiteralToFilter = new byte[]
{
Constants.EightTapSmooth,
Constants.EightTap,
Constants.EightTapSharp,
Constants.Bilinear
};
public unsafe bool Decode(
ref Vp9PictureInfo pictureInfo,
ISurface output,
ReadOnlySpan<byte> bitstream,
ReadOnlySpan<Vp9MvRef> mvsIn,
Span<Vp9MvRef> mvsOut)
{
Vp9Common cm = new Vp9Common();
cm.FrameType = pictureInfo.IsKeyFrame ? FrameType.KeyFrame : FrameType.InterFrame;
cm.IntraOnly = pictureInfo.IntraOnly;
cm.Width = output.Width;
cm.Height = output.Height;
cm.UsePrevFrameMvs = pictureInfo.UsePrevInFindMvRefs;
cm.RefFrameSignBias = pictureInfo.RefFrameSignBias;
cm.BaseQindex = pictureInfo.BaseQIndex;
cm.YDcDeltaQ = pictureInfo.YDcDeltaQ;
cm.UvAcDeltaQ = pictureInfo.UvAcDeltaQ;
cm.UvDcDeltaQ = pictureInfo.UvDcDeltaQ;
cm.Mb.Lossless = pictureInfo.Lossless;
cm.TxMode = (TxMode)pictureInfo.TransformMode;
cm.AllowHighPrecisionMv = pictureInfo.AllowHighPrecisionMv;
cm.InterpFilter = (byte)pictureInfo.InterpFilter;
if (cm.InterpFilter != Constants.Switchable)
{
cm.InterpFilter = LiteralToFilter[cm.InterpFilter];
}
cm.ReferenceMode = (ReferenceMode)pictureInfo.ReferenceMode;
cm.CompFixedRef = pictureInfo.CompFixedRef;
cm.CompVarRef = pictureInfo.CompVarRef;
cm.Log2TileCols = pictureInfo.Log2TileCols;
cm.Log2TileRows = pictureInfo.Log2TileRows;
cm.Seg.Enabled = pictureInfo.SegmentEnabled;
cm.Seg.UpdateMap = pictureInfo.SegmentMapUpdate;
cm.Seg.TemporalUpdate = pictureInfo.SegmentMapTemporalUpdate;
cm.Seg.AbsDelta = (byte)pictureInfo.SegmentAbsDelta;
cm.Seg.FeatureMask = pictureInfo.SegmentFeatureEnable;
cm.Seg.FeatureData = pictureInfo.SegmentFeatureData;
cm.Lf.ModeRefDeltaEnabled = pictureInfo.ModeRefDeltaEnabled;
cm.Lf.RefDeltas = pictureInfo.RefDeltas;
cm.Lf.ModeDeltas = pictureInfo.ModeDeltas;
cm.Fc = new Ptr<Vp9EntropyProbs>(ref pictureInfo.Entropy);
cm.Counts = new Ptr<Vp9BackwardUpdates>(ref pictureInfo.BackwardUpdateCounts);
cm.FrameRefs[0].Buf = (Surface)pictureInfo.LastReference;
cm.FrameRefs[1].Buf = (Surface)pictureInfo.GoldenReference;
cm.FrameRefs[2].Buf = (Surface)pictureInfo.AltReference;
cm.Mb.CurBuf = (Surface)output;
cm.Mb.SetupBlockPlanes(1, 1);
cm.AllocTileWorkerData(_allocator, 1 << pictureInfo.Log2TileCols, 1 << pictureInfo.Log2TileRows);
cm.AllocContextBuffers(_allocator, output.Width, output.Height);
cm.InitContextBuffers();
cm.SetupSegmentationDequant();
cm.SetupScaleFactors();
SetMvs(ref cm, mvsIn);
fixed (byte* dataPtr = bitstream)
{
try
{
DecodeFrame.DecodeTiles(ref cm, new ArrayPtr<byte>(dataPtr, bitstream.Length));
}
catch (InternalErrorException)
{
return false;
}
}
GetMvs(ref cm, mvsOut);
cm.FreeTileWorkerData(_allocator);
cm.FreeContextBuffers(_allocator);
return true;
}
private static void SetMvs(ref Vp9Common cm, ReadOnlySpan<Vp9MvRef> mvs)
{
if (mvs.Length > cm.PrevFrameMvs.Length)
{
throw new ArgumentException($"Size mismatch, expected: {cm.PrevFrameMvs.Length}, but got: {mvs.Length}.");
}
for (int i = 0; i < mvs.Length; i++)
{
ref var mv = ref cm.PrevFrameMvs[i];
mv.Mv[0].Row = mvs[i].Mvs[0].Row;
mv.Mv[0].Col = mvs[i].Mvs[0].Col;
mv.Mv[1].Row = mvs[i].Mvs[1].Row;
mv.Mv[1].Col = mvs[i].Mvs[1].Col;
mv.RefFrame[0] = (sbyte)mvs[i].RefFrames[0];
mv.RefFrame[1] = (sbyte)mvs[i].RefFrames[1];
}
}
private static void GetMvs(ref Vp9Common cm, Span<Vp9MvRef> mvs)
{
if (mvs.Length > cm.CurFrameMvs.Length)
{
throw new ArgumentException($"Size mismatch, expected: {cm.CurFrameMvs.Length}, but got: {mvs.Length}.");
}
for (int i = 0; i < mvs.Length; i++)
{
ref var mv = ref cm.CurFrameMvs[i];
mvs[i].Mvs[0].Row = mv.Mv[0].Row;
mvs[i].Mvs[0].Col = mv.Mv[0].Col;
mvs[i].Mvs[1].Row = mv.Mv[1].Row;
mvs[i].Mvs[1].Col = mv.Mv[1].Col;
mvs[i].RefFrames[0] = mv.RefFrame[0];
mvs[i].RefFrames[1] = mv.RefFrame[1];
}
}
public void Dispose() => _allocator.Dispose();
}
}

@ -0,0 +1,325 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Dsp;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using Ryujinx.Graphics.Video;
using System;
using System.Diagnostics;
using System.Runtime.InteropServices;
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal static class Detokenize
{
private const int EobContextNode = 0;
private const int ZeroContextNode = 1;
private const int OneContextNode = 2;
private static int GetCoefContext(ReadOnlySpan<short> neighbors, ReadOnlySpan<byte> tokenCache, int c)
{
const int maxNeighbors = 2;
return (1 + tokenCache[neighbors[maxNeighbors * c + 0]] + tokenCache[neighbors[maxNeighbors * c + 1]]) >> 1;
}
private static int ReadCoeff(
ref Reader r,
ReadOnlySpan<byte> probs,
int n,
ref ulong value,
ref int count,
ref uint range)
{
int i, val = 0;
for (i = 0; i < n; ++i)
{
val = (val << 1) | r.ReadBool(probs[i], ref value, ref count, ref range);
}
return val;
}
private static int DecodeCoefs(
ref MacroBlockD xd,
PlaneType type,
Span<int> dqcoeff,
TxSize txSize,
ref Array2<short> dq,
int ctx,
ReadOnlySpan<short> scan,
ReadOnlySpan<short> nb,
ref Reader r)
{
ref Vp9BackwardUpdates counts = ref xd.Counts.Value;
int maxEob = 16 << ((int)txSize << 1);
ref Vp9EntropyProbs fc = ref xd.Fc.Value;
int refr = xd.Mi[0].Value.IsInterBlock() ? 1 : 0;
int band, c = 0;
ref Array6<Array6<Array3<byte>>> coefProbs = ref fc.CoefProbs[(int)txSize][(int)type][refr];
Span<byte> tokenCache = stackalloc byte[32 * 32];
ReadOnlySpan<byte> bandTranslate = Luts.get_band_translate(txSize);
int dqShift = (txSize == TxSize.Tx32x32) ? 1 : 0;
int v;
short dqv = dq[0];
ReadOnlySpan<byte> cat6Prob = (xd.Bd == 12)
? Luts.Vp9Cat6ProbHigh12
: (xd.Bd == 10) ? new ReadOnlySpan<byte>(Luts.Vp9Cat6ProbHigh12).Slice(2) : Luts.Vp9Cat6Prob;
int cat6Bits = (xd.Bd == 12) ? 18 : (xd.Bd == 10) ? 16 : 14;
// Keep value, range, and count as locals. The compiler produces better
// results with the locals than using r directly.
ulong value = r.Value;
uint range = r.Range;
int count = r.Count;
while (c < maxEob)
{
int val = -1;
band = bandTranslate[0];
bandTranslate = bandTranslate.Slice(1);
ref Array3<byte> prob = ref coefProbs[band][ctx];
if (!xd.Counts.IsNull)
{
++counts.EobBranch[(int)txSize][(int)type][refr][band][ctx];
}
if (r.ReadBool(prob[EobContextNode], ref value, ref count, ref range) == 0)
{
if (!xd.Counts.IsNull)
{
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.EobModelToken];
}
break;
}
while (r.ReadBool(prob[ZeroContextNode], ref value, ref count, ref range) == 0)
{
if (!xd.Counts.IsNull)
{
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.ZeroToken];
}
dqv = dq[1];
tokenCache[scan[c]] = 0;
++c;
if (c >= maxEob)
{
r.Value = value;
r.Range = range;
r.Count = count;
return c; // Zero tokens at the end (no eob token)
}
ctx = GetCoefContext(nb, tokenCache, c);
band = bandTranslate[0];
bandTranslate = bandTranslate.Slice(1);
prob = ref coefProbs[band][ctx];
}
if (r.ReadBool(prob[OneContextNode], ref value, ref count, ref range) != 0)
{
ReadOnlySpan<byte> p = Luts.Vp9Pareto8Full[prob[Constants.PivotNode] - 1];
if (!xd.Counts.IsNull)
{
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.TwoToken];
}
if (r.ReadBool(p[0], ref value, ref count, ref range) != 0)
{
if (r.ReadBool(p[3], ref value, ref count, ref range) != 0)
{
tokenCache[scan[c]] = 5;
if (r.ReadBool(p[5], ref value, ref count, ref range) != 0)
{
if (r.ReadBool(p[7], ref value, ref count, ref range) != 0)
{
val = Constants.Cat6MinVal + ReadCoeff(ref r, cat6Prob, cat6Bits, ref value, ref count, ref range);
}
else
{
val = Constants.Cat5MinVal + ReadCoeff(ref r, Luts.Vp9Cat5Prob, 5, ref value, ref count, ref range);
}
}
else if (r.ReadBool(p[6], ref value, ref count, ref range) != 0)
{
val = Constants.Cat4MinVal + ReadCoeff(ref r, Luts.Vp9Cat4Prob, 4, ref value, ref count, ref range);
}
else
{
val = Constants.Cat3MinVal + ReadCoeff(ref r, Luts.Vp9Cat3Prob, 3, ref value, ref count, ref range);
}
}
else
{
tokenCache[scan[c]] = 4;
if (r.ReadBool(p[4], ref value, ref count, ref range) != 0)
{
val = Constants.Cat2MinVal + ReadCoeff(ref r, Luts.Vp9Cat2Prob, 2, ref value, ref count, ref range);
}
else
{
val = Constants.Cat1MinVal + ReadCoeff(ref r, Luts.Vp9Cat1Prob, 1, ref value, ref count, ref range);
}
}
// Val may use 18-bits
v = (int)(((long)val * dqv) >> dqShift);
}
else
{
if (r.ReadBool(p[1], ref value, ref count, ref range) != 0)
{
tokenCache[scan[c]] = 3;
v = ((3 + r.ReadBool(p[2], ref value, ref count, ref range)) * dqv) >> dqShift;
}
else
{
tokenCache[scan[c]] = 2;
v = (2 * dqv) >> dqShift;
}
}
}
else
{
if (!xd.Counts.IsNull)
{
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.OneToken];
}
tokenCache[scan[c]] = 1;
v = dqv >> dqShift;
}
dqcoeff[scan[c]] = (int)HighbdCheckRange(r.ReadBool(128, ref value, ref count, ref range) != 0 ? -v : v, xd.Bd);
++c;
ctx = GetCoefContext(nb, tokenCache, c);
dqv = dq[1];
}
r.Value = value;
r.Range = range;
r.Count = count;
return c;
}
private static void GetCtxShift(ref MacroBlockD xd, ref int ctxShiftA, ref int ctxShiftL, int x, int y, uint txSizeInBlocks)
{
if (xd.MaxBlocksWide != 0)
{
if (txSizeInBlocks + x > xd.MaxBlocksWide)
{
ctxShiftA = (int)(txSizeInBlocks - (xd.MaxBlocksWide - x)) * 8;
}
}
if (xd.MaxBlocksHigh != 0)
{
if (txSizeInBlocks + y > xd.MaxBlocksHigh)
{
ctxShiftL = (int)(txSizeInBlocks - (xd.MaxBlocksHigh - y)) * 8;
}
}
}
private static PlaneType GetPlaneType(int plane)
{
return (PlaneType)(plane > 0 ? 1 : 0);
}
public static int DecodeBlockTokens(
ref TileWorkerData twd,
int plane,
Luts.ScanOrder sc,
int x,
int y,
TxSize txSize,
int segId)
{
ref Reader r = ref twd.BitReader;
ref MacroBlockD xd = ref twd.Xd;
ref MacroBlockDPlane pd = ref xd.Plane[plane];
ref Array2<short> dequant = ref pd.SegDequant[segId];
int eob;
Span<sbyte> a = pd.AboveContext.ToSpan().Slice(x);
Span<sbyte> l = pd.LeftContext.ToSpan().Slice(y);
int ctx;
int ctxShiftA = 0;
int ctxShiftL = 0;
switch (txSize)
{
case TxSize.Tx4x4:
ctx = a[0] != 0 ? 1 : 0;
ctx += l[0] != 0 ? 1 : 0;
eob = DecodeCoefs(
ref xd,
GetPlaneType(plane),
pd.DqCoeff.ToSpan(),
txSize,
ref dequant,
ctx,
sc.Scan,
sc.Neighbors,
ref r);
a[0] = l[0] = (sbyte)(eob > 0 ? 1 : 0);
break;
case TxSize.Tx8x8:
GetCtxShift(ref xd, ref ctxShiftA, ref ctxShiftL, x, y, 1 << (int)TxSize.Tx8x8);
ctx = MemoryMarshal.Cast<sbyte, ushort>(a)[0] != 0 ? 1 : 0;
ctx += MemoryMarshal.Cast<sbyte, ushort>(l)[0] != 0 ? 1 : 0;
eob = DecodeCoefs(
ref xd,
GetPlaneType(plane),
pd.DqCoeff.ToSpan(),
txSize,
ref dequant,
ctx,
sc.Scan,
sc.Neighbors,
ref r);
MemoryMarshal.Cast<sbyte, ushort>(a)[0] = (ushort)((eob > 0 ? 0x0101 : 0) >> ctxShiftA);
MemoryMarshal.Cast<sbyte, ushort>(l)[0] = (ushort)((eob > 0 ? 0x0101 : 0) >> ctxShiftL);
break;
case TxSize.Tx16x16:
GetCtxShift(ref xd, ref ctxShiftA, ref ctxShiftL, x, y, 1 << (int)TxSize.Tx16x16);
ctx = MemoryMarshal.Cast<sbyte, uint>(a)[0] != 0 ? 1 : 0;
ctx += MemoryMarshal.Cast<sbyte, uint>(l)[0] != 0 ? 1 : 0;
eob = DecodeCoefs(
ref xd,
GetPlaneType(plane),
pd.DqCoeff.ToSpan(),
txSize,
ref dequant,
ctx,
sc.Scan,
sc.Neighbors,
ref r);
MemoryMarshal.Cast<sbyte, uint>(a)[0] = (uint)((eob > 0 ? 0x01010101 : 0) >> ctxShiftA);
MemoryMarshal.Cast<sbyte, uint>(l)[0] = (uint)((eob > 0 ? 0x01010101 : 0) >> ctxShiftL);
break;
case TxSize.Tx32x32:
GetCtxShift(ref xd, ref ctxShiftA, ref ctxShiftL, x, y, 1 << (int)TxSize.Tx32x32);
// NOTE: Casting to ulong here is safe because the default memory
// alignment is at least 8 bytes and the Tx32x32 is aligned on 8 byte
// boundaries.
ctx = MemoryMarshal.Cast<sbyte, ulong>(a)[0] != 0 ? 1 : 0;
ctx += MemoryMarshal.Cast<sbyte, ulong>(l)[0] != 0 ? 1 : 0;
eob = DecodeCoefs(
ref xd,
GetPlaneType(plane),
pd.DqCoeff.ToSpan(),
txSize,
ref dequant,
ctx,
sc.Scan,
sc.Neighbors,
ref r);
MemoryMarshal.Cast<sbyte, ulong>(a)[0] = (eob > 0 ? 0x0101010101010101UL : 0) >> ctxShiftA;
MemoryMarshal.Cast<sbyte, ulong>(l)[0] = (eob > 0 ? 0x0101010101010101UL : 0) >> ctxShiftL;
break;
default:
Debug.Assert(false, "Invalid transform size.");
eob = 0;
break;
}
return eob;
}
}
}

@ -0,0 +1,949 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Filter;
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
internal static class Convolve
{
private const bool UseIntrinsics = true;
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<int> MultiplyAddAdjacent(
Vector128<short> vsrc0,
Vector128<short> vsrc1,
Vector128<short> vsrc2,
Vector128<short> vsrc3,
Vector128<short> vfilter,
Vector128<int> zero)
{
// < sumN, sumN, sumN, sumN >
Vector128<int> sum0 = Sse2.MultiplyAddAdjacent(vsrc0, vfilter);
Vector128<int> sum1 = Sse2.MultiplyAddAdjacent(vsrc1, vfilter);
Vector128<int> sum2 = Sse2.MultiplyAddAdjacent(vsrc2, vfilter);
Vector128<int> sum3 = Sse2.MultiplyAddAdjacent(vsrc3, vfilter);
// < 0, 0, sumN, sumN >
sum0 = Ssse3.HorizontalAdd(sum0, zero);
sum1 = Ssse3.HorizontalAdd(sum1, zero);
sum2 = Ssse3.HorizontalAdd(sum2, zero);
sum3 = Ssse3.HorizontalAdd(sum3, zero);
// < 0, 0, 0, sumN >
sum0 = Ssse3.HorizontalAdd(sum0, zero);
sum1 = Ssse3.HorizontalAdd(sum1, zero);
sum2 = Ssse3.HorizontalAdd(sum2, zero);
sum3 = Ssse3.HorizontalAdd(sum3, zero);
// < 0, 0, sum1, sum0 >
Vector128<int> sum01 = Sse2.UnpackLow(sum0, sum1);
// < 0, 0, sum3, sum2 >
Vector128<int> sum23 = Sse2.UnpackLow(sum2, sum3);
// < sum3, sum2, sum1, sum0 >
return Sse.MoveLowToHigh(sum01.AsSingle(), sum23.AsSingle()).AsInt32();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<int> RoundShift(Vector128<int> value, Vector128<int> const64)
{
return Sse2.ShiftRightArithmetic(Sse2.Add(value, const64), FilterBits);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<byte> PackUnsignedSaturate(Vector128<int> value, Vector128<int> zero)
{
return Sse2.PackUnsignedSaturate(Sse41.PackUnsignedSaturate(value, zero).AsInt16(), zero.AsInt16());
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void ConvolveHorizSse41(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] xFilters,
int x0Q4,
int w,
int h)
{
Vector128<int> zero = Vector128<int>.Zero;
Vector128<int> const64 = Vector128.Create(64);
ulong x, y;
src -= SubpelTaps / 2 - 1;
fixed (Array8<short>* xFilter = xFilters)
{
Vector128<short> vfilter = Sse2.LoadVector128((short*)xFilter + (uint)(x0Q4 & SubpelMask) * 8);
for (y = 0; y < (uint)h; ++y)
{
ulong srcOffset = (uint)x0Q4 >> SubpelBits;
for (x = 0; x < (uint)w; x += 4)
{
Vector128<short> vsrc0 = Sse41.ConvertToVector128Int16(&src[srcOffset + x]);
Vector128<short> vsrc1 = Sse41.ConvertToVector128Int16(&src[srcOffset + x + 1]);
Vector128<short> vsrc2 = Sse41.ConvertToVector128Int16(&src[srcOffset + x + 2]);
Vector128<short> vsrc3 = Sse41.ConvertToVector128Int16(&src[srcOffset + x + 3]);
Vector128<int> sum0123 = MultiplyAddAdjacent(vsrc0, vsrc1, vsrc2, vsrc3, vfilter, zero);
Sse.StoreScalar((float*)&dst[x], PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
}
src += srcStride;
dst += dstStride;
}
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void ConvolveHoriz(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] xFilters,
int x0Q4,
int xStepQ4,
int w,
int h)
{
if (Sse41.IsSupported && UseIntrinsics && xStepQ4 == 1 << SubpelBits)
{
ConvolveHorizSse41(src, srcStride, dst, dstStride, xFilters, x0Q4, w, h);
return;
}
int x, y;
src -= SubpelTaps / 2 - 1;
for (y = 0; y < h; ++y)
{
int xQ4 = x0Q4;
for (x = 0; x < w; ++x)
{
byte* srcX = &src[xQ4 >> SubpelBits];
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
int k, sum = 0;
for (k = 0; k < SubpelTaps; ++k)
{
sum += srcX[k] * xFilter[k];
}
dst[x] = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits));
xQ4 += xStepQ4;
}
src += srcStride;
dst += dstStride;
}
}
private static unsafe void ConvolveAvgHoriz(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] xFilters,
int x0Q4,
int xStepQ4,
int w,
int h)
{
int x, y;
src -= SubpelTaps / 2 - 1;
for (y = 0; y < h; ++y)
{
int xQ4 = x0Q4;
for (x = 0; x < w; ++x)
{
byte* srcX = &src[xQ4 >> SubpelBits];
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
int k, sum = 0;
for (k = 0; k < SubpelTaps; ++k)
{
sum += srcX[k] * xFilter[k];
}
dst[x] = (byte)BitUtils.RoundPowerOfTwo(dst[x] + BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits)), 1);
xQ4 += xStepQ4;
}
src += srcStride;
dst += dstStride;
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void ConvolveVertAvx2(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] yFilters,
int y0Q4,
int w,
int h)
{
Vector128<int> zero = Vector128<int>.Zero;
Vector128<int> const64 = Vector128.Create(64);
Vector256<int> indices = Vector256.Create(
0,
srcStride,
srcStride * 2,
srcStride * 3,
srcStride * 4,
srcStride * 5,
srcStride * 6,
srcStride * 7);
ulong x, y;
src -= srcStride * (SubpelTaps / 2 - 1);
fixed (Array8<short>* yFilter = yFilters)
{
Vector128<short> vfilter = Sse2.LoadVector128((short*)yFilter + (uint)(y0Q4 & SubpelMask) * 8);
ulong srcBaseY = (uint)y0Q4 >> SubpelBits;
for (y = 0; y < (uint)h; ++y)
{
ulong srcOffset = (srcBaseY + y) * (uint)srcStride;
for (x = 0; x < (uint)w; x += 4)
{
Vector256<int> vsrc = Avx2.GatherVector256((uint*)&src[srcOffset + x], indices, 1).AsInt32();
Vector128<int> vsrcL = vsrc.GetLower();
Vector128<int> vsrcH = vsrc.GetUpper();
Vector128<byte> vsrcUnpck11 = Sse2.UnpackLow(vsrcL.AsByte(), vsrcH.AsByte());
Vector128<byte> vsrcUnpck12 = Sse2.UnpackHigh(vsrcL.AsByte(), vsrcH.AsByte());
Vector128<byte> vsrcUnpck21 = Sse2.UnpackLow(vsrcUnpck11, vsrcUnpck12);
Vector128<byte> vsrcUnpck22 = Sse2.UnpackHigh(vsrcUnpck11, vsrcUnpck12);
Vector128<byte> vsrc01 = Sse2.UnpackLow(vsrcUnpck21, vsrcUnpck22);
Vector128<byte> vsrc23 = Sse2.UnpackHigh(vsrcUnpck21, vsrcUnpck22);
Vector128<byte> vsrc11 = Sse.MoveHighToLow(vsrc01.AsSingle(), vsrc01.AsSingle()).AsByte();
Vector128<byte> vsrc33 = Sse.MoveHighToLow(vsrc23.AsSingle(), vsrc23.AsSingle()).AsByte();
Vector128<short> vsrc0 = Sse41.ConvertToVector128Int16(vsrc01);
Vector128<short> vsrc1 = Sse41.ConvertToVector128Int16(vsrc11);
Vector128<short> vsrc2 = Sse41.ConvertToVector128Int16(vsrc23);
Vector128<short> vsrc3 = Sse41.ConvertToVector128Int16(vsrc33);
Vector128<int> sum0123 = MultiplyAddAdjacent(vsrc0, vsrc1, vsrc2, vsrc3, vfilter, zero);
Sse.StoreScalar((float*)&dst[x], PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
}
dst += dstStride;
}
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void ConvolveVert(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] yFilters,
int y0Q4,
int yStepQ4,
int w,
int h)
{
if (Avx2.IsSupported && UseIntrinsics && yStepQ4 == 1 << SubpelBits)
{
ConvolveVertAvx2(src, srcStride, dst, dstStride, yFilters, y0Q4, w, h);
return;
}
int x, y;
src -= srcStride * (SubpelTaps / 2 - 1);
for (x = 0; x < w; ++x)
{
int yQ4 = y0Q4;
for (y = 0; y < h; ++y)
{
byte* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
int k, sum = 0;
for (k = 0; k < SubpelTaps; ++k)
{
sum += srcY[k * srcStride] * yFilter[k];
}
dst[y * dstStride] = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits));
yQ4 += yStepQ4;
}
++src;
++dst;
}
}
private static unsafe void ConvolveAvgVert(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] yFilters,
int y0Q4,
int yStepQ4,
int w,
int h)
{
int x, y;
src -= srcStride * (SubpelTaps / 2 - 1);
for (x = 0; x < w; ++x)
{
int yQ4 = y0Q4;
for (y = 0; y < h; ++y)
{
byte* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
int k, sum = 0;
for (k = 0; k < SubpelTaps; ++k)
{
sum += srcY[k * srcStride] * yFilter[k];
}
dst[y * dstStride] = (byte)BitUtils.RoundPowerOfTwo(
dst[y * dstStride] + BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits)), 1);
yQ4 += yStepQ4;
}
++src;
++dst;
}
}
public static unsafe void Convolve8Horiz(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
ConvolveHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h);
}
public static unsafe void Convolve8AvgHoriz(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
ConvolveAvgHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h);
}
public static unsafe void Convolve8Vert(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
ConvolveVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
}
public static unsafe void Convolve8AvgVert(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
ConvolveAvgVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
}
[StructLayout(LayoutKind.Sequential, Size = 64 * 135)]
struct Temp
{
}
public static unsafe void Convolve8(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
// Note: Fixed size intermediate buffer, temp, places limits on parameters.
// 2d filtering proceeds in 2 steps:
// (1) Interpolate horizontally into an intermediate buffer, temp.
// (2) Interpolate temp vertically to derive the sub-pixel result.
// Deriving the maximum number of rows in the temp buffer (135):
// --Smallest scaling factor is x1/2 ==> yStepQ4 = 32 (Normative).
// --Largest block size is 64x64 pixels.
// --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
// original frame (in 1/16th pixel units).
// --Must round-up because block may be located at sub-pixel position.
// --Require an additional SubpelTaps rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
// When calling in frame scaling function, the smallest scaling factor is x1/4
// ==> yStepQ4 = 64. Since w and h are at most 16, the temp buffer is still
// big enough.
Temp tempStruct;
byte* temp = (byte*)Unsafe.AsPointer(ref tempStruct); // Avoid zero initialization.
int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps;
Debug.Assert(w <= 64);
Debug.Assert(h <= 64);
Debug.Assert(yStepQ4 <= 32 || (yStepQ4 <= 64 && h <= 32));
Debug.Assert(xStepQ4 <= 64);
ConvolveHoriz(src - srcStride * (SubpelTaps / 2 - 1), srcStride, temp, 64, filter, x0Q4, xStepQ4, w, intermediateHeight);
ConvolveVert(temp + 64 * (SubpelTaps / 2 - 1), 64, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
}
public static unsafe void Convolve8Avg(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
// Fixed size intermediate buffer places limits on parameters.
byte* temp = stackalloc byte[64 * 64];
Debug.Assert(w <= 64);
Debug.Assert(h <= 64);
Convolve8(src, srcStride, temp, 64, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
ConvolveAvg(temp, 64, dst, dstStride, null, 0, 0, 0, 0, w, h);
}
public static unsafe void ConvolveCopy(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
int r;
for (r = h; r > 0; --r)
{
MemoryUtil.Copy(dst, src, w);
src += srcStride;
dst += dstStride;
}
}
public static unsafe void ConvolveAvg(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
int x, y;
for (y = 0; y < h; ++y)
{
for (x = 0; x < w; ++x)
{
dst[x] = (byte)BitUtils.RoundPowerOfTwo(dst[x] + src[x], 1);
}
src += srcStride;
dst += dstStride;
}
}
public static unsafe void ScaledHoriz(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
Convolve8Horiz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
}
public static unsafe void ScaledVert(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
Convolve8Vert(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
}
public static unsafe void Scaled2D(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
Convolve8(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
}
public static unsafe void ScaledAvgHoriz(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
Convolve8AvgHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
}
public static unsafe void ScaledAvgVert(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
Convolve8AvgVert(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
}
public static unsafe void ScaledAvg2D(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
Convolve8Avg(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
}
private static unsafe void HighbdConvolveHoriz(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] xFilters,
int x0Q4,
int xStepQ4,
int w,
int h,
int bd)
{
int x, y;
src -= SubpelTaps / 2 - 1;
for (y = 0; y < h; ++y)
{
int xQ4 = x0Q4;
for (x = 0; x < w; ++x)
{
ushort* srcX = &src[xQ4 >> SubpelBits];
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
int k, sum = 0;
for (k = 0; k < SubpelTaps; ++k)
{
sum += srcX[k] * xFilter[k];
}
dst[x] = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd);
xQ4 += xStepQ4;
}
src += srcStride;
dst += dstStride;
}
}
private static unsafe void HighbdConvolveAvgHoriz(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] xFilters,
int x0Q4,
int xStepQ4,
int w,
int h,
int bd)
{
int x, y;
src -= SubpelTaps / 2 - 1;
for (y = 0; y < h; ++y)
{
int xQ4 = x0Q4;
for (x = 0; x < w; ++x)
{
ushort* srcX = &src[xQ4 >> SubpelBits];
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
int k, sum = 0;
for (k = 0; k < SubpelTaps; ++k)
{
sum += srcX[k] * xFilter[k];
}
dst[x] = (ushort)BitUtils.RoundPowerOfTwo(dst[x] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd), 1);
xQ4 += xStepQ4;
}
src += srcStride;
dst += dstStride;
}
}
private static unsafe void HighbdConvolveVert(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] yFilters,
int y0Q4,
int yStepQ4,
int w,
int h,
int bd)
{
int x, y;
src -= srcStride * (SubpelTaps / 2 - 1);
for (x = 0; x < w; ++x)
{
int yQ4 = y0Q4;
for (y = 0; y < h; ++y)
{
ushort* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
int k, sum = 0;
for (k = 0; k < SubpelTaps; ++k)
{
sum += srcY[k * srcStride] * yFilter[k];
}
dst[y * dstStride] = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd);
yQ4 += yStepQ4;
}
++src;
++dst;
}
}
private static unsafe void HighConvolveAvgVert(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] yFilters,
int y0Q4,
int yStepQ4,
int w,
int h,
int bd)
{
int x, y;
src -= srcStride * (SubpelTaps / 2 - 1);
for (x = 0; x < w; ++x)
{
int yQ4 = y0Q4;
for (y = 0; y < h; ++y)
{
ushort* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
int k, sum = 0;
for (k = 0; k < SubpelTaps; ++k)
{
sum += srcY[k * srcStride] * yFilter[k];
}
dst[y * dstStride] = (ushort)BitUtils.RoundPowerOfTwo(
dst[y * dstStride] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd), 1);
yQ4 += yStepQ4;
}
++src;
++dst;
}
}
private static unsafe void HighbdConvolve(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h,
int bd)
{
// Note: Fixed size intermediate buffer, temp, places limits on parameters.
// 2d filtering proceeds in 2 steps:
// (1) Interpolate horizontally into an intermediate buffer, temp.
// (2) Interpolate temp vertically to derive the sub-pixel result.
// Deriving the maximum number of rows in the temp buffer (135):
// --Smallest scaling factor is x1/2 ==> yStepQ4 = 32 (Normative).
// --Largest block size is 64x64 pixels.
// --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
// original frame (in 1/16th pixel units).
// --Must round-up because block may be located at sub-pixel position.
// --Require an additional SubpelTaps rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
ushort* temp = stackalloc ushort[64 * 135];
int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps;
Debug.Assert(w <= 64);
Debug.Assert(h <= 64);
Debug.Assert(yStepQ4 <= 32);
Debug.Assert(xStepQ4 <= 32);
HighbdConvolveHoriz(src - srcStride * (SubpelTaps / 2 - 1), srcStride, temp, 64, filter, x0Q4, xStepQ4, w, intermediateHeight, bd);
HighbdConvolveVert(temp + 64 * (SubpelTaps / 2 - 1), 64, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd);
}
public static unsafe void HighbdConvolve8Horiz(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h,
int bd)
{
HighbdConvolveHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h, bd);
}
public static unsafe void HighbdConvolve8AvgHoriz(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h,
int bd)
{
HighbdConvolveAvgHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h, bd);
}
public static unsafe void HighbdConvolve8Vert(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h,
int bd)
{
HighbdConvolveVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd);
}
public static unsafe void HighbdConvolve8AvgVert(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h,
int bd)
{
HighConvolveAvgVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd);
}
public static unsafe void HighbdConvolve8(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h,
int bd)
{
HighbdConvolve(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h, bd);
}
public static unsafe void HighbdConvolve8Avg(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h,
int bd)
{
// Fixed size intermediate buffer places limits on parameters.
ushort* temp = stackalloc ushort[64 * 64];
Debug.Assert(w <= 64);
Debug.Assert(h <= 64);
HighbdConvolve8(src, srcStride, temp, 64, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h, bd);
HighbdConvolveAvg(temp, 64, dst, dstStride, null, 0, 0, 0, 0, w, h, bd);
}
public static unsafe void HighbdConvolveCopy(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h,
int bd)
{
int r;
for (r = h; r > 0; --r)
{
MemoryUtil.Copy(dst, src, w);
src += srcStride;
dst += dstStride;
}
}
public static unsafe void HighbdConvolveAvg(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h,
int bd)
{
int x, y;
for (y = 0; y < h; ++y)
{
for (x = 0; x < w; ++x)
{
dst[x] = (ushort)BitUtils.RoundPowerOfTwo(dst[x] + src[x], 1);
}
src += srcStride;
dst += dstStride;
}
}
}
}

@ -0,0 +1,12 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
internal static class Filter
{
public const int FilterBits = 7;
public const int SubpelBits = 4;
public const int SubpelMask = (1 << SubpelBits) - 1;
public const int SubpelShifts = 1 << SubpelBits;
public const int SubpelTaps = 8;
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,73 @@
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using System;
using System.Diagnostics;
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
internal static class Prob
{
public const int MaxProb = 255;
private static byte GetProb(uint num, uint den)
{
Debug.Assert(den != 0);
{
int p = (int)(((ulong)num * 256 + (den >> 1)) / den);
// (p > 255) ? 255 : (p < 1) ? 1 : p;
int clippedProb = p | ((255 - p) >> 23) | (p == 0 ? 1 : 0);
return (byte)clippedProb;
}
}
/* This function assumes prob1 and prob2 are already within [1,255] range. */
public static byte WeightedProb(int prob1, int prob2, int factor)
{
return (byte)BitUtils.RoundPowerOfTwo(prob1 * (256 - factor) + prob2 * factor, 8);
}
// MODE_MV_MAX_UPDATE_FACTOR (128) * count / MODE_MV_COUNT_SAT;
private static readonly uint[] CountToUpdateFactor = new uint[]
{
0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64,
70, 76, 83, 89, 96, 102, 108, 115, 121, 128
};
private const int ModeMvCountSat = 20;
public static byte ModeMvMergeProbs(byte preProb, uint ct0, uint ct1)
{
uint den = ct0 + ct1;
if (den == 0)
{
return preProb;
}
else
{
uint count = Math.Min(den, ModeMvCountSat);
uint factor = CountToUpdateFactor[(int)count];
byte prob = GetProb(ct0, den);
return WeightedProb(preProb, prob, (int)factor);
}
}
private static uint TreeMergeProbsImpl(
uint i,
sbyte[] tree,
ReadOnlySpan<byte> preProbs,
ReadOnlySpan<uint> counts,
Span<byte> probs)
{
int l = tree[i];
uint leftCount = (l <= 0) ? counts[-l] : TreeMergeProbsImpl((uint)l, tree, preProbs, counts, probs);
int r = tree[i + 1];
uint rightCount = (r <= 0) ? counts[-r] : TreeMergeProbsImpl((uint)r, tree, preProbs, counts, probs);
probs[(int)(i >> 1)] = ModeMvMergeProbs(preProbs[(int)(i >> 1)], leftCount, rightCount);
return leftCount + rightCount;
}
public static void TreeMergeProbs(sbyte[] tree, ReadOnlySpan<byte> preProbs, ReadOnlySpan<uint> counts, Span<byte> probs)
{
TreeMergeProbsImpl(0, tree, preProbs, counts, probs);
}
}
}

@ -0,0 +1,237 @@
using System;
using System.Buffers.Binary;
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
internal struct Reader
{
private static readonly byte[] Norm = new byte[]
{
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
private const int BdValueSize = sizeof(ulong) * 8;
// This is meant to be a large, positive constant that can still be efficiently
// loaded as an immediate (on platforms like ARM, for example).
// Even relatively modest values like 100 would work fine.
private const int LotsOfBits = 0x40000000;
public ulong Value;
public uint Range;
public int Count;
private ArrayPtr<byte> _buffer;
public bool Init(ArrayPtr<byte> buffer, int size)
{
if (size != 0 && buffer.IsNull)
{
return true;
}
else
{
_buffer = new ArrayPtr<byte>(ref buffer[0], size);
Value = 0;
Count = -8;
Range = 255;
Fill();
return ReadBit() != 0; // Marker bit
}
}
private void Fill()
{
ReadOnlySpan<byte> buffer = _buffer.ToSpan();
ReadOnlySpan<byte> bufferStart = buffer;
ulong value = Value;
int count = Count;
ulong bytesLeft = (ulong)buffer.Length;
ulong bitsLeft = bytesLeft * 8;
int shift = BdValueSize - 8 - (count + 8);
if (bitsLeft > BdValueSize)
{
int bits = (shift & unchecked((int)0xfffffff8)) + 8;
ulong nv;
ulong bigEndianValues = BinaryPrimitives.ReadUInt64BigEndian(buffer);
nv = bigEndianValues >> (BdValueSize - bits);
count += bits;
buffer = buffer.Slice(bits >> 3);
value = Value | (nv << (shift & 0x7));
}
else
{
int bitsOver = shift + 8 - (int)bitsLeft;
int loopEnd = 0;
if (bitsOver >= 0)
{
count += LotsOfBits;
loopEnd = bitsOver;
}
if (bitsOver < 0 || bitsLeft != 0)
{
while (shift >= loopEnd)
{
count += 8;
value |= (ulong)buffer[0] << shift;
buffer = buffer.Slice(1);
shift -= 8;
}
}
}
// NOTE: Variable 'buffer' may not relate to '_buffer' after decryption,
// so we increase '_buffer' by the amount that 'buffer' moved, rather than
// assign 'buffer' to '_buffer'.
_buffer = _buffer.Slice(bufferStart.Length - buffer.Length);
Value = value;
Count = count;
}
public bool HasError()
{
// Check if we have reached the end of the buffer.
//
// Variable 'count' stores the number of bits in the 'value' buffer, minus
// 8. The top byte is part of the algorithm, and the remainder is buffered
// to be shifted into it. So if count == 8, the top 16 bits of 'value' are
// occupied, 8 for the algorithm and 8 in the buffer.
//
// When reading a byte from the user's buffer, count is filled with 8 and
// one byte is filled into the value buffer. When we reach the end of the
// data, count is additionally filled with LotsOfBits. So when
// count == LotsOfBits - 1, the user's data has been exhausted.
//
// 1 if we have tried to decode bits after the end of stream was encountered.
// 0 No error.
return Count > BdValueSize && Count < LotsOfBits;
}
public int Read(int prob)
{
uint bit = 0;
ulong value;
ulong bigsplit;
int count;
uint range;
uint split = (Range * (uint)prob + (256 - (uint)prob)) >> 8;
if (Count < 0)
{
Fill();
}
value = Value;
count = Count;
bigsplit = (ulong)split << (BdValueSize - 8);
range = split;
if (value >= bigsplit)
{
range = Range - split;
value -= bigsplit;
bit = 1;
}
{
int shift = Norm[range];
range <<= shift;
value <<= shift;
count -= shift;
}
Value = value;
Count = count;
Range = range;
return (int)bit;
}
public int ReadBit()
{
return Read(128); // vpx_prob_half
}
public int ReadLiteral(int bits)
{
int literal = 0, bit;
for (bit = bits - 1; bit >= 0; bit--)
{
literal |= ReadBit() << bit;
}
return literal;
}
public int ReadTree(ReadOnlySpan<sbyte> tree, ReadOnlySpan<byte> probs)
{
sbyte i = 0;
while ((i = tree[i + Read(probs[i >> 1])]) > 0)
{
continue;
}
return -i;
}
public int ReadBool(int prob, ref ulong value, ref int count, ref uint range)
{
uint split = (range * (uint)prob + (256 - (uint)prob)) >> 8;
ulong bigsplit = (ulong)split << (BdValueSize - 8);
if (count < 0)
{
Value = value;
Count = count;
Fill();
value = Value;
count = Count;
}
if (value >= bigsplit)
{
range = range - split;
value = value - bigsplit;
{
int shift = Norm[range];
range <<= shift;
value <<= shift;
count -= shift;
}
return 1;
}
range = split;
{
int shift = Norm[range];
range <<= shift;
value <<= shift;
count -= shift;
}
return 0;
}
public ArrayPtr<byte> FindEnd()
{
// Find the end of the coded buffer
while (Count > 8 && Count < BdValueSize)
{
Count -= 8;
_buffer = _buffer.Slice(-1);
}
return _buffer;
}
}
}

@ -0,0 +1,54 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
internal static class TxfmCommon
{
// Constants used by all idct/dct functions
public const int DctConstBits = 14;
public const int DctConstRounding = 1 << (DctConstBits - 1);
public const int UnitQuantShift = 2;
public const int UnitQuantFactor = 1 << UnitQuantShift;
// Constants:
// for (int i = 1; i < 32; ++i)
// Console.WriteLine("public const short CosPi{0}_64 = {1};", i, MathF.Round(16384 * MathF.Cos(i * MathF.PI / 64)));
// Note: sin(k * Pi / 64) = cos((32 - k) * Pi / 64)
public const short CosPi1_64 = 16364;
public const short CosPi2_64 = 16305;
public const short CosPi3_64 = 16207;
public const short CosPi4_64 = 16069;
public const short CosPi5_64 = 15893;
public const short CosPi6_64 = 15679;
public const short CosPi7_64 = 15426;
public const short CosPi8_64 = 15137;
public const short CosPi9_64 = 14811;
public const short CosPi10_64 = 14449;
public const short CosPi11_64 = 14053;
public const short CosPi12_64 = 13623;
public const short CosPi13_64 = 13160;
public const short CosPi14_64 = 12665;
public const short CosPi15_64 = 12140;
public const short CosPi16_64 = 11585;
public const short CosPi17_64 = 11003;
public const short CosPi18_64 = 10394;
public const short CosPi19_64 = 9760;
public const short CosPi20_64 = 9102;
public const short CosPi21_64 = 8423;
public const short CosPi22_64 = 7723;
public const short CosPi23_64 = 7005;
public const short CosPi24_64 = 6270;
public const short CosPi25_64 = 5520;
public const short CosPi26_64 = 4756;
public const short CosPi27_64 = 3981;
public const short CosPi28_64 = 3196;
public const short CosPi29_64 = 2404;
public const short CosPi30_64 = 1606;
public const short CosPi31_64 = 804;
// 16384 * sqrt(2) * sin(kPi / 9) * 2 / 3
public const short SinPi1_9 = 5283;
public const short SinPi2_9 = 9929;
public const short SinPi3_9 = 13377;
public const short SinPi4_9 = 15212;
}
}

@ -0,0 +1,536 @@
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using System;
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal static class Idct
{
private delegate void Transform1D(ReadOnlySpan<int> input, Span<int> output);
private delegate void HighbdTransform1D(ReadOnlySpan<int> input, Span<int> output, int bd);
private struct Transform2D
{
public Transform1D Cols, Rows; // Vertical and horizontal
public Transform2D(Transform1D cols, Transform1D rows)
{
Cols = cols;
Rows = rows;
}
}
private struct HighbdTransform2D
{
public HighbdTransform1D Cols, Rows; // Vertical and horizontal
public HighbdTransform2D(HighbdTransform1D cols, HighbdTransform1D rows)
{
Cols = cols;
Rows = rows;
}
}
private static readonly Transform2D[] Iht4 = new Transform2D[]
{
new Transform2D(Idct4, Idct4), // DCT_DCT = 0
new Transform2D(Iadst4, Idct4), // ADST_DCT = 1
new Transform2D(Idct4, Iadst4), // DCT_ADST = 2
new Transform2D(Iadst4, Iadst4) // ADST_ADST = 3
};
public static void Iht4x416Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
{
int i, j;
Span<int> output = stackalloc int[4 * 4];
Span<int> outptr = output;
Span<int> tempIn = stackalloc int[4];
Span<int> tempOut = stackalloc int[4];
// Inverse transform row vectors
for (i = 0; i < 4; ++i)
{
Iht4[txType].Rows(input, outptr);
input = input.Slice(4);
outptr = outptr.Slice(4);
}
// Inverse transform column vectors
for (i = 0; i < 4; ++i)
{
for (j = 0; j < 4; ++j)
{
tempIn[j] = output[j * 4 + i];
}
Iht4[txType].Cols(tempIn, tempOut);
for (j = 0; j < 4; ++j)
{
dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 4));
}
}
}
private static readonly Transform2D[] Iht8 = new Transform2D[]
{
new Transform2D(Idct8, Idct8), // DCT_DCT = 0
new Transform2D(Iadst8, Idct8), // ADST_DCT = 1
new Transform2D(Idct8, Iadst8), // DCT_ADST = 2
new Transform2D(Iadst8, Iadst8) // ADST_ADST = 3
};
public static void Iht8x864Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
{
int i, j;
Span<int> output = stackalloc int[8 * 8];
Span<int> outptr = output;
Span<int> tempIn = stackalloc int[8];
Span<int> tempOut = stackalloc int[8];
Transform2D ht = Iht8[txType];
// Inverse transform row vectors
for (i = 0; i < 8; ++i)
{
ht.Rows(input, outptr);
input = input.Slice(8);
outptr = outptr.Slice(8);
}
// Inverse transform column vectors
for (i = 0; i < 8; ++i)
{
for (j = 0; j < 8; ++j)
{
tempIn[j] = output[j * 8 + i];
}
ht.Cols(tempIn, tempOut);
for (j = 0; j < 8; ++j)
{
dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 5));
}
}
}
private static readonly Transform2D[] Iht16 = new Transform2D[]
{
new Transform2D(Idct16, Idct16), // DCT_DCT = 0
new Transform2D(Iadst16, Idct16), // ADST_DCT = 1
new Transform2D(Idct16, Iadst16), // DCT_ADST = 2
new Transform2D(Iadst16, Iadst16) // ADST_ADST = 3
};
public static void Iht16x16256Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
{
int i, j;
Span<int> output = stackalloc int[16 * 16];
Span<int> outptr = output;
Span<int> tempIn = stackalloc int[16];
Span<int> tempOut = stackalloc int[16];
Transform2D ht = Iht16[txType];
// Rows
for (i = 0; i < 16; ++i)
{
ht.Rows(input, outptr);
input = input.Slice(16);
outptr = outptr.Slice(16);
}
// Columns
for (i = 0; i < 16; ++i)
{
for (j = 0; j < 16; ++j)
{
tempIn[j] = output[j * 16 + i];
}
ht.Cols(tempIn, tempOut);
for (j = 0; j < 16; ++j)
{
dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6));
}
}
}
// Idct
public static void Idct4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
{
if (eob > 1)
{
Idct4x416Add(input, dest, stride);
}
else
{
Idct4x41Add(input, dest, stride);
}
}
public static void Iwht4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
{
if (eob > 1)
{
Iwht4x416Add(input, dest, stride);
}
else
{
Iwht4x41Add(input, dest, stride);
}
}
public static void Idct8x8Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
{
// If dc is 1, then input[0] is the reconstructed value, do not need
// dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
// The calculation can be simplified if there are not many non-zero dct
// coefficients. Use eobs to decide what to do.
if (eob == 1)
{
// DC only DCT coefficient
Idct8x81Add(input, dest, stride);
}
else if (eob <= 12)
{
Idct8x812Add(input, dest, stride);
}
else
{
Idct8x864Add(input, dest, stride);
}
}
public static void Idct16x16Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
{
/* The calculation can be simplified if there are not many non-zero dct
* coefficients. Use eobs to separate different cases. */
if (eob == 1) /* DC only DCT coefficient. */
{
Idct16x161Add(input, dest, stride);
}
else if (eob <= 10)
{
Idct16x1610Add(input, dest, stride);
}
else if (eob <= 38)
{
Idct16x1638Add(input, dest, stride);
}
else
{
Idct16x16256Add(input, dest, stride);
}
}
public static void Idct32x32Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
{
if (eob == 1)
{
Idct32x321Add(input, dest, stride);
}
else if (eob <= 34)
{
// Non-zero coeff only in upper-left 8x8
Idct32x3234Add(input, dest, stride);
}
else if (eob <= 135)
{
// Non-zero coeff only in upper-left 16x16
Idct32x32135Add(input, dest, stride);
}
else
{
Idct32x321024Add(input, dest, stride);
}
}
// Iht
public static void Iht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
{
if (txType == TxType.DctDct)
{
Idct4x4Add(input, dest, stride, eob);
}
else
{
Iht4x416Add(input, dest, stride, (int)txType);
}
}
public static void Iht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
{
if (txType == TxType.DctDct)
{
Idct8x8Add(input, dest, stride, eob);
}
else
{
Iht8x864Add(input, dest, stride, (int)txType);
}
}
public static void Iht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest,
int stride, int eob)
{
if (txType == TxType.DctDct)
{
Idct16x16Add(input, dest, stride, eob);
}
else
{
Iht16x16256Add(input, dest, stride, (int)txType);
}
}
private static readonly HighbdTransform2D[] HighbdIht4 = new HighbdTransform2D[]
{
new HighbdTransform2D(HighbdIdct4, HighbdIdct4), // DCT_DCT = 0
new HighbdTransform2D(HighbdIadst4, HighbdIdct4), // ADST_DCT = 1
new HighbdTransform2D(HighbdIdct4, HighbdIadst4), // DCT_ADST = 2
new HighbdTransform2D(HighbdIadst4, HighbdIadst4) // ADST_ADST = 3
};
public static void HighbdIht4x416Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
{
int i, j;
Span<int> output = stackalloc int[4 * 4];
Span<int> outptr = output;
Span<int> tempIn = stackalloc int[4];
Span<int> tempOut = stackalloc int[4];
// Inverse transform row vectors.
for (i = 0; i < 4; ++i)
{
HighbdIht4[txType].Rows(input, outptr, bd);
input = input.Slice(4);
outptr = outptr.Slice(4);
}
// Inverse transform column vectors.
for (i = 0; i < 4; ++i)
{
for (j = 0; j < 4; ++j)
{
tempIn[j] = output[j * 4 + i];
}
HighbdIht4[txType].Cols(tempIn, tempOut, bd);
for (j = 0; j < 4; ++j)
{
dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 4), bd);
}
}
}
private static readonly HighbdTransform2D[] HighIht8 = new HighbdTransform2D[]
{
new HighbdTransform2D(HighbdIdct8, HighbdIdct8), // DCT_DCT = 0
new HighbdTransform2D(HighbdIadst8, HighbdIdct8), // ADST_DCT = 1
new HighbdTransform2D(HighbdIdct8, HighbdIadst8), // DCT_ADST = 2
new HighbdTransform2D(HighbdIadst8, HighbdIadst8) // ADST_ADST = 3
};
public static void HighbdIht8x864Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
{
int i, j;
Span<int> output = stackalloc int[8 * 8];
Span<int> outptr = output;
Span<int> tempIn = stackalloc int[8];
Span<int> tempOut = stackalloc int[8];
HighbdTransform2D ht = HighIht8[txType];
// Inverse transform row vectors.
for (i = 0; i < 8; ++i)
{
ht.Rows(input, outptr, bd);
input = input.Slice(8);
outptr = output.Slice(8);
}
// Inverse transform column vectors.
for (i = 0; i < 8; ++i)
{
for (j = 0; j < 8; ++j)
{
tempIn[j] = output[j * 8 + i];
}
ht.Cols(tempIn, tempOut, bd);
for (j = 0; j < 8; ++j)
{
dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 5), bd);
}
}
}
private static readonly HighbdTransform2D[] HighIht16 = new HighbdTransform2D[]
{
new HighbdTransform2D(HighbdIdct16, HighbdIdct16), // DCT_DCT = 0
new HighbdTransform2D(HighbdIadst16, HighbdIdct16), // ADST_DCT = 1
new HighbdTransform2D(HighbdIdct16, HighbdIadst16), // DCT_ADST = 2
new HighbdTransform2D(HighbdIadst16, HighbdIadst16) // ADST_ADST = 3
};
public static void HighbdIht16x16256Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
{
int i, j;
Span<int> output = stackalloc int[16 * 16];
Span<int> outptr = output;
Span<int> tempIn = stackalloc int[16];
Span<int> tempOut = stackalloc int[16];
HighbdTransform2D ht = HighIht16[txType];
// Rows
for (i = 0; i < 16; ++i)
{
ht.Rows(input, outptr, bd);
input = input.Slice(16);
outptr = output.Slice(16);
}
// Columns
for (i = 0; i < 16; ++i)
{
for (j = 0; j < 16; ++j)
{
tempIn[j] = output[j * 16 + i];
}
ht.Cols(tempIn, tempOut, bd);
for (j = 0; j < 16; ++j)
{
dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6), bd);
}
}
}
// Idct
public static void HighbdIdct4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
{
if (eob > 1)
{
HighbdIdct4x416Add(input, dest, stride, bd);
}
else
{
HighbdIdct4x41Add(input, dest, stride, bd);
}
}
public static void HighbdIwht4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
{
if (eob > 1)
{
HighbdIwht4x416Add(input, dest, stride, bd);
}
else
{
HighbdIwht4x41Add(input, dest, stride, bd);
}
}
public static void HighbdIdct8x8Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
{
// If dc is 1, then input[0] is the reconstructed value, do not need
// dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
// The calculation can be simplified if there are not many non-zero dct
// coefficients. Use eobs to decide what to do.
// DC only DCT coefficient
if (eob == 1)
{
vpx_Highbdidct8x8_1_add_c(input, dest, stride, bd);
}
else if (eob <= 12)
{
HighbdIdct8x812Add(input, dest, stride, bd);
}
else
{
HighbdIdct8x864Add(input, dest, stride, bd);
}
}
public static void HighbdIdct16x16Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
{
// The calculation can be simplified if there are not many non-zero dct
// coefficients. Use eobs to separate different cases.
// DC only DCT coefficient.
if (eob == 1)
{
HighbdIdct16x161Add(input, dest, stride, bd);
}
else if (eob <= 10)
{
HighbdIdct16x1610Add(input, dest, stride, bd);
}
else if (eob <= 38)
{
HighbdIdct16x1638Add(input, dest, stride, bd);
}
else
{
HighbdIdct16x16256Add(input, dest, stride, bd);
}
}
public static void HighbdIdct32x32Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
{
// Non-zero coeff only in upper-left 8x8
if (eob == 1)
{
HighbdIdct32x321Add(input, dest, stride, bd);
}
else if (eob <= 34)
{
HighbdIdct32x3234Add(input, dest, stride, bd);
}
else if (eob <= 135)
{
HighbdIdct32x32135Add(input, dest, stride, bd);
}
else
{
HighbdIdct32x321024Add(input, dest, stride, bd);
}
}
// Iht
public static void HighbdIht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
{
if (txType == TxType.DctDct)
{
HighbdIdct4x4Add(input, dest, stride, eob, bd);
}
else
{
HighbdIht4x416Add(input, dest, stride, (int)txType, bd);
}
}
public static void HighbdIht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
{
if (txType == TxType.DctDct)
{
HighbdIdct8x8Add(input, dest, stride, eob, bd);
}
else
{
HighbdIht8x864Add(input, dest, stride, (int)txType, bd);
}
}
public static void HighbdIht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
{
if (txType == TxType.DctDct)
{
HighbdIdct16x16Add(input, dest, stride, eob, bd);
}
else
{
HighbdIht16x16256Add(input, dest, stride, (int)txType, bd);
}
}
}
}

@ -0,0 +1,15 @@
using System;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
class InternalErrorException : Exception
{
public InternalErrorException(string message) : base(message)
{
}
public InternalErrorException(string message, Exception innerException) : base(message, innerException)
{
}
}
}

@ -0,0 +1,14 @@
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal struct InternalErrorInfo
{
public CodecErr ErrorCode;
public void InternalError(CodecErr error, string message)
{
ErrorCode = error;
throw new InternalErrorException(message);
}
}
}

@ -0,0 +1,418 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal static class LoopFilter
{
public const int MaxLoopFilter = 63;
public const int MaxRefLfDeltas = 4;
public const int MaxModeLfDeltas = 2;
// 64 bit masks for left transform size. Each 1 represents a position where
// we should apply a loop filter across the left border of an 8x8 block
// boundary.
//
// In the case of TX_16X16 -> ( in low order byte first we end up with
// a mask that looks like this
//
// 10101010
// 10101010
// 10101010
// 10101010
// 10101010
// 10101010
// 10101010
// 10101010
//
// A loopfilter should be applied to every other 8x8 horizontally.
private static readonly ulong[] Left64X64TxformMask = new ulong[]
{
0xffffffffffffffffUL, // TX_4X4
0xffffffffffffffffUL, // TX_8x8
0x5555555555555555UL, // TX_16x16
0x1111111111111111UL, // TX_32x32
};
// 64 bit masks for above transform size. Each 1 represents a position where
// we should apply a loop filter across the top border of an 8x8 block
// boundary.
//
// In the case of TX_32x32 -> ( in low order byte first we end up with
// a mask that looks like this
//
// 11111111
// 00000000
// 00000000
// 00000000
// 11111111
// 00000000
// 00000000
// 00000000
//
// A loopfilter should be applied to every other 4 the row vertically.
private static readonly ulong[] Above64X64TxformMask = new ulong[]
{
0xffffffffffffffffUL, // TX_4X4
0xffffffffffffffffUL, // TX_8x8
0x00ff00ff00ff00ffUL, // TX_16x16
0x000000ff000000ffUL, // TX_32x32
};
// 64 bit masks for prediction sizes (left). Each 1 represents a position
// where left border of an 8x8 block. These are aligned to the right most
// appropriate bit, and then shifted into place.
//
// In the case of TX_16x32 -> ( low order byte first ) we end up with
// a mask that looks like this :
//
// 10000000
// 10000000
// 10000000
// 10000000
// 00000000
// 00000000
// 00000000
// 00000000
private static readonly ulong[] LeftPredictionMask = new ulong[]
{
0x0000000000000001UL, // BLOCK_4X4,
0x0000000000000001UL, // BLOCK_4X8,
0x0000000000000001UL, // BLOCK_8X4,
0x0000000000000001UL, // BLOCK_8X8,
0x0000000000000101UL, // BLOCK_8X16,
0x0000000000000001UL, // BLOCK_16X8,
0x0000000000000101UL, // BLOCK_16X16,
0x0000000001010101UL, // BLOCK_16X32,
0x0000000000000101UL, // BLOCK_32X16,
0x0000000001010101UL, // BLOCK_32X32,
0x0101010101010101UL, // BLOCK_32X64,
0x0000000001010101UL, // BLOCK_64X32,
0x0101010101010101UL, // BLOCK_64X64
};
// 64 bit mask to shift and set for each prediction size.
private static readonly ulong[] AbovePredictionMask = new ulong[]
{
0x0000000000000001UL, // BLOCK_4X4
0x0000000000000001UL, // BLOCK_4X8
0x0000000000000001UL, // BLOCK_8X4
0x0000000000000001UL, // BLOCK_8X8
0x0000000000000001UL, // BLOCK_8X16,
0x0000000000000003UL, // BLOCK_16X8
0x0000000000000003UL, // BLOCK_16X16
0x0000000000000003UL, // BLOCK_16X32,
0x000000000000000fUL, // BLOCK_32X16,
0x000000000000000fUL, // BLOCK_32X32,
0x000000000000000fUL, // BLOCK_32X64,
0x00000000000000ffUL, // BLOCK_64X32,
0x00000000000000ffUL, // BLOCK_64X64
};
// 64 bit mask to shift and set for each prediction size. A bit is set for
// each 8x8 block that would be in the left most block of the given block
// size in the 64x64 block.
private static readonly ulong[] SizeMask = new ulong[]
{
0x0000000000000001UL, // BLOCK_4X4
0x0000000000000001UL, // BLOCK_4X8
0x0000000000000001UL, // BLOCK_8X4
0x0000000000000001UL, // BLOCK_8X8
0x0000000000000101UL, // BLOCK_8X16,
0x0000000000000003UL, // BLOCK_16X8
0x0000000000000303UL, // BLOCK_16X16
0x0000000003030303UL, // BLOCK_16X32,
0x0000000000000f0fUL, // BLOCK_32X16,
0x000000000f0f0f0fUL, // BLOCK_32X32,
0x0f0f0f0f0f0f0f0fUL, // BLOCK_32X64,
0x00000000ffffffffUL, // BLOCK_64X32,
0xffffffffffffffffUL, // BLOCK_64X64
};
// These are used for masking the left and above borders.
private const ulong LeftBorder = 0x1111111111111111UL;
private const ulong AboveBorder = 0x000000ff000000ffUL;
// 16 bit masks for uv transform sizes.
private static readonly ushort[] Left64X64TxformMaskUv = new ushort[]
{
0xffff, // TX_4X4
0xffff, // TX_8x8
0x5555, // TX_16x16
0x1111, // TX_32x32
};
private static readonly ushort[] Above64X64TxformMaskUv = new ushort[]
{
0xffff, // TX_4X4
0xffff, // TX_8x8
0x0f0f, // TX_16x16
0x000f, // TX_32x32
};
// 16 bit left mask to shift and set for each uv prediction size.
private static readonly ushort[] LeftPredictionMaskUv = new ushort[]
{
0x0001, // BLOCK_4X4,
0x0001, // BLOCK_4X8,
0x0001, // BLOCK_8X4,
0x0001, // BLOCK_8X8,
0x0001, // BLOCK_8X16,
0x0001, // BLOCK_16X8,
0x0001, // BLOCK_16X16,
0x0011, // BLOCK_16X32,
0x0001, // BLOCK_32X16,
0x0011, // BLOCK_32X32,
0x1111, // BLOCK_32X64
0x0011, // BLOCK_64X32,
0x1111, // BLOCK_64X64
};
// 16 bit above mask to shift and set for uv each prediction size.
private static readonly ushort[] AbovePredictionMaskUv = new ushort[]
{
0x0001, // BLOCK_4X4
0x0001, // BLOCK_4X8
0x0001, // BLOCK_8X4
0x0001, // BLOCK_8X8
0x0001, // BLOCK_8X16,
0x0001, // BLOCK_16X8
0x0001, // BLOCK_16X16
0x0001, // BLOCK_16X32,
0x0003, // BLOCK_32X16,
0x0003, // BLOCK_32X32,
0x0003, // BLOCK_32X64,
0x000f, // BLOCK_64X32,
0x000f, // BLOCK_64X64
};
// 64 bit mask to shift and set for each uv prediction size
private static readonly ushort[] SizeMaskUv = new ushort[]
{
0x0001, // BLOCK_4X4
0x0001, // BLOCK_4X8
0x0001, // BLOCK_8X4
0x0001, // BLOCK_8X8
0x0001, // BLOCK_8X16,
0x0001, // BLOCK_16X8
0x0001, // BLOCK_16X16
0x0011, // BLOCK_16X32,
0x0003, // BLOCK_32X16,
0x0033, // BLOCK_32X32,
0x3333, // BLOCK_32X64,
0x00ff, // BLOCK_64X32,
0xffff, // BLOCK_64X64
};
private const ushort LeftBorderUv = 0x1111;
private const ushort AboveBorderUv = 0x000f;
private static readonly int[] ModeLfLut = new int[]
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES
1, 1, 0, 1 // INTER_MODES (ZEROMV == 0)
};
private static byte GetFilterLevel(ref LoopFilterInfoN lfiN, ref ModeInfo mi)
{
return lfiN.Lvl[mi.SegmentId][mi.RefFrame[0]][ModeLfLut[(int)mi.Mode]];
}
private static ref LoopFilterMask GetLfm(ref Types.LoopFilter lf, int miRow, int miCol)
{
return ref lf.Lfm[(miCol >> 3) + ((miRow >> 3) * lf.LfmStride)];
}
// 8x8 blocks in a superblock. A "1" represents the first block in a 16x16
// or greater area.
private static readonly byte[][] FirstBlockIn16x16 = new byte[][]
{
new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 }
};
// This function sets up the bit masks for a block represented
// by miRow, miCol in a 64x64 region.
public static void BuildMask(ref Vp9Common cm, ref ModeInfo mi, int miRow, int miCol, int bw, int bh)
{
BlockSize blockSize = mi.SbType;
TxSize txSizeY = mi.TxSize;
ref LoopFilterInfoN lfiN = ref cm.LfInfo;
int filterLevel = GetFilterLevel(ref lfiN, ref mi);
TxSize txSizeUv = Luts.UvTxsizeLookup[(int)blockSize][(int)txSizeY][1][1];
ref LoopFilterMask lfm = ref GetLfm(ref cm.Lf, miRow, miCol);
ref ulong leftY = ref lfm.LeftY[(int)txSizeY];
ref ulong aboveY = ref lfm.AboveY[(int)txSizeY];
ref ulong int4X4Y = ref lfm.Int4x4Y;
ref ushort leftUv = ref lfm.LeftUv[(int)txSizeUv];
ref ushort aboveUv = ref lfm.AboveUv[(int)txSizeUv];
ref ushort int4X4Uv = ref lfm.Int4x4Uv;
int rowInSb = (miRow & 7);
int colInSb = (miCol & 7);
int shiftY = colInSb + (rowInSb << 3);
int shiftUv = (colInSb >> 1) + ((rowInSb >> 1) << 2);
int buildUv = FirstBlockIn16x16[rowInSb][colInSb];
if (filterLevel == 0)
{
return;
}
else
{
int index = shiftY;
int i;
for (i = 0; i < bh; i++)
{
MemoryMarshal.CreateSpan(ref lfm.LflY[index], 64 - index).Slice(0, bw).Fill((byte)filterLevel);
index += 8;
}
}
// These set 1 in the current block size for the block size edges.
// For instance if the block size is 32x16, we'll set:
// above = 1111
// 0000
// and
// left = 1000
// = 1000
// NOTE : In this example the low bit is left most ( 1000 ) is stored as
// 1, not 8...
//
// U and V set things on a 16 bit scale.
//
aboveY |= AbovePredictionMask[(int)blockSize] << shiftY;
leftY |= LeftPredictionMask[(int)blockSize] << shiftY;
if (buildUv != 0)
{
aboveUv |= (ushort)(AbovePredictionMaskUv[(int)blockSize] << shiftUv);
leftUv |= (ushort)(LeftPredictionMaskUv[(int)blockSize] << shiftUv);
}
// If the block has no coefficients and is not intra we skip applying
// the loop filter on block edges.
if (mi.Skip != 0 && mi.IsInterBlock())
{
return;
}
// Add a mask for the transform size. The transform size mask is set to
// be correct for a 64x64 prediction block size. Mask to match the size of
// the block we are working on and then shift it into place.
aboveY |= (SizeMask[(int)blockSize] & Above64X64TxformMask[(int)txSizeY]) << shiftY;
leftY |= (SizeMask[(int)blockSize] & Left64X64TxformMask[(int)txSizeY]) << shiftY;
if (buildUv != 0)
{
aboveUv |= (ushort)((SizeMaskUv[(int)blockSize] & Above64X64TxformMaskUv[(int)txSizeUv]) << shiftUv);
leftUv |= (ushort)((SizeMaskUv[(int)blockSize] & Left64X64TxformMaskUv[(int)txSizeUv]) << shiftUv);
}
// Try to determine what to do with the internal 4x4 block boundaries. These
// differ from the 4x4 boundaries on the outside edge of an 8x8 in that the
// internal ones can be skipped and don't depend on the prediction block size.
if (txSizeY == TxSize.Tx4x4)
{
int4X4Y |= SizeMask[(int)blockSize] << shiftY;
}
if (buildUv != 0 && txSizeUv == TxSize.Tx4x4)
{
int4X4Uv |= (ushort)((SizeMaskUv[(int)blockSize] & 0xffff) << shiftUv);
}
}
public static unsafe void ResetLfm(ref Vp9Common cm)
{
if (cm.Lf.FilterLevel != 0)
{
MemoryUtil.Fill(cm.Lf.Lfm.ToPointer(), new LoopFilterMask(), ((cm.MiRows + (Constants.MiBlockSize - 1)) >> 3) * cm.Lf.LfmStride);
}
}
private static void UpdateSharpness(ref LoopFilterInfoN lfi, int sharpnessLvl)
{
int lvl;
// For each possible value for the loop filter fill out limits
for (lvl = 0; lvl <= MaxLoopFilter; lvl++)
{
// Set loop filter parameters that control sharpness.
int blockInsideLimit = lvl >> ((sharpnessLvl > 0 ? 1 : 0) + (sharpnessLvl > 4 ? 1 : 0));
if (sharpnessLvl > 0)
{
if (blockInsideLimit > (9 - sharpnessLvl))
{
blockInsideLimit = (9 - sharpnessLvl);
}
}
if (blockInsideLimit < 1)
{
blockInsideLimit = 1;
}
lfi.Lfthr[lvl].Lim.ToSpan().Fill((byte)blockInsideLimit);
lfi.Lfthr[lvl].Mblim.ToSpan().Fill((byte)(2 * (lvl + 2) + blockInsideLimit));
}
}
public static void LoopFilterFrameInit(ref Vp9Common cm, int defaultFiltLvl)
{
int segId;
// nShift is the multiplier for lfDeltas
// the multiplier is 1 for when filterLvl is between 0 and 31;
// 2 when filterLvl is between 32 and 63
int scale = 1 << (defaultFiltLvl >> 5);
ref LoopFilterInfoN lfi = ref cm.LfInfo;
ref Types.LoopFilter lf = ref cm.Lf;
ref Segmentation seg = ref cm.Seg;
// Update limits if sharpness has changed
if (lf.LastSharpnessLevel != lf.SharpnessLevel)
{
UpdateSharpness(ref lfi, lf.SharpnessLevel);
lf.LastSharpnessLevel = lf.SharpnessLevel;
}
for (segId = 0; segId < Constants.MaxSegments; segId++)
{
int lvlSeg = defaultFiltLvl;
if (seg.IsSegFeatureActive(segId, SegLvlFeatures.SegLvlAltLf) != 0)
{
int data = seg.GetSegData(segId, SegLvlFeatures.SegLvlAltLf);
lvlSeg = Math.Clamp(seg.AbsDelta == Constants.SegmentAbsData ? data : defaultFiltLvl + data, 0, MaxLoopFilter);
}
if (!lf.ModeRefDeltaEnabled)
{
// We could get rid of this if we assume that deltas are set to
// zero when not in use; encoder always uses deltas
MemoryMarshal.Cast<Array2<byte>, byte>(lfi.Lvl[segId].ToSpan()).Fill((byte)lvlSeg);
}
else
{
int refr, mode;
int intraLvl = lvlSeg + lf.RefDeltas[Constants.IntraFrame] * scale;
lfi.Lvl[segId][Constants.IntraFrame][0] = (byte)Math.Clamp(intraLvl, 0, MaxLoopFilter);
for (refr = Constants.LastFrame; refr < Constants.MaxRefFrames; ++refr)
{
for (mode = 0; mode < MaxModeLfDeltas; ++mode)
{
int interLvl = lvlSeg + lf.RefDeltas[refr] * scale + lf.ModeDeltas[mode] * scale;
lfi.Lvl[segId][refr][mode] = (byte)Math.Clamp(interLvl, 0, MaxLoopFilter);
}
}
}
}
}
}
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,389 @@
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using System.Diagnostics;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal static class PredCommon
{
public static int GetReferenceModeContext(ref Vp9Common cm, ref MacroBlockD xd)
{
int ctx;
// Note:
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real macroblocks.
// The prediction flags in these dummy entries are initialized to 0.
if (!xd.AboveMi.IsNull && !xd.LeftMi.IsNull)
{ // both edges available
if (!xd.AboveMi.Value.HasSecondRef() && !xd.LeftMi.Value.HasSecondRef())
{
// Neither edge uses comp pred (0/1)
ctx = (xd.AboveMi.Value.RefFrame[0] == cm.CompFixedRef ? 1 : 0) ^
(xd.LeftMi.Value.RefFrame[0] == cm.CompFixedRef ? 1 : 0);
}
else if (!xd.AboveMi.Value.HasSecondRef())
{
// One of two edges uses comp pred (2/3)
ctx = 2 + (xd.AboveMi.Value.RefFrame[0] == cm.CompFixedRef || !xd.AboveMi.Value.IsInterBlock() ? 1 : 0);
}
else if (!xd.LeftMi.Value.HasSecondRef())
{
// One of two edges uses comp pred (2/3)
ctx = 2 + (xd.LeftMi.Value.RefFrame[0] == cm.CompFixedRef || !xd.LeftMi.Value.IsInterBlock() ? 1 : 0);
}
else // Both edges use comp pred (4)
{
ctx = 4;
}
}
else if (!xd.AboveMi.IsNull || !xd.LeftMi.IsNull)
{ // One edge available
ref ModeInfo edgeMi = ref !xd.AboveMi.IsNull ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;
if (!edgeMi.HasSecondRef())
{
// Edge does not use comp pred (0/1)
ctx = edgeMi.RefFrame[0] == cm.CompFixedRef ? 1 : 0;
}
else
{
// Edge uses comp pred (3)
ctx = 3;
}
}
else
{ // No edges available (1)
ctx = 1;
}
Debug.Assert(ctx >= 0 && ctx < Constants.CompInterContexts);
return ctx;
}
// Returns a context number for the given MB prediction signal
public static int GetPredContextCompRefP(ref Vp9Common cm, ref MacroBlockD xd)
{
int predContext;
// Note:
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real macroblocks.
// The prediction flags in these dummy entries are initialized to 0.
int fixRefIdx = cm.RefFrameSignBias[cm.CompFixedRef];
int varRefIdx = fixRefIdx == 0 ? 1 : 0;
if (!xd.AboveMi.IsNull && !xd.LeftMi.IsNull)
{ // Both edges available
bool aboveIntra = !xd.AboveMi.Value.IsInterBlock();
bool leftIntra = !xd.LeftMi.Value.IsInterBlock();
if (aboveIntra && leftIntra)
{ // Intra/Intra (2)
predContext = 2;
}
else if (aboveIntra || leftIntra)
{ // Intra/Inter
ref ModeInfo edgeMi = ref aboveIntra ? ref xd.LeftMi.Value : ref xd.AboveMi.Value;
if (!edgeMi.HasSecondRef()) // single pred (1/3)
{
predContext = 1 + 2 * (edgeMi.RefFrame[0] != cm.CompVarRef[1] ? 1 : 0);
}
else // Comp pred (1/3)
{
predContext = 1 + 2 * (edgeMi.RefFrame[varRefIdx] != cm.CompVarRef[1] ? 1 : 0);
}
}
else
{ // Inter/Inter
bool lSg = !xd.LeftMi.Value.HasSecondRef();
bool aSg = !xd.AboveMi.Value.HasSecondRef();
sbyte vrfa = aSg ? xd.AboveMi.Value.RefFrame[0] : xd.AboveMi.Value.RefFrame[varRefIdx];
sbyte vrfl = lSg ? xd.LeftMi.Value.RefFrame[0] : xd.LeftMi.Value.RefFrame[varRefIdx];
if (vrfa == vrfl && cm.CompVarRef[1] == vrfa)
{
predContext = 0;
}
else if (lSg && aSg)
{ // Single/Single
if ((vrfa == cm.CompFixedRef && vrfl == cm.CompVarRef[0]) ||
(vrfl == cm.CompFixedRef && vrfa == cm.CompVarRef[0]))
{
predContext = 4;
}
else if (vrfa == vrfl)
{
predContext = 3;
}
else
{
predContext = 1;
}
}
else if (lSg || aSg)
{ // Single/Comp
sbyte vrfc = lSg ? vrfa : vrfl;
sbyte rfs = aSg ? vrfa : vrfl;
if (vrfc == cm.CompVarRef[1] && rfs != cm.CompVarRef[1])
{
predContext = 1;
}
else if (rfs == cm.CompVarRef[1] && vrfc != cm.CompVarRef[1])
{
predContext = 2;
}
else
{
predContext = 4;
}
}
else if (vrfa == vrfl)
{ // Comp/Comp
predContext = 4;
}
else
{
predContext = 2;
}
}
}
else if (!xd.AboveMi.IsNull || !xd.LeftMi.IsNull)
{ // One edge available
ref ModeInfo edgeMi = ref !xd.AboveMi.IsNull ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;
if (!edgeMi.IsInterBlock())
{
predContext = 2;
}
else
{
if (edgeMi.HasSecondRef())
{
predContext = 4 * (edgeMi.RefFrame[varRefIdx] != cm.CompVarRef[1] ? 1 : 0);
}
else
{
predContext = 3 * (edgeMi.RefFrame[0] != cm.CompVarRef[1] ? 1 : 0);
}
}
}
else
{ // No edges available (2)
predContext = 2;
}
Debug.Assert(predContext >= 0 && predContext < Constants.RefContexts);
return predContext;
}
public static int GetPredContextSingleRefP1(ref MacroBlockD xd)
{
int predContext;
// Note:
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real macroblocks.
// The prediction flags in these dummy entries are initialized to 0.
if (!xd.AboveMi.IsNull && !xd.LeftMi.IsNull)
{ // Both edges available
bool aboveIntra = !xd.AboveMi.Value.IsInterBlock();
bool leftIntra = !xd.LeftMi.Value.IsInterBlock();
if (aboveIntra && leftIntra)
{ // Intra/Intra
predContext = 2;
}
else if (aboveIntra || leftIntra)
{ // Intra/Inter or Inter/Intra
ref ModeInfo edgeMi = ref aboveIntra ? ref xd.LeftMi.Value : ref xd.AboveMi.Value;
if (!edgeMi.HasSecondRef())
{
predContext = 4 * (edgeMi.RefFrame[0] == Constants.LastFrame ? 1 : 0);
}
else
{
predContext = 1 + (edgeMi.RefFrame[0] == Constants.LastFrame ||
edgeMi.RefFrame[1] == Constants.LastFrame ? 1 : 0);
}
}
else
{ // Inter/Inter
bool aboveHasSecond = xd.AboveMi.Value.HasSecondRef();
bool leftHasSecond = xd.LeftMi.Value.HasSecondRef();
sbyte above0 = xd.AboveMi.Value.RefFrame[0];
sbyte above1 = xd.AboveMi.Value.RefFrame[1];
sbyte left0 = xd.LeftMi.Value.RefFrame[0];
sbyte left1 = xd.LeftMi.Value.RefFrame[1];
if (aboveHasSecond && leftHasSecond)
{
predContext = 1 + (above0 == Constants.LastFrame || above1 == Constants.LastFrame ||
left0 == Constants.LastFrame || left1 == Constants.LastFrame ? 1 : 0);
}
else if (aboveHasSecond || leftHasSecond)
{
sbyte rfs = !aboveHasSecond ? above0 : left0;
sbyte crf1 = aboveHasSecond ? above0 : left0;
sbyte crf2 = aboveHasSecond ? above1 : left1;
if (rfs == Constants.LastFrame)
{
predContext = 3 + (crf1 == Constants.LastFrame || crf2 == Constants.LastFrame ? 1 : 0);
}
else
{
predContext = (crf1 == Constants.LastFrame || crf2 == Constants.LastFrame ? 1 : 0);
}
}
else
{
predContext = 2 * (above0 == Constants.LastFrame ? 1 : 0) + 2 * (left0 == Constants.LastFrame ? 1 : 0);
}
}
}
else if (!xd.AboveMi.IsNull || !xd.LeftMi.IsNull)
{ // One edge available
ref ModeInfo edgeMi = ref !xd.AboveMi.IsNull ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;
if (!edgeMi.IsInterBlock())
{ // Intra
predContext = 2;
}
else
{ // Inter
if (!edgeMi.HasSecondRef())
{
predContext = 4 * (edgeMi.RefFrame[0] == Constants.LastFrame ? 1 : 0);
}
else
{
predContext = 1 + (edgeMi.RefFrame[0] == Constants.LastFrame ||
edgeMi.RefFrame[1] == Constants.LastFrame ? 1 : 0);
}
}
}
else
{ // No edges available
predContext = 2;
}
Debug.Assert(predContext >= 0 && predContext < Constants.RefContexts);
return predContext;
}
public static int GetPredContextSingleRefP2(ref MacroBlockD xd)
{
int predContext;
// Note:
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real macroblocks.
// The prediction flags in these dummy entries are initialized to 0.
if (!xd.AboveMi.IsNull && !xd.LeftMi.IsNull)
{ // Both edges available
bool aboveIntra = !xd.AboveMi.Value.IsInterBlock();
bool leftIntra = !xd.LeftMi.Value.IsInterBlock();
if (aboveIntra && leftIntra)
{ // Intra/Intra
predContext = 2;
}
else if (aboveIntra || leftIntra)
{ // Intra/Inter or Inter/Intra
ref ModeInfo edgeMi = ref aboveIntra ? ref xd.LeftMi.Value : ref xd.AboveMi.Value;
if (!edgeMi.HasSecondRef())
{
if (edgeMi.RefFrame[0] == Constants.LastFrame)
{
predContext = 3;
}
else
{
predContext = 4 * (edgeMi.RefFrame[0] == Constants.GoldenFrame ? 1 : 0);
}
}
else
{
predContext = 1 + 2 * (edgeMi.RefFrame[0] == Constants.GoldenFrame ||
edgeMi.RefFrame[1] == Constants.GoldenFrame ? 1 : 0);
}
}
else
{ // Inter/Inter
bool aboveHasSecond = xd.AboveMi.Value.HasSecondRef();
bool leftHasSecond = xd.LeftMi.Value.HasSecondRef();
sbyte above0 = xd.AboveMi.Value.RefFrame[0];
sbyte above1 = xd.AboveMi.Value.RefFrame[1];
sbyte left0 = xd.LeftMi.Value.RefFrame[0];
sbyte left1 = xd.LeftMi.Value.RefFrame[1];
if (aboveHasSecond && leftHasSecond)
{
if (above0 == left0 && above1 == left1)
{
predContext = 3 * (above0 == Constants.GoldenFrame || above1 == Constants.GoldenFrame ||
left0 == Constants.GoldenFrame || left1 == Constants.GoldenFrame ? 1 : 0);
}
else
{
predContext = 2;
}
}
else if (aboveHasSecond || leftHasSecond)
{
sbyte rfs = !aboveHasSecond ? above0 : left0;
sbyte crf1 = aboveHasSecond ? above0 : left0;
sbyte crf2 = aboveHasSecond ? above1 : left1;
if (rfs == Constants.GoldenFrame)
{
predContext = 3 + (crf1 == Constants.GoldenFrame || crf2 == Constants.GoldenFrame ? 1 : 0);
}
else if (rfs == Constants.AltRefFrame)
{
predContext = crf1 == Constants.GoldenFrame || crf2 == Constants.GoldenFrame ? 1 : 0;
}
else
{
predContext = 1 + 2 * (crf1 == Constants.GoldenFrame || crf2 == Constants.GoldenFrame ? 1 : 0);
}
}
else
{
if (above0 == Constants.LastFrame && left0 == Constants.LastFrame)
{
predContext = 3;
}
else if (above0 == Constants.LastFrame || left0 == Constants.LastFrame)
{
sbyte edge0 = (above0 == Constants.LastFrame) ? left0 : above0;
predContext = 4 * (edge0 == Constants.GoldenFrame ? 1 : 0);
}
else
{
predContext = 2 * (above0 == Constants.GoldenFrame ? 1 : 0) + 2 * (left0 == Constants.GoldenFrame ? 1 : 0);
}
}
}
}
else if (!xd.AboveMi.IsNull || !xd.LeftMi.IsNull)
{ // One edge available
ref ModeInfo edgeMi = ref !xd.AboveMi.IsNull ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;
if (!edgeMi.IsInterBlock() || (edgeMi.RefFrame[0] == Constants.LastFrame && !edgeMi.HasSecondRef()))
{
predContext = 2;
}
else if (!edgeMi.HasSecondRef())
{
predContext = 4 * (edgeMi.RefFrame[0] == Constants.GoldenFrame ? 1 : 0);
}
else
{
predContext = 3 * (edgeMi.RefFrame[0] == Constants.GoldenFrame ||
edgeMi.RefFrame[1] == Constants.GoldenFrame ? 1 : 0);
}
}
else
{ // No edges available (2)
predContext = 2;
}
Debug.Assert(predContext >= 0 && predContext < Constants.RefContexts);
return predContext;
}
}
}

@ -0,0 +1,203 @@
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using System;
using System.Diagnostics;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal static class QuantCommon
{
public const int MinQ = 0;
public const int MaxQ = 255;
private static readonly short[] DcQlookup = new short[]
{
4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18,
19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30,
31, 32, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42,
43, 43, 44, 45, 46, 47, 48, 48, 49, 50, 51, 52, 53, 53,
54, 55, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65,
66, 66, 67, 68, 69, 70, 70, 71, 72, 73, 74, 74, 75, 76,
77, 78, 78, 79, 80, 81, 81, 82, 83, 84, 85, 85, 87, 88,
90, 92, 93, 95, 96, 98, 99, 101, 102, 104, 105, 107, 108, 110,
111, 113, 114, 116, 117, 118, 120, 121, 123, 125, 127, 129, 131, 134,
136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 161, 164,
166, 169, 172, 174, 177, 180, 182, 185, 187, 190, 192, 195, 199, 202,
205, 208, 211, 214, 217, 220, 223, 226, 230, 233, 237, 240, 243, 247,
250, 253, 257, 261, 265, 269, 272, 276, 280, 284, 288, 292, 296, 300,
304, 309, 313, 317, 322, 326, 330, 335, 340, 344, 349, 354, 359, 364,
369, 374, 379, 384, 389, 395, 400, 406, 411, 417, 423, 429, 435, 441,
447, 454, 461, 467, 475, 482, 489, 497, 505, 513, 522, 530, 539, 549,
559, 569, 579, 590, 602, 614, 626, 640, 654, 668, 684, 700, 717, 736,
755, 775, 796, 819, 843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139,
1184, 1232, 1282, 1336,
};
private static readonly short[] DcQlookup10 = new short[]
{
4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37,
40, 43, 47, 50, 53, 57, 60, 64, 68, 71, 75, 78, 82,
86, 90, 93, 97, 101, 105, 109, 113, 116, 120, 124, 128, 132,
136, 140, 143, 147, 151, 155, 159, 163, 166, 170, 174, 178, 182,
185, 189, 193, 197, 200, 204, 208, 212, 215, 219, 223, 226, 230,
233, 237, 241, 244, 248, 251, 255, 259, 262, 266, 269, 273, 276,
280, 283, 287, 290, 293, 297, 300, 304, 307, 310, 314, 317, 321,
324, 327, 331, 334, 337, 343, 350, 356, 362, 369, 375, 381, 387,
394, 400, 406, 412, 418, 424, 430, 436, 442, 448, 454, 460, 466,
472, 478, 484, 490, 499, 507, 516, 525, 533, 542, 550, 559, 567,
576, 584, 592, 601, 609, 617, 625, 634, 644, 655, 666, 676, 687,
698, 708, 718, 729, 739, 749, 759, 770, 782, 795, 807, 819, 831,
844, 856, 868, 880, 891, 906, 920, 933, 947, 961, 975, 988, 1001,
1015, 1030, 1045, 1061, 1076, 1090, 1105, 1120, 1137, 1153, 1170, 1186, 1202,
1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, 1398, 1416, 1436,
1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717,
1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088,
2123, 2159, 2197, 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675,
2737, 2802, 2871, 2944, 3020, 3102, 3188, 3280, 3375, 3478, 3586, 3702, 3823,
3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347,
};
private static readonly short[] DcQlookup12 = new short[]
{
4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91,
103, 115, 127, 140, 153, 166, 180, 194, 208, 222, 237,
251, 266, 281, 296, 312, 327, 343, 358, 374, 390, 405,
421, 437, 453, 469, 484, 500, 516, 532, 548, 564, 580,
596, 611, 627, 643, 659, 674, 690, 706, 721, 737, 752,
768, 783, 798, 814, 829, 844, 859, 874, 889, 904, 919,
934, 949, 964, 978, 993, 1008, 1022, 1037, 1051, 1065, 1080,
1094, 1108, 1122, 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234,
1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, 1368, 1393, 1419,
1444, 1469, 1494, 1519, 1544, 1569, 1594, 1618, 1643, 1668, 1692,
1717, 1741, 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, 1957,
1992, 2027, 2061, 2096, 2130, 2165, 2199, 2233, 2267, 2300, 2334,
2367, 2400, 2434, 2467, 2499, 2532, 2575, 2618, 2661, 2704, 2746,
2788, 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, 3177, 3226,
3275, 3324, 3373, 3421, 3469, 3517, 3565, 3621, 3677, 3733, 3788,
3843, 3897, 3951, 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420,
4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, 5013, 5083, 5153,
5222, 5291, 5367, 5442, 5517, 5591, 5665, 5745, 5825, 5905, 5984,
6063, 6149, 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, 6966,
7064, 7163, 7269, 7376, 7483, 7599, 7715, 7832, 7958, 8085, 8214,
8352, 8492, 8635, 8788, 8945, 9104, 9275, 9450, 9639, 9832, 10031,
10245, 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118,
13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949,
19718, 20521, 21387,
};
private static readonly short[] AcQlookup = new short[]
{
4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
98, 99, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144,
146, 148, 150, 152, 155, 158, 161, 164, 167, 170, 173, 176, 179,
182, 185, 188, 191, 194, 197, 200, 203, 207, 211, 215, 219, 223,
227, 231, 235, 239, 243, 247, 251, 255, 260, 265, 270, 275, 280,
285, 290, 295, 300, 305, 311, 317, 323, 329, 335, 341, 347, 353,
359, 366, 373, 380, 387, 394, 401, 408, 416, 424, 432, 440, 448,
456, 465, 474, 483, 492, 501, 510, 520, 530, 540, 550, 560, 571,
582, 593, 604, 615, 627, 639, 651, 663, 676, 689, 702, 715, 729,
743, 757, 771, 786, 801, 816, 832, 848, 864, 881, 898, 915, 933,
951, 969, 988, 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, 1173, 1196,
1219, 1243, 1267, 1292, 1317, 1343, 1369, 1396, 1423, 1451, 1479, 1508, 1537,
1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
};
private static readonly short[] AcQlookup10 = new short[]
{
4, 9, 11, 13, 16, 18, 21, 24, 27, 30, 33, 37, 40,
44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 88, 92,
96, 100, 105, 109, 114, 118, 122, 127, 131, 136, 140, 145, 149,
154, 158, 163, 168, 172, 177, 181, 186, 190, 195, 199, 204, 208,
213, 217, 222, 226, 231, 235, 240, 244, 249, 253, 258, 262, 267,
271, 275, 280, 284, 289, 293, 297, 302, 306, 311, 315, 319, 324,
328, 332, 337, 341, 345, 349, 354, 358, 362, 367, 371, 375, 379,
384, 388, 392, 396, 401, 409, 417, 425, 433, 441, 449, 458, 466,
474, 482, 490, 498, 506, 514, 523, 531, 539, 547, 555, 563, 571,
579, 588, 596, 604, 616, 628, 640, 652, 664, 676, 688, 700, 713,
725, 737, 749, 761, 773, 785, 797, 809, 825, 841, 857, 873, 889,
905, 922, 938, 954, 970, 986, 1002, 1018, 1038, 1058, 1078, 1098, 1118,
1138, 1158, 1178, 1198, 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, 1411,
1435, 1463, 1491, 1519, 1547, 1575, 1603, 1631, 1663, 1695, 1727, 1759, 1791,
1823, 1859, 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, 2199, 2239, 2283,
2327, 2371, 2415, 2459, 2507, 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915,
2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, 3455, 3523, 3591, 3659, 3731,
3803, 3876, 3952, 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, 4692, 4784,
4876, 4972, 5068, 5168, 5268, 5372, 5476, 5584, 5692, 5804, 5916, 6032, 6148,
6268, 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312,
};
private static readonly short[] AcQlookup12 = new short[]
{
4, 13, 19, 27, 35, 44, 54, 64, 75, 87, 99,
112, 126, 139, 154, 168, 183, 199, 214, 230, 247, 263,
280, 297, 314, 331, 349, 366, 384, 402, 420, 438, 456,
475, 493, 511, 530, 548, 567, 586, 604, 623, 642, 660,
679, 698, 716, 735, 753, 772, 791, 809, 828, 846, 865,
884, 902, 920, 939, 957, 976, 994, 1012, 1030, 1049, 1067,
1085, 1103, 1121, 1139, 1157, 1175, 1193, 1211, 1229, 1246, 1264,
1282, 1299, 1317, 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457,
1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, 1627, 1660, 1693,
1725, 1758, 1791, 1824, 1856, 1889, 1922, 1954, 1987, 2020, 2052,
2085, 2118, 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, 2411,
2459, 2508, 2556, 2605, 2653, 2701, 2750, 2798, 2847, 2895, 2943,
2992, 3040, 3088, 3137, 3185, 3234, 3298, 3362, 3426, 3491, 3555,
3619, 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, 4230, 4310,
4390, 4470, 4550, 4631, 4711, 4791, 4871, 4967, 5064, 5160, 5256,
5352, 5448, 5544, 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410,
6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, 7579, 7723, 7867,
8011, 8155, 8315, 8475, 8635, 8795, 8956, 9132, 9308, 9484, 9660,
9836, 10028, 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, 11885,
12109, 12333, 12573, 12813, 13053, 13309, 13565, 13821, 14093, 14365, 14637,
14925, 15213, 15502, 15806, 16110, 16414, 16734, 17054, 17390, 17726, 18062,
18414, 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, 21902, 22334,
22766, 23214, 23662, 24126, 24590, 25070, 25551, 26047, 26559, 27071, 27599,
28143, 28687, 29247,
};
public static short DcQuant(int qindex, int delta, BitDepth bitDepth)
{
switch (bitDepth)
{
case BitDepth.Bits8: return DcQlookup[Math.Clamp(qindex + delta, 0, MaxQ)];
case BitDepth.Bits10: return DcQlookup10[Math.Clamp(qindex + delta, 0, MaxQ)];
case BitDepth.Bits12: return DcQlookup12[Math.Clamp(qindex + delta, 0, MaxQ)];
default:
Debug.Assert(false, "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
return -1;
}
}
public static short AcQuant(int qindex, int delta, BitDepth bitDepth)
{
switch (bitDepth)
{
case BitDepth.Bits8: return AcQlookup[Math.Clamp(qindex + delta, 0, MaxQ)];
case BitDepth.Bits10: return AcQlookup10[Math.Clamp(qindex + delta, 0, MaxQ)];
case BitDepth.Bits12: return AcQlookup12[Math.Clamp(qindex + delta, 0, MaxQ)];
default:
Debug.Assert(false, "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
return -1;
}
}
public static int GetQIndex(ref Segmentation seg, int segmentId, int baseQIndex)
{
if (seg.IsSegFeatureActive(segmentId, SegLvlFeatures.SegLvlAltQ) != 0)
{
int data = seg.GetSegData(segmentId, SegLvlFeatures.SegLvlAltQ);
int segQIndex = seg.AbsDelta == Constants.SegmentAbsData ? data : baseQIndex + data;
return Math.Clamp(segQIndex, 0, MaxQ);
}
else
{
return baseQIndex;
}
}
}
}

@ -0,0 +1,234 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Filter;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal static class ReconInter
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe void InterPredictor(
byte* src,
int srcStride,
byte* dst,
int dstStride,
int subpelX,
int subpelY,
ref ScaleFactors sf,
int w,
int h,
int refr,
Array8<short>[] kernel,
int xs,
int ys)
{
sf.InterPredict(
subpelX != 0 ? 1 : 0,
subpelY != 0 ? 1 : 0,
refr,
src,
srcStride,
dst,
dstStride,
subpelX,
subpelY,
w,
h,
kernel,
xs,
ys);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe void HighbdInterPredictor(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
int subpelX,
int subpelY,
ref ScaleFactors sf,
int w,
int h,
int refr,
Array8<short>[] kernel,
int xs,
int ys,
int bd)
{
sf.HighbdInterPredict(
subpelX != 0 ? 1 : 0,
subpelY != 0 ? 1 : 0,
refr,
src,
srcStride,
dst,
dstStride,
subpelX,
subpelY,
w,
h,
kernel,
xs,
ys,
bd);
}
private static int RoundMvCompQ4(int value)
{
return (value < 0 ? value - 2 : value + 2) / 4;
}
private static Mv MiMvPredQ4(ref ModeInfo mi, int idx)
{
Mv res = new Mv()
{
Row = (short)RoundMvCompQ4(
mi.Bmi[0].Mv[idx].Row + mi.Bmi[1].Mv[idx].Row +
mi.Bmi[2].Mv[idx].Row + mi.Bmi[3].Mv[idx].Row),
Col = (short)RoundMvCompQ4(
mi.Bmi[0].Mv[idx].Col + mi.Bmi[1].Mv[idx].Col +
mi.Bmi[2].Mv[idx].Col + mi.Bmi[3].Mv[idx].Col)
};
return res;
}
private static int RoundMvCompQ2(int value)
{
return (value < 0 ? value - 1 : value + 1) / 2;
}
private static Mv MiMvPredQ2(ref ModeInfo mi, int idx, int block0, int block1)
{
Mv res = new Mv()
{
Row = (short)RoundMvCompQ2(
mi.Bmi[block0].Mv[idx].Row +
mi.Bmi[block1].Mv[idx].Row),
Col = (short)RoundMvCompQ2(
mi.Bmi[block0].Mv[idx].Col +
mi.Bmi[block1].Mv[idx].Col)
};
return res;
}
public static Mv ClampMvToUmvBorderSb(ref MacroBlockD xd, ref Mv srcMv, int bw, int bh, int ssX, int ssY)
{
// If the MV points so far into the UMV border that no visible pixels
// are used for reconstruction, the subpel part of the MV can be
// discarded and the MV limited to 16 pixels with equivalent results.
int spelLeft = (Constants.Vp9InterpExtend + bw) << SubpelBits;
int spelRight = spelLeft - SubpelShifts;
int spelTop = (Constants.Vp9InterpExtend + bh) << SubpelBits;
int spelBottom = spelTop - SubpelShifts;
Mv clampedMv = new Mv()
{
Row = (short)(srcMv.Row * (1 << (1 - ssY))),
Col = (short)(srcMv.Col * (1 << (1 - ssX)))
};
Debug.Assert(ssX <= 1);
Debug.Assert(ssY <= 1);
clampedMv.ClampMv(
xd.MbToLeftEdge * (1 << (1 - ssX)) - spelLeft,
xd.MbToRightEdge * (1 << (1 - ssX)) + spelRight,
xd.MbToTopEdge * (1 << (1 - ssY)) - spelTop,
xd.MbToBottomEdge * (1 << (1 - ssY)) + spelBottom);
return clampedMv;
}
public static Mv AverageSplitMvs(ref MacroBlockDPlane pd, ref ModeInfo mi, int refr, int block)
{
int ssIdx = ((pd.SubsamplingX > 0 ? 1 : 0) << 1) | (pd.SubsamplingY > 0 ? 1 : 0);
Mv res = new Mv();
switch (ssIdx)
{
case 0: res = mi.Bmi[block].Mv[refr]; break;
case 1: res = MiMvPredQ2(ref mi, refr, block, block + 2); break;
case 2: res = MiMvPredQ2(ref mi, refr, block, block + 1); break;
case 3: res = MiMvPredQ4(ref mi, refr); break;
default: Debug.Assert(ssIdx <= 3 && ssIdx >= 0); break;
}
return res;
}
private static int ScaledBufferOffset(int xOffset, int yOffset, int stride, Ptr<ScaleFactors> sf)
{
int x = !sf.IsNull ? sf.Value.ScaleValueX(xOffset) : xOffset;
int y = !sf.IsNull ? sf.Value.ScaleValueY(yOffset) : yOffset;
return y * stride + x;
}
private static void SetupPredPlanes(
ref Buf2D dst,
ArrayPtr<byte> src,
int stride,
int miRow,
int miCol,
Ptr<ScaleFactors> scale,
int subsamplingX,
int subsamplingY)
{
int x = (Constants.MiSize * miCol) >> subsamplingX;
int y = (Constants.MiSize * miRow) >> subsamplingY;
dst.Buf = src.Slice(ScaledBufferOffset(x, y, stride, scale));
dst.Stride = stride;
}
public static void SetupDstPlanes(
ref Array3<MacroBlockDPlane> planes,
ref Surface src,
int miRow,
int miCol)
{
Span<ArrayPtr<byte>> buffers = stackalloc ArrayPtr<byte>[Constants.MaxMbPlane];
buffers[0] = src.YBuffer;
buffers[1] = src.UBuffer;
buffers[2] = src.VBuffer;
Span<int> strides = stackalloc int[Constants.MaxMbPlane];
strides[0] = src.Stride;
strides[1] = src.UvStride;
strides[2] = src.UvStride;
int i;
for (i = 0; i < Constants.MaxMbPlane; ++i)
{
ref MacroBlockDPlane pd = ref planes[i];
SetupPredPlanes(ref pd.Dst, buffers[i], strides[i], miRow, miCol, Ptr<ScaleFactors>.Null, pd.SubsamplingX, pd.SubsamplingY);
}
}
public static void SetupPrePlanes(
ref MacroBlockD xd,
int idx,
ref Surface src,
int miRow,
int miCol,
Ptr<ScaleFactors> sf)
{
if (!src.YBuffer.IsNull && !src.UBuffer.IsNull && !src.VBuffer.IsNull)
{
Span<ArrayPtr<byte>> buffers = stackalloc ArrayPtr<byte>[Constants.MaxMbPlane];
buffers[0] = src.YBuffer;
buffers[1] = src.UBuffer;
buffers[2] = src.VBuffer;
Span<int> strides = stackalloc int[Constants.MaxMbPlane];
strides[0] = src.Stride;
strides[1] = src.UvStride;
strides[2] = src.UvStride;
int i;
for (i = 0; i < Constants.MaxMbPlane; ++i)
{
ref MacroBlockDPlane pd = ref xd.Plane[i];
SetupPredPlanes(ref pd.Pre[idx], buffers[i], strides[i], miRow, miCol, sf, pd.SubsamplingX, pd.SubsamplingY);
}
}
}
}
}

@ -0,0 +1,761 @@
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.IntraPred;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal static class ReconIntra
{
public static readonly TxType[] IntraModeToTxTypeLookup = new TxType[]
{
TxType.DctDct, // DC
TxType.AdstDct, // V
TxType.DctAdst, // H
TxType.DctDct, // D45
TxType.AdstAdst, // D135
TxType.AdstDct, // D117
TxType.DctAdst, // D153
TxType.DctAdst, // D207
TxType.AdstDct, // D63
TxType.AdstAdst // TM
};
private const int NeedLeft = 1 << 1;
private const int NeedAbove = 1 << 2;
private const int NeedAboveRight = 1 << 3;
private static readonly byte[] ExtendModes = new byte[]
{
NeedAbove | NeedLeft, // DC
NeedAbove, // V
NeedLeft, // H
NeedAboveRight, // D45
NeedLeft | NeedAbove, // D135
NeedLeft | NeedAbove, // D117
NeedLeft | NeedAbove, // D153
NeedLeft, // D207
NeedAboveRight, // D63
NeedLeft | NeedAbove, // TM
};
private unsafe delegate void IntraPredFn(byte* dst, int stride, byte* above, byte* left);
private static unsafe IntraPredFn[][] _pred = new IntraPredFn[][]
{
new IntraPredFn[]
{
null,
null,
null,
null
},
new IntraPredFn[]
{
VPredictor4x4,
VPredictor8x8,
VPredictor16x16,
VPredictor32x32
},
new IntraPredFn[]
{
HPredictor4x4,
HPredictor8x8,
HPredictor16x16,
HPredictor32x32
},
new IntraPredFn[]
{
D45Predictor4x4,
D45Predictor8x8,
D45Predictor16x16,
D45Predictor32x32
},
new IntraPredFn[]
{
D135Predictor4x4,
D135Predictor8x8,
D135Predictor16x16,
D135Predictor32x32
},
new IntraPredFn[]
{
D117Predictor4x4,
D117Predictor8x8,
D117Predictor16x16,
D117Predictor32x32
},
new IntraPredFn[]
{
D153Predictor4x4,
D153Predictor8x8,
D153Predictor16x16,
D153Predictor32x32
},
new IntraPredFn[]
{
D207Predictor4x4,
D207Predictor8x8,
D207Predictor16x16,
D207Predictor32x32
},
new IntraPredFn[]
{
D63Predictor4x4,
D63Predictor8x8,
D63Predictor16x16,
D63Predictor32x32
},
new IntraPredFn[]
{
TMPredictor4x4,
TMPredictor8x8,
TMPredictor16x16,
TMPredictor32x32
}
};
private static unsafe IntraPredFn[][][] _dcPred = new IntraPredFn[][][]
{
new IntraPredFn[][]
{
new IntraPredFn[]
{
Dc128Predictor4x4,
Dc128Predictor8x8,
Dc128Predictor16x16,
Dc128Predictor32x32
},
new IntraPredFn[]
{
DcTopPredictor4x4,
DcTopPredictor8x8,
DcTopPredictor16x16,
DcTopPredictor32x32
}
},
new IntraPredFn[][]
{
new IntraPredFn[]
{
DcLeftPredictor4x4,
DcLeftPredictor8x8,
DcLeftPredictor16x16,
DcLeftPredictor32x32
},
new IntraPredFn[]
{
DcPredictor4x4,
DcPredictor8x8,
DcPredictor16x16,
DcPredictor32x32
}
}
};
private unsafe delegate void IntraHighPredFn(ushort* dst, int stride, ushort* above, ushort* left, int bd);
private static unsafe IntraHighPredFn[][] _predHigh = new IntraHighPredFn[][]
{
new IntraHighPredFn[]
{
null,
null,
null,
null
},
new IntraHighPredFn[]
{
HighbdVPredictor4x4,
HighbdVPredictor8x8,
HighbdVPredictor16x16,
HighbdVPredictor32x32
},
new IntraHighPredFn[]
{
HighbdHPredictor4x4,
HighbdHPredictor8x8,
HighbdHPredictor16x16,
HighbdHPredictor32x32
},
new IntraHighPredFn[]
{
HighbdD45Predictor4x4,
HighbdD45Predictor8x8,
HighbdD45Predictor16x16,
HighbdD45Predictor32x32
},
new IntraHighPredFn[]
{
HighbdD135Predictor4x4,
HighbdD135Predictor8x8,
HighbdD135Predictor16x16,
HighbdD135Predictor32x32
},
new IntraHighPredFn[]
{
HighbdD117Predictor4x4,
HighbdD117Predictor8x8,
HighbdD117Predictor16x16,
HighbdD117Predictor32x32
},
new IntraHighPredFn[]
{
HighbdD153Predictor4x4,
HighbdD153Predictor8x8,
HighbdD153Predictor16x16,
HighbdD153Predictor32x32
},
new IntraHighPredFn[]
{
HighbdD207Predictor4x4,
HighbdD207Predictor8x8,
HighbdD207Predictor16x16,
HighbdD207Predictor32x32
},
new IntraHighPredFn[]
{
HighbdD63Predictor4x4,
HighbdD63Predictor8x8,
HighbdD63Predictor16x16,
HighbdD63Predictor32x32
},
new IntraHighPredFn[]
{
HighbdTMPredictor4x4,
HighbdTMPredictor8x8,
HighbdTMPredictor16x16,
HighbdTMPredictor32x32
}
};
private static unsafe IntraHighPredFn[][][] _dcPredHigh = new IntraHighPredFn[][][]
{
new IntraHighPredFn[][]
{
new IntraHighPredFn[]
{
HighbdDc128Predictor4x4,
HighbdDc128Predictor8x8,
HighbdDc128Predictor16x16,
HighbdDc128Predictor32x32
},
new IntraHighPredFn[]
{
HighbdDcTopPredictor4x4,
HighbdDcTopPredictor8x8,
HighbdDcTopPredictor16x16,
HighbdDcTopPredictor32x32
}
},
new IntraHighPredFn[][]
{
new IntraHighPredFn[]
{
HighbdDcLeftPredictor4x4,
HighbdDcLeftPredictor8x8,
HighbdDcLeftPredictor16x16,
HighbdDcLeftPredictor32x32
},
new IntraHighPredFn[]
{
HighbdDcPredictor4x4,
HighbdDcPredictor8x8,
HighbdDcPredictor16x16,
HighbdDcPredictor32x32
}
}
};
private static unsafe void BuildIntraPredictorsHigh(
ref MacroBlockD xd,
byte* ref8,
int refStride,
byte* dst8,
int dstStride,
PredictionMode mode,
TxSize txSize,
int upAvailable,
int leftAvailable,
int rightAvailable,
int x,
int y,
int plane)
{
int i;
ushort* dst = (ushort*)dst8;
ushort* refr = (ushort*)ref8;
ushort* leftCol = stackalloc ushort[32];
ushort* aboveData = stackalloc ushort[64 + 16];
ushort* aboveRow = aboveData + 16;
ushort* constAboveRow = aboveRow;
int bs = 4 << (int)txSize;
int frameWidth, frameHeight;
int x0, y0;
ref MacroBlockDPlane pd = ref xd.Plane[plane];
int needLeft = ExtendModes[(int)mode] & NeedLeft;
int needAbove = ExtendModes[(int)mode] & NeedAbove;
int needAboveRight = ExtendModes[(int)mode] & NeedAboveRight;
int baseVal = 128 << (xd.Bd - 8);
// 127 127 127 .. 127 127 127 127 127 127
// 129 A B .. Y Z
// 129 C D .. W X
// 129 E F .. U V
// 129 G H .. S T T T T T
// For 10 bit and 12 bit, 127 and 129 are replaced by base -1 and base + 1.
// Get current frame pointer, width and height.
if (plane == 0)
{
frameWidth = xd.CurBuf.Width;
frameHeight = xd.CurBuf.Height;
}
else
{
frameWidth = xd.CurBuf.UvWidth;
frameHeight = xd.CurBuf.UvHeight;
}
// Get block position in current frame.
x0 = (-xd.MbToLeftEdge >> (3 + pd.SubsamplingX)) + x;
y0 = (-xd.MbToTopEdge >> (3 + pd.SubsamplingY)) + y;
// NEED_LEFT
if (needLeft != 0)
{
if (leftAvailable != 0)
{
if (xd.MbToBottomEdge < 0)
{
/* slower path if the block needs border extension */
if (y0 + bs <= frameHeight)
{
for (i = 0; i < bs; ++i)
{
leftCol[i] = refr[i * refStride - 1];
}
}
else
{
int extendBottom = frameHeight - y0;
for (i = 0; i < extendBottom; ++i)
{
leftCol[i] = refr[i * refStride - 1];
}
for (; i < bs; ++i)
{
leftCol[i] = refr[(extendBottom - 1) * refStride - 1];
}
}
}
else
{
/* faster path if the block does not need extension */
for (i = 0; i < bs; ++i)
{
leftCol[i] = refr[i * refStride - 1];
}
}
}
else
{
MemoryUtil.Fill(leftCol, (ushort)(baseVal + 1), bs);
}
}
// NEED_ABOVE
if (needAbove != 0)
{
if (upAvailable != 0)
{
ushort* aboveRef = refr - refStride;
if (xd.MbToRightEdge < 0)
{
/* slower path if the block needs border extension */
if (x0 + bs <= frameWidth)
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
}
else if (x0 <= frameWidth)
{
int r = frameWidth - x0;
MemoryUtil.Copy(aboveRow, aboveRef, r);
MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + bs - frameWidth);
}
}
else
{
/* faster path if the block does not need extension */
if (bs == 4 && rightAvailable != 0 && leftAvailable != 0)
{
constAboveRow = aboveRef;
}
else
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
}
}
aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (ushort)(baseVal + 1);
}
else
{
MemoryUtil.Fill(aboveRow, (ushort)(baseVal - 1), bs);
aboveRow[-1] = (ushort)(baseVal - 1);
}
}
// NEED_ABOVERIGHT
if (needAboveRight != 0)
{
if (upAvailable != 0)
{
ushort* aboveRef = refr - refStride;
if (xd.MbToRightEdge < 0)
{
/* slower path if the block needs border extension */
if (x0 + 2 * bs <= frameWidth)
{
if (rightAvailable != 0 && bs == 4)
{
MemoryUtil.Copy(aboveRow, aboveRef, 2 * bs);
}
else
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
}
}
else if (x0 + bs <= frameWidth)
{
int r = frameWidth - x0;
if (rightAvailable != 0 && bs == 4)
{
MemoryUtil.Copy(aboveRow, aboveRef, r);
MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + 2 * bs - frameWidth);
}
else
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
}
}
else if (x0 <= frameWidth)
{
int r = frameWidth - x0;
MemoryUtil.Copy(aboveRow, aboveRef, r);
MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + 2 * bs - frameWidth);
}
aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (ushort)(baseVal + 1);
}
else
{
/* faster path if the block does not need extension */
if (bs == 4 && rightAvailable != 0 && leftAvailable != 0)
{
constAboveRow = aboveRef;
}
else
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
if (bs == 4 && rightAvailable != 0)
{
MemoryUtil.Copy(aboveRow + bs, aboveRef + bs, bs);
}
else
{
MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
}
aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (ushort)(baseVal + 1);
}
}
}
else
{
MemoryUtil.Fill(aboveRow, (ushort)(baseVal - 1), bs * 2);
aboveRow[-1] = (ushort)(baseVal - 1);
}
}
// Predict
if (mode == PredictionMode.DcPred)
{
_dcPredHigh[leftAvailable][upAvailable][(int)txSize](dst, dstStride, constAboveRow, leftCol, xd.Bd);
}
else
{
_predHigh[(int)mode][(int)txSize](dst, dstStride, constAboveRow, leftCol, xd.Bd);
}
}
public static unsafe void BuildIntraPredictors(
ref MacroBlockD xd,
byte* refr,
int refStride,
byte* dst,
int dstStride,
PredictionMode mode,
TxSize txSize,
int upAvailable,
int leftAvailable,
int rightAvailable,
int x,
int y,
int plane)
{
int i;
byte* leftCol = stackalloc byte[32];
byte* aboveData = stackalloc byte[64 + 16];
byte* aboveRow = aboveData + 16;
byte* constAboveRow = aboveRow;
int bs = 4 << (int)txSize;
int frameWidth, frameHeight;
int x0, y0;
ref MacroBlockDPlane pd = ref xd.Plane[plane];
// 127 127 127 .. 127 127 127 127 127 127
// 129 A B .. Y Z
// 129 C D .. W X
// 129 E F .. U V
// 129 G H .. S T T T T T
// ..
// Get current frame pointer, width and height.
if (plane == 0)
{
frameWidth = xd.CurBuf.Width;
frameHeight = xd.CurBuf.Height;
}
else
{
frameWidth = xd.CurBuf.UvWidth;
frameHeight = xd.CurBuf.UvHeight;
}
// Get block position in current frame.
x0 = (-xd.MbToLeftEdge >> (3 + pd.SubsamplingX)) + x;
y0 = (-xd.MbToTopEdge >> (3 + pd.SubsamplingY)) + y;
// NEED_LEFT
if ((ExtendModes[(int)mode] & NeedLeft) != 0)
{
if (leftAvailable != 0)
{
if (xd.MbToBottomEdge < 0)
{
/* Slower path if the block needs border extension */
if (y0 + bs <= frameHeight)
{
for (i = 0; i < bs; ++i)
{
leftCol[i] = refr[i * refStride - 1];
}
}
else
{
int extendBottom = frameHeight - y0;
for (i = 0; i < extendBottom; ++i)
{
leftCol[i] = refr[i * refStride - 1];
}
for (; i < bs; ++i)
{
leftCol[i] = refr[(extendBottom - 1) * refStride - 1];
}
}
}
else
{
/* Faster path if the block does not need extension */
for (i = 0; i < bs; ++i)
{
leftCol[i] = refr[i * refStride - 1];
}
}
}
else
{
MemoryUtil.Fill(leftCol, (byte)129, bs);
}
}
// NEED_ABOVE
if ((ExtendModes[(int)mode] & NeedAbove) != 0)
{
if (upAvailable != 0)
{
byte* aboveRef = refr - refStride;
if (xd.MbToRightEdge < 0)
{
/* Slower path if the block needs border extension */
if (x0 + bs <= frameWidth)
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
}
else if (x0 <= frameWidth)
{
int r = frameWidth - x0;
MemoryUtil.Copy(aboveRow, aboveRef, r);
MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + bs - frameWidth);
}
}
else
{
/* Faster path if the block does not need extension */
if (bs == 4 && rightAvailable != 0 && leftAvailable != 0)
{
constAboveRow = aboveRef;
}
else
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
}
}
aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (byte)129;
}
else
{
MemoryUtil.Fill(aboveRow, (byte)127, bs);
aboveRow[-1] = 127;
}
}
// NEED_ABOVERIGHT
if ((ExtendModes[(int)mode] & NeedAboveRight) != 0)
{
if (upAvailable != 0)
{
byte* aboveRef = refr - refStride;
if (xd.MbToRightEdge < 0)
{
/* Slower path if the block needs border extension */
if (x0 + 2 * bs <= frameWidth)
{
if (rightAvailable != 0 && bs == 4)
{
MemoryUtil.Copy(aboveRow, aboveRef, 2 * bs);
}
else
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
}
}
else if (x0 + bs <= frameWidth)
{
int r = frameWidth - x0;
if (rightAvailable != 0 && bs == 4)
{
MemoryUtil.Copy(aboveRow, aboveRef, r);
MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + 2 * bs - frameWidth);
}
else
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
}
}
else if (x0 <= frameWidth)
{
int r = frameWidth - x0;
MemoryUtil.Copy(aboveRow, aboveRef, r);
MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + 2 * bs - frameWidth);
}
}
else
{
/* Faster path if the block does not need extension */
if (bs == 4 && rightAvailable != 0 && leftAvailable != 0)
{
constAboveRow = aboveRef;
}
else
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
if (bs == 4 && rightAvailable != 0)
{
MemoryUtil.Copy(aboveRow + bs, aboveRef + bs, bs);
}
else
{
MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
}
}
}
aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (byte)129;
}
else
{
MemoryUtil.Fill(aboveRow, (byte)127, bs * 2);
aboveRow[-1] = 127;
}
}
// Predict
if (mode == PredictionMode.DcPred)
{
_dcPred[leftAvailable][upAvailable][(int)txSize](dst, dstStride, constAboveRow, leftCol);
}
else
{
_pred[(int)mode][(int)txSize](dst, dstStride, constAboveRow, leftCol);
}
}
public static unsafe void PredictIntraBlock(
ref MacroBlockD xd,
int bwlIn,
TxSize txSize,
PredictionMode mode,
byte* refr,
int refStride,
byte* dst,
int dstStride,
int aoff,
int loff,
int plane)
{
int bw = 1 << bwlIn;
int txw = 1 << (int)txSize;
int haveTop = loff != 0 || !xd.AboveMi.IsNull ? 1 : 0;
int haveLeft = aoff != 0 || !xd.LeftMi.IsNull ? 1 : 0;
int haveRight = (aoff + txw) < bw ? 1 : 0;
int x = aoff * 4;
int y = loff * 4;
if (xd.CurBuf.HighBd)
{
BuildIntraPredictorsHigh(
ref xd,
refr,
refStride,
dst,
dstStride,
mode,
txSize,
haveTop,
haveLeft,
haveRight,
x,
y,
plane);
return;
}
BuildIntraPredictors(
ref xd,
refr,
refStride,
dst,
dstStride,
mode,
txSize,
haveTop,
haveLeft,
haveRight,
x,
y,
plane);
}
}
}

@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
<ProjectReference Include="..\Ryujinx.Graphics.Video\Ryujinx.Graphics.Video.csproj" />
</ItemGroup>
</Project>

@ -0,0 +1,10 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal struct TileBuffer
{
public ArrayPtr<byte> Data;
public int Size;
}
}

@ -0,0 +1,15 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Dsp;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using Ryujinx.Graphics.Video;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal struct TileWorkerData
{
public Reader BitReader;
public MacroBlockD Xd;
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
public Array32<Array32<int>> Dqcoeff;
}
}

@ -0,0 +1,10 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct BModeInfo
{
public PredictionMode Mode;
public Array2<Mv> Mv; // First, second inter predictor motion vectors
}
}

@ -0,0 +1,21 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal enum BlockSize
{
Block4x4 = 0,
Block4x8 = 1,
Block8x4 = 2,
Block8x8 = 3,
Block8x16 = 4,
Block16x8 = 5,
Block16x16 = 6,
Block16x32 = 7,
Block32x16 = 8,
Block32x32 = 9,
Block32x64 = 10,
Block64x32 = 11,
Block64x64 = 12,
BlockSizes = 13,
BlockInvalid = BlockSizes
}
}

@ -0,0 +1,10 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct Buf2D
{
public ArrayPtr<byte> Buf;
public int Stride;
}
}

@ -0,0 +1,8 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal enum FrameType
{
KeyFrame = 0,
InterFrame = 1
}
}

@ -0,0 +1,27 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct LoopFilter
{
public int FilterLevel;
public int LastFiltLevel;
public int SharpnessLevel;
public int LastSharpnessLevel;
public bool ModeRefDeltaEnabled;
public bool ModeRefDeltaUpdate;
// 0 = Intra, Last, GF, ARF
public Array4<sbyte> RefDeltas;
public Array4<sbyte> LastRefDeltas;
// 0 = ZERO_MV, MV
public Array2<sbyte> ModeDeltas;
public Array2<sbyte> LastModeDeltas;
public ArrayPtr<LoopFilterMask> Lfm;
public int LfmStride;
}
}

@ -0,0 +1,10 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct LoopFilterInfoN
{
public Array64<LoopFilterThresh> Lfthr;
public Array8<Array4<Array2<byte>>> Lvl;
}
}

@ -0,0 +1,24 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
// Each 1 bit represents a position in which we want to apply the loop filter.
// Left_ entries refer to whether we apply a filter on the border to the
// left of the block. Above_ entries refer to whether or not to apply a
// filter on the above border. Int_ entries refer to whether or not to
// apply borders on the 4x4 edges within the 8x8 block that each bit
// represents.
// Since each transform is accompanied by a potentially different type of
// loop filter there is a different entry in the array for each transform size.
internal struct LoopFilterMask
{
public Array4<ulong> LeftY;
public Array4<ulong> AboveY;
public ulong Int4x4Y;
public Array4<ushort> LeftUv;
public Array4<ushort> AboveUv;
public ushort Int4x4Uv;
public Array64<byte> LflY;
}
}

@ -0,0 +1,13 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
// Need to align this structure so when it is declared and
// passed it can be loaded into vector registers.
internal struct LoopFilterThresh
{
public Array16<byte> Mblim;
public Array16<byte> Lim;
public Array16<byte> HevThr;
}
}

@ -0,0 +1,179 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Video;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct MacroBlockD
{
public Array3<MacroBlockDPlane> Plane;
public byte BmodeBlocksWl;
public byte BmodeBlocksHl;
public Ptr<Vp9BackwardUpdates> Counts;
public TileInfo Tile;
public int MiStride;
// Grid of 8x8 cells is placed over the block.
// If some of them belong to the same mbtree-block
// they will just have same mi[i][j] value
public ArrayPtr<Ptr<ModeInfo>> Mi;
public Ptr<ModeInfo> LeftMi;
public Ptr<ModeInfo> AboveMi;
public uint MaxBlocksWide;
public uint MaxBlocksHigh;
public ArrayPtr<Array3<byte>> PartitionProbs;
/* Distance of MB away from frame edges */
public int MbToLeftEdge;
public int MbToRightEdge;
public int MbToTopEdge;
public int MbToBottomEdge;
public Ptr<Vp9EntropyProbs> Fc;
/* pointers to reference frames */
public Array2<Ptr<RefBuffer>> BlockRefs;
/* pointer to current frame */
public Surface CurBuf;
public Array3<ArrayPtr<sbyte>> AboveContext;
public Array3<Array16<sbyte>> LeftContext;
public ArrayPtr<sbyte> AboveSegContext;
public Array8<sbyte> LeftSegContext;
/* Bit depth: 8, 10, 12 */
public int Bd;
public bool Lossless;
public bool Corrupted;
public Ptr<InternalErrorInfo> ErrorInfo;
public int GetPredContextSegId()
{
sbyte aboveSip = !AboveMi.IsNull ? AboveMi.Value.SegIdPredicted : (sbyte)0;
sbyte leftSip = !LeftMi.IsNull ? LeftMi.Value.SegIdPredicted : (sbyte)0;
return aboveSip + leftSip;
}
public int GetSkipContext()
{
int aboveSkip = !AboveMi.IsNull ? AboveMi.Value.Skip : 0;
int leftSkip = !LeftMi.IsNull ? LeftMi.Value.Skip : 0;
return aboveSkip + leftSkip;
}
public int GetPredContextSwitchableInterp()
{
// Note:
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real macroblocks.
// The prediction flags in these dummy entries are initialized to 0.
int leftType = !LeftMi.IsNull ? LeftMi.Value.InterpFilter : Constants.SwitchableFilters;
int aboveType = !AboveMi.IsNull ? AboveMi.Value.InterpFilter : Constants.SwitchableFilters;
if (leftType == aboveType)
{
return leftType;
}
else if (leftType == Constants.SwitchableFilters)
{
return aboveType;
}
else if (aboveType == Constants.SwitchableFilters)
{
return leftType;
}
else
{
return Constants.SwitchableFilters;
}
}
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real macroblocks.
// The prediction flags in these dummy entries are initialized to 0.
// 0 - inter/inter, inter/--, --/inter, --/--
// 1 - intra/inter, inter/intra
// 2 - intra/--, --/intra
// 3 - intra/intra
public int GetIntraInterContext()
{
if (!AboveMi.IsNull && !LeftMi.IsNull)
{ // Both edges available
bool aboveIntra = !AboveMi.Value.IsInterBlock();
bool leftIntra = !LeftMi.Value.IsInterBlock();
return leftIntra && aboveIntra ? 3 : (leftIntra || aboveIntra ? 1 : 0);
}
else if (!AboveMi.IsNull || !LeftMi.IsNull)
{ // One edge available
return 2 * (!(!AboveMi.IsNull ? AboveMi.Value : LeftMi.Value).IsInterBlock() ? 1 : 0);
}
return 0;
}
// Returns a context number for the given MB prediction signal
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real blocks.
// The prediction flags in these dummy entries are initialized to 0.
public int GetTxSizeContext()
{
int maxTxSize = (int)Luts.MaxTxSizeLookup[(int)Mi[0].Value.SbType];
int aboveCtx = (!AboveMi.IsNull && AboveMi.Value.Skip == 0) ? (int)AboveMi.Value.TxSize : maxTxSize;
int leftCtx = (!LeftMi.IsNull && LeftMi.Value.Skip == 0) ? (int)LeftMi.Value.TxSize : maxTxSize;
if (LeftMi.IsNull)
{
leftCtx = aboveCtx;
}
if (AboveMi.IsNull)
{
aboveCtx = leftCtx;
}
return (aboveCtx + leftCtx) > maxTxSize ? 1 : 0;
}
public void SetupBlockPlanes(int ssX, int ssY)
{
int i;
for (i = 0; i < Constants.MaxMbPlane; i++)
{
Plane[i].SubsamplingX = i != 0 ? ssX : 0;
Plane[i].SubsamplingY = i != 0 ? ssY : 0;
}
}
public void SetSkipContext(int miRow, int miCol)
{
int aboveIdx = miCol * 2;
int leftIdx = (miRow * 2) & 15;
int i;
for (i = 0; i < Constants.MaxMbPlane; ++i)
{
ref MacroBlockDPlane pd = ref Plane[i];
pd.AboveContext = AboveContext[i].Slice(aboveIdx >> pd.SubsamplingX);
pd.LeftContext = new ArrayPtr<sbyte>(ref LeftContext[i][leftIdx >> pd.SubsamplingY], 16 - (leftIdx >> pd.SubsamplingY));
}
}
internal void SetMiRowCol(ref TileInfo tile, int miRow, int bh, int miCol, int bw, int miRows, int miCols)
{
MbToTopEdge = -((miRow * Constants.MiSize) * 8);
MbToBottomEdge = ((miRows - bh - miRow) * Constants.MiSize) * 8;
MbToLeftEdge = -((miCol * Constants.MiSize) * 8);
MbToRightEdge = ((miCols - bw - miCol) * Constants.MiSize) * 8;
// Are edges available for intra prediction?
AboveMi = (miRow != 0) ? Mi[-MiStride] : Ptr<ModeInfo>.Null;
LeftMi = (miCol > tile.MiColStart) ? Mi[-1] : Ptr<ModeInfo>.Null;
}
}
}

@ -0,0 +1,21 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct MacroBlockDPlane
{
public ArrayPtr<int> DqCoeff;
public int SubsamplingX;
public int SubsamplingY;
public Buf2D Dst;
public Array2<Buf2D> Pre;
public ArrayPtr<sbyte> AboveContext;
public ArrayPtr<sbyte> LeftContext;
public Array8<Array2<short>> SegDequant;
// Number of 4x4s in current block
public ushort N4W, N4H;
// Log2 of N4W, N4H
public byte N4Wl, N4Hl;
}
}

@ -0,0 +1,66 @@
using Ryujinx.Common.Memory;
using System.Diagnostics;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct ModeInfo
{
// Common for both Inter and Intra blocks
public BlockSize SbType;
public PredictionMode Mode;
public TxSize TxSize;
public sbyte Skip;
public sbyte SegmentId;
public sbyte SegIdPredicted; // Valid only when TemporalUpdate is enabled
// Only for Intra blocks
public PredictionMode UvMode;
// Only for Inter blocks
public byte InterpFilter;
// if ref_frame[idx] is equal to AltRefFrame then
// MacroBlockD.BlockRef[idx] is an altref
public Array2<sbyte> RefFrame;
public Array2<Mv> Mv;
public Array4<BModeInfo> Bmi;
public PredictionMode GetYMode(int block)
{
return SbType < BlockSize.Block8x8 ? Bmi[block].Mode : Mode;
}
public TxSize GetUvTxSize(ref MacroBlockDPlane pd)
{
Debug.Assert(SbType < BlockSize.Block8x8 ||
Luts.SsSizeLookup[(int)SbType][pd.SubsamplingX][pd.SubsamplingY] != BlockSize.BlockInvalid);
return Luts.UvTxsizeLookup[(int)SbType][(int)TxSize][pd.SubsamplingX][pd.SubsamplingY];
}
public bool IsInterBlock()
{
return RefFrame[0] > Constants.IntraFrame;
}
public bool HasSecondRef()
{
return RefFrame[1] > Constants.IntraFrame;
}
private static readonly int[][] IdxNColumnToSubblock = new int[][]
{
new int[] { 1, 2 }, new int[] { 1, 3 }, new int[] { 3, 2 }, new int[] { 3, 3 }
};
// This function returns either the appropriate sub block or block's mv
// on whether the block_size < 8x8 and we have check_sub_blocks set.
public Mv GetSubBlockMv(int whichMv, int searchCol, int blockIdx)
{
return blockIdx >= 0 && SbType < BlockSize.Block8x8
? Bmi[IdxNColumnToSubblock[blockIdx][searchCol == 0 ? 1 : 0]].Mv[whichMv]
: Mv[whichMv];
}
}
}

@ -0,0 +1,14 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal enum MotionVectorContext
{
BothZero = 0,
ZeroPlusPredicted = 1,
BothPredicted = 2,
NewPlusNonIntra = 3,
BothNew = 4,
IntraPlusNonIntra = 5,
BothIntra = 6,
InvalidCase = 9
}
}

@ -0,0 +1,189 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Video;
using System;
using System.Diagnostics;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct Mv
{
public short Row;
public short Col;
private static readonly byte[] LogInBase2 = new byte[]
{
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10
};
public bool UseMvHp()
{
const int kMvRefThresh = 64; // Threshold for use of high-precision 1/8 mv
return Math.Abs(Row) < kMvRefThresh && Math.Abs(Col) < kMvRefThresh;
}
public static bool MvJointVertical(MvJointType type)
{
return type == MvJointType.MvJointHzvnz || type == MvJointType.MvJointHnzvnz;
}
public static bool MvJointHorizontal(MvJointType type)
{
return type == MvJointType.MvJointHnzvz || type == MvJointType.MvJointHnzvnz;
}
private static int MvClassBase(MvClassType c)
{
return c != 0 ? Constants.Class0Size << ((int)c + 2) : 0;
}
private static MvClassType GetMvClass(int z, Ptr<int> offset)
{
MvClassType c = (z >= Constants.Class0Size * 4096) ? MvClassType.MvClass10 : (MvClassType)LogInBase2[z >> 3];
if (!offset.IsNull)
{
offset.Value = z - MvClassBase(c);
}
return c;
}
private static void IncMvComponent(int v, ref Vp9BackwardUpdates counts, int comp, int incr, int usehp)
{
int s, z, c, o = 0, d, e, f;
Debug.Assert(v != 0); /* Should not be zero */
s = v < 0 ? 1 : 0;
counts.Sign[comp][s] += (uint)incr;
z = (s != 0 ? -v : v) - 1; /* Magnitude - 1 */
c = (int)GetMvClass(z, new Ptr<int>(ref o));
counts.Classes[comp][c] += (uint)incr;
d = (o >> 3); /* Int mv data */
f = (o >> 1) & 3; /* Fractional pel mv data */
e = (o & 1); /* High precision mv data */
if (c == (int)MvClassType.MvClass0)
{
counts.Class0[comp][d] += (uint)incr;
counts.Class0Fp[comp][d][f] += (uint)incr;
counts.Class0Hp[comp][e] += (uint)(usehp * incr);
}
else
{
int i;
int b = c + Constants.Class0Bits - 1; // Number of bits
for (i = 0; i < b; ++i)
{
counts.Bits[comp][i][((d >> i) & 1)] += (uint)incr;
}
counts.Fp[comp][f] += (uint)incr;
counts.Hp[comp][e] += (uint)(usehp * incr);
}
}
private MvJointType GetMvJoint()
{
if (Row == 0)
{
return Col == 0 ? MvJointType.MvJointZero : MvJointType.MvJointHnzvz;
}
else
{
return Col == 0 ? MvJointType.MvJointHzvnz : MvJointType.MvJointHnzvnz;
}
}
internal void IncMv(Ptr<Vp9BackwardUpdates> counts)
{
if (!counts.IsNull)
{
MvJointType j = GetMvJoint();
++counts.Value.Joints[(int)j];
if (MvJointVertical(j))
{
IncMvComponent(Row, ref counts.Value, 0, 1, 1);
}
if (MvJointHorizontal(j))
{
IncMvComponent(Col, ref counts.Value, 1, 1, 1);
}
}
}
public void ClampMv(int minCol, int maxCol, int minRow, int maxRow)
{
Col = (short)Math.Clamp(Col, minCol, maxCol);
Row = (short)Math.Clamp(Row, minRow, maxRow);
}
private const int MvBorder = (16 << 3); // Allow 16 pels in 1/8th pel units
public void ClampMvRef(ref MacroBlockD xd)
{
ClampMv(
xd.MbToLeftEdge - MvBorder,
xd.MbToRightEdge + MvBorder,
xd.MbToTopEdge - MvBorder,
xd.MbToBottomEdge + MvBorder);
}
public void LowerMvPrecision(bool allowHP)
{
bool useHP = allowHP && UseMvHp();
if (!useHP)
{
if ((Row & 1) != 0)
{
Row += (short)(Row > 0 ? -1 : 1);
}
if ((Col & 1) != 0)
{
Col += (short)(Col > 0 ? -1 : 1);
}
}
}
}
}

@ -0,0 +1,8 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct Mv32
{
public int Row;
public int Col;
}
}

@ -0,0 +1,17 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal enum MvClassType
{
MvClass0 = 0, /* (0, 2] integer pel */
MvClass1 = 1, /* (2, 4] integer pel */
MvClass2 = 2, /* (4, 8] integer pel */
MvClass3 = 3, /* (8, 16] integer pel */
MvClass4 = 4, /* (16, 32] integer pel */
MvClass5 = 5, /* (32, 64] integer pel */
MvClass6 = 6, /* (64, 128] integer pel */
MvClass7 = 7, /* (128, 256] integer pel */
MvClass8 = 8, /* (256, 512] integer pel */
MvClass9 = 9, /* (512, 1024] integer pel */
MvClass10 = 10, /* (1024,2048] integer pel */
}
}

@ -0,0 +1,10 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal enum MvJointType
{
MvJointZero = 0, /* Zero vector */
MvJointHnzvz = 1, /* Vert zero, hor nonzero */
MvJointHzvnz = 2, /* Hor zero, vert nonzero */
MvJointHnzvnz = 3, /* Both components nonzero */
}
}

@ -0,0 +1,10 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct MvRef
{
public Array2<Mv> Mv;
public Array2<sbyte> RefFrame;
}
}

@ -0,0 +1,12 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal enum PartitionType
{
PartitionNone,
PartitionHorz,
PartitionVert,
PartitionSplit,
PartitionTypes,
PartitionInvalid = PartitionTypes
}
}

@ -0,0 +1,9 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal enum PlaneType
{
Y = 0,
Uv = 1,
PlaneTypes
}
}

@ -0,0 +1,14 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct Position
{
public int Row;
public int Col;
public Position(int row, int col)
{
Row = row;
Col = col;
}
}
}

@ -0,0 +1,21 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal enum PredictionMode
{
DcPred = 0, // Average of above and left pixels
VPred = 1, // Vertical
HPred = 2, // Horizontal
D45Pred = 3, // Directional 45 deg = round(arctan(1 / 1) * 180 / pi)
D135Pred = 4, // Directional 135 deg = 180 - 45
D117Pred = 5, // Directional 117 deg = 180 - 63
D153Pred = 6, // Directional 153 deg = 180 - 27
D207Pred = 7, // Directional 207 deg = 180 + 27
D63Pred = 8, // Directional 63 deg = round(arctan(2 / 1) * 180 / pi)
TmPred = 9, // True-motion
NearestMv = 10,
NearMv = 11,
ZeroMv = 12,
NewMv = 13,
MbModeCount = 14
}
}

@ -0,0 +1,8 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct RefBuffer
{
public Surface Buf;
public ScaleFactors Sf;
}
}

@ -0,0 +1,10 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal enum ReferenceMode
{
SingleReference = 0,
CompoundReference = 1,
ReferenceModeSelect = 2,
ReferenceModes = 3
}
}

@ -0,0 +1,451 @@
using Ryujinx.Common.Memory;
using System.Runtime.CompilerServices;
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Convolve;
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Filter;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct ScaleFactors
{
private const int RefScaleShift = 14;
private const int RefNoScale = (1 << RefScaleShift);
private const int RefInvalidScale = -1;
private unsafe delegate void ConvolveFn(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h);
private unsafe delegate void HighbdConvolveFn(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h,
int bd);
private static readonly unsafe ConvolveFn[][][] PredictX16Y16 = new ConvolveFn[][][]
{
new ConvolveFn[][]
{
new ConvolveFn[]
{
ConvolveCopy,
ConvolveAvg
},
new ConvolveFn[]
{
Convolve8Vert,
Convolve8AvgVert
}
},
new ConvolveFn[][]
{
new ConvolveFn[]
{
Convolve8Horiz,
Convolve8AvgHoriz
},
new ConvolveFn[]
{
Convolve8,
Convolve8Avg
}
}
};
private static readonly unsafe ConvolveFn[][][] PredictX16 = new ConvolveFn[][][]
{
new ConvolveFn[][]
{
new ConvolveFn[]
{
ScaledVert,
ScaledAvgVert
},
new ConvolveFn[]
{
ScaledVert,
ScaledAvgVert
}
},
new ConvolveFn[][]
{
new ConvolveFn[]
{
Scaled2D,
ScaledAvg2D
},
new ConvolveFn[]
{
Scaled2D,
ScaledAvg2D
}
}
};
private static readonly unsafe ConvolveFn[][][] PredictY16 = new ConvolveFn[][][]
{
new ConvolveFn[][]
{
new ConvolveFn[]
{
ScaledHoriz,
ScaledAvgHoriz
},
new ConvolveFn[]
{
Scaled2D,
ScaledAvg2D
}
},
new ConvolveFn[][]
{
new ConvolveFn[]
{
ScaledHoriz,
ScaledAvgHoriz
},
new ConvolveFn[]
{
Scaled2D,
ScaledAvg2D
}
}
};
private static readonly unsafe ConvolveFn[][][] Predict = new ConvolveFn[][][]
{
new ConvolveFn[][]
{
new ConvolveFn[]
{
Scaled2D,
ScaledAvg2D
},
new ConvolveFn[]
{
Scaled2D,
ScaledAvg2D
}
},
new ConvolveFn[][]
{
new ConvolveFn[]
{
Scaled2D,
ScaledAvg2D
},
new ConvolveFn[]
{
Scaled2D,
ScaledAvg2D
}
}
};
private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictX16Y16 = new HighbdConvolveFn[][][]
{
new HighbdConvolveFn[][]
{
new HighbdConvolveFn[]
{
HighbdConvolveCopy,
HighbdConvolveAvg
},
new HighbdConvolveFn[]
{
HighbdConvolve8Vert,
HighbdConvolve8AvgVert
}
},
new HighbdConvolveFn[][]
{
new HighbdConvolveFn[]
{
HighbdConvolve8Horiz,
HighbdConvolve8AvgHoriz
},
new HighbdConvolveFn[]
{
HighbdConvolve8,
HighbdConvolve8Avg
}
}
};
private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictX16 = new HighbdConvolveFn[][][]
{
new HighbdConvolveFn[][]
{
new HighbdConvolveFn[]
{
HighbdConvolve8Vert,
HighbdConvolve8AvgVert
},
new HighbdConvolveFn[]
{
HighbdConvolve8Vert,
HighbdConvolve8AvgVert
}
},
new HighbdConvolveFn[][]
{
new HighbdConvolveFn[]
{
HighbdConvolve8,
HighbdConvolve8Avg
},
new HighbdConvolveFn[]
{
HighbdConvolve8,
HighbdConvolve8Avg
}
}
};
private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictY16 = new HighbdConvolveFn[][][]
{
new HighbdConvolveFn[][]
{
new HighbdConvolveFn[]
{
HighbdConvolve8Horiz,
HighbdConvolve8AvgHoriz
},
new HighbdConvolveFn[]
{
HighbdConvolve8,
HighbdConvolve8Avg
}
},
new HighbdConvolveFn[][]
{
new HighbdConvolveFn[]
{
HighbdConvolve8Horiz,
HighbdConvolve8AvgHoriz
},
new HighbdConvolveFn[]
{
HighbdConvolve8,
HighbdConvolve8Avg
}
}
};
private static readonly unsafe HighbdConvolveFn[][][] HighbdPredict = new HighbdConvolveFn[][][]
{
new HighbdConvolveFn[][]
{
new HighbdConvolveFn[]
{
HighbdConvolve8,
HighbdConvolve8Avg
},
new HighbdConvolveFn[]
{
HighbdConvolve8,
HighbdConvolve8Avg
}
},
new HighbdConvolveFn[][]
{
new HighbdConvolveFn[]
{
HighbdConvolve8,
HighbdConvolve8Avg
},
new HighbdConvolveFn[]
{
HighbdConvolve8,
HighbdConvolve8Avg
}
}
};
public int XScaleFP; // Horizontal fixed point scale factor
public int YScaleFP; // Vertical fixed point scale factor
public int XStepQ4;
public int YStepQ4;
public int ScaleValueX(int val)
{
return IsScaled() ? ScaledX(val) : val;
}
public int ScaleValueY(int val)
{
return IsScaled() ? ScaledY(val) : val;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public unsafe void InterPredict(
int horiz,
int vert,
int avg,
byte* src,
int srcStride,
byte* dst,
int dstStride,
int subpelX,
int subpelY,
int w,
int h,
Array8<short>[] kernel,
int xs,
int ys)
{
if (XStepQ4 == 16)
{
if (YStepQ4 == 16)
{
// No scaling in either direction.
PredictX16Y16[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h);
}
else
{
// No scaling in x direction. Must always scale in the y direction.
PredictX16[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h);
}
}
else
{
if (YStepQ4 == 16)
{
// No scaling in the y direction. Must always scale in the x direction.
PredictY16[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h);
}
else
{
// Must always scale in both directions.
Predict[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h);
}
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public unsafe void HighbdInterPredict(
int horiz,
int vert,
int avg,
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
int subpelX,
int subpelY,
int w,
int h,
Array8<short>[] kernel,
int xs,
int ys,
int bd)
{
if (XStepQ4 == 16)
{
if (YStepQ4 == 16)
{
// No scaling in either direction.
HighbdPredictX16Y16[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h, bd);
}
else
{
// No scaling in x direction. Must always scale in the y direction.
HighbdPredictX16[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h, bd);
}
}
else
{
if (YStepQ4 == 16)
{
// No scaling in the y direction. Must always scale in the x direction.
HighbdPredictY16[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h, bd);
}
else
{
// Must always scale in both directions.
HighbdPredict[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h, bd);
}
}
}
private int ScaledX(int val)
{
return (int)((long)val * XScaleFP >> RefScaleShift);
}
private int ScaledY(int val)
{
return (int)((long)val * YScaleFP >> RefScaleShift);
}
private static int GetFixedPointScaleFactor(int otherSize, int thisSize)
{
// Calculate scaling factor once for each reference frame
// and use fixed point scaling factors in decoding and encoding routines.
// Hardware implementations can calculate scale factor in device driver
// and use multiplication and shifting on hardware instead of division.
return (otherSize << RefScaleShift) / thisSize;
}
public Mv32 ScaleMv(ref Mv mv, int x, int y)
{
int xOffQ4 = ScaledX(x << SubpelBits) & SubpelMask;
int yOffQ4 = ScaledY(y << SubpelBits) & SubpelMask;
Mv32 res = new Mv32()
{
Row = ScaledY(mv.Row) + yOffQ4,
Col = ScaledX(mv.Col) + xOffQ4
};
return res;
}
public bool IsValidScale()
{
return XScaleFP != RefInvalidScale && YScaleFP != RefInvalidScale;
}
public bool IsScaled()
{
return IsValidScale() && (XScaleFP != RefNoScale || YScaleFP != RefNoScale);
}
public static bool ValidRefFrameSize(int refWidth, int refHeight, int thisWidth, int thisHeight)
{
return 2 * thisWidth >= refWidth &&
2 * thisHeight >= refHeight &&
thisWidth <= 16 * refWidth &&
thisHeight <= 16 * refHeight;
}
public void SetupScaleFactorsForFrame(int otherW, int otherH, int thisW, int thisH)
{
if (!ValidRefFrameSize(otherW, otherH, thisW, thisH))
{
XScaleFP = RefInvalidScale;
YScaleFP = RefInvalidScale;
return;
}
XScaleFP = GetFixedPointScaleFactor(otherW, thisW);
YScaleFP = GetFixedPointScaleFactor(otherH, thisH);
XStepQ4 = ScaledX(16);
YStepQ4 = ScaledY(16);
}
}
}

@ -0,0 +1,11 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal enum SegLvlFeatures
{
SegLvlAltQ = 0, // Use alternate Quantizer ....
SegLvlAltLf = 1, // Use alternate loop filter value...
SegLvlRefFrame = 2, // Optional Segment reference frame
SegLvlSkip = 3, // Optional Segment (0,0) + skip mode
SegLvlMax = 4 // Number of features supported
}
}

@ -0,0 +1,71 @@
using Ryujinx.Common.Memory;
using System.Diagnostics;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct Segmentation
{
private static readonly int[] SegFeatureDataSigned = new int[] { 1, 1, 0, 0 };
private static readonly int[] SegFeatureDataMax = new int[] { QuantCommon.MaxQ, Vp9.LoopFilter.MaxLoopFilter, 3, 0 };
public bool Enabled;
public bool UpdateMap;
public byte UpdateData;
public byte AbsDelta;
public bool TemporalUpdate;
public Array8<Array4<short>> FeatureData;
public Array8<uint> FeatureMask;
public int AqAvOffset;
public static byte GetPredProbSegId(ref Array3<byte> segPredProbs, ref MacroBlockD xd)
{
return segPredProbs[xd.GetPredContextSegId()];
}
public void ClearAllSegFeatures()
{
MemoryMarshal.CreateSpan(ref FeatureData[0][0], 8 * 4).Fill(0);
MemoryMarshal.CreateSpan(ref FeatureMask[0], 8).Fill(0);
AqAvOffset = 0;
}
internal void EnableSegFeature(int segmentId, SegLvlFeatures featureId)
{
FeatureMask[segmentId] |= 1u << (int)featureId;
}
internal static int FeatureDataMax(SegLvlFeatures featureId)
{
return SegFeatureDataMax[(int)featureId];
}
internal static int IsSegFeatureSigned(SegLvlFeatures featureId)
{
return SegFeatureDataSigned[(int)featureId];
}
internal void SetSegData(int segmentId, SegLvlFeatures featureId, int segData)
{
Debug.Assert(segData <= SegFeatureDataMax[(int)featureId]);
if (segData < 0)
{
Debug.Assert(SegFeatureDataSigned[(int)featureId] != 0);
Debug.Assert(-segData <= SegFeatureDataMax[(int)featureId]);
}
FeatureData[segmentId][(int)featureId] = (short)segData;
}
internal int IsSegFeatureActive(int segmentId, SegLvlFeatures featureId)
{
return Enabled && (FeatureMask[segmentId] & (1 << (int)featureId)) != 0 ? 1 : 0;
}
internal short GetSegData(int segmentId, SegLvlFeatures featureId)
{
return FeatureData[segmentId][(int)featureId];
}
}
}

@ -0,0 +1,80 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Video;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct Surface : ISurface
{
public ArrayPtr<byte> YBuffer;
public ArrayPtr<byte> UBuffer;
public ArrayPtr<byte> VBuffer;
public unsafe Plane YPlane => new Plane((IntPtr)YBuffer.ToPointer(), YBuffer.Length);
public unsafe Plane UPlane => new Plane((IntPtr)UBuffer.ToPointer(), UBuffer.Length);
public unsafe Plane VPlane => new Plane((IntPtr)VBuffer.ToPointer(), VBuffer.Length);
public int Width { get; }
public int Height { get; }
public int AlignedWidth { get; }
public int AlignedHeight { get; }
public int Stride { get; }
public int UvWidth { get; }
public int UvHeight { get; }
public int UvAlignedWidth { get; }
public int UvAlignedHeight { get; }
public int UvStride { get; }
public bool HighBd => false;
private readonly IntPtr _pointer;
public Surface(int width, int height)
{
const int border = 32;
const int ssX = 1;
const int ssY = 1;
const bool highbd = false;
int alignedWidth = (width + 7) & ~7;
int alignedHeight = (height + 7) & ~7;
int yStride = ((alignedWidth + 2 * border) + 31) & ~31;
int yplaneSize = (alignedHeight + 2 * border) * yStride;
int uvWidth = alignedWidth >> ssX;
int uvHeight = alignedHeight >> ssY;
int uvStride = yStride >> ssX;
int uvBorderW = border >> ssX;
int uvBorderH = border >> ssY;
int uvplaneSize = (uvHeight + 2 * uvBorderH) * uvStride;
int frameSize = (highbd ? 2 : 1) * (yplaneSize + 2 * uvplaneSize);
IntPtr pointer = Marshal.AllocHGlobal(frameSize);
_pointer = pointer;
Width = width;
Height = height;
AlignedWidth = alignedWidth;
AlignedHeight = alignedHeight;
Stride = yStride;
UvWidth = (width + ssX) >> ssX;
UvHeight = (height + ssY) >> ssY;
UvAlignedWidth = uvWidth;
UvAlignedHeight = uvHeight;
UvStride = uvStride;
ArrayPtr<byte> NewPlane(int start, int size, int border)
{
return new ArrayPtr<byte>(pointer + start + border, size - border);
}
YBuffer = NewPlane(0, yplaneSize, (border * yStride) + border);
UBuffer = NewPlane(yplaneSize, uvplaneSize, (uvBorderH * uvStride) + uvBorderW);
VBuffer = NewPlane(yplaneSize + uvplaneSize, uvplaneSize, (uvBorderH * uvStride) + uvBorderW);
}
public void Dispose()
{
Marshal.FreeHGlobal(_pointer);
}
}
}

@ -0,0 +1,85 @@
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using System;
using System.Diagnostics;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct TileInfo
{
private const int MinTileWidthB64 = 4;
private const int MaxTileWidthB64 = 64;
public int MiRowStart, MiRowEnd;
public int MiColStart, MiColEnd;
public static int MiColsAlignedToSb(int nMis)
{
return BitUtils.AlignPowerOfTwo(nMis, Constants.MiBlockSizeLog2);
}
private static int GetTileOffset(int idx, int mis, int log2)
{
int sbCols = MiColsAlignedToSb(mis) >> Constants.MiBlockSizeLog2;
int offset = ((idx * sbCols) >> log2) << Constants.MiBlockSizeLog2;
return Math.Min(offset, mis);
}
public void SetRow(ref Vp9Common cm, int row)
{
MiRowStart = GetTileOffset(row, cm.MiRows, cm.Log2TileRows);
MiRowEnd = GetTileOffset(row + 1, cm.MiRows, cm.Log2TileRows);
}
public void SetCol(ref Vp9Common cm, int col)
{
MiColStart = GetTileOffset(col, cm.MiCols, cm.Log2TileCols);
MiColEnd = GetTileOffset(col + 1, cm.MiCols, cm.Log2TileCols);
}
public void Init(ref Vp9Common cm, int row, int col)
{
SetRow(ref cm, row);
SetCol(ref cm, col);
}
// Checks that the given miRow, miCol and search point
// are inside the borders of the tile.
public bool IsInside(int miCol, int miRow, int miRows, ref Position miPos)
{
return !(miRow + miPos.Row < 0 ||
miCol + miPos.Col < MiColStart ||
miRow + miPos.Row >= miRows ||
miCol + miPos.Col >= MiColEnd);
}
private static int GetMinLog2TileCols(int sb64Cols)
{
int minLog2 = 0;
while ((MaxTileWidthB64 << minLog2) < sb64Cols)
{
++minLog2;
}
return minLog2;
}
private static int GetMaxLog2TileCols(int sb64Cols)
{
int maxLog2 = 1;
while ((sb64Cols >> maxLog2) >= MinTileWidthB64)
{
++maxLog2;
}
return maxLog2 - 1;
}
public static void GetTileNBits(int miCols, ref int minLog2TileCols, ref int maxLog2TileCols)
{
int sb64Cols = MiColsAlignedToSb(miCols) >> Constants.MiBlockSizeLog2;
minLog2TileCols = GetMinLog2TileCols(sb64Cols);
maxLog2TileCols = GetMaxLog2TileCols(sb64Cols);
Debug.Assert(minLog2TileCols <= maxLog2TileCols);
}
}
}

@ -0,0 +1,12 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
public enum TxMode
{
Only4X4 = 0, // Only 4x4 transform used
Allow8X8 = 1, // Allow block transform size up to 8x8
Allow16X16 = 2, // Allow block transform size up to 16x16
Allow32X32 = 3, // Allow block transform size up to 32x32
TxModeSelect = 4, // Transform specified for each block
TxModes = 5
}
}

@ -0,0 +1,11 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
public enum TxSize
{
Tx4x4 = 0, // 4x4 transform
Tx8x8 = 1, // 8x8 transform
Tx16x16 = 2, // 16x16 transform
Tx32x32 = 3, // 32x32 transform
TxSizes = 4
}
}

@ -0,0 +1,11 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal enum TxType
{
DctDct = 0, // DCT in both horizontal and vertical
AdstDct = 1, // ADST in vertical, DCT in horizontal
DctAdst = 2, // DCT in vertical, ADST in horizontal
AdstAdst = 3, // ADST in both directions
TxTypes = 4
}
}

Some files were not shown because too many files have changed in this diff Show More