mirror of
https://github.com/Ryujinx/Ryujinx.git
synced 2025-01-10 15:21:56 -08:00
New NVDEC and VIC implementation (#1384)
* Initial NVDEC and VIC implementation * Update FFmpeg.AutoGen to 4.3.0 * Add nvdec dependencies for Windows * Unify some VP9 structures * Rename VP9 structure fields * Improvements to Video API * XML docs for Common.Memory * Remove now unused or redundant overloads from MemoryAccessor * NVDEC UV surface read/write scalar paths * Add FIXME comments about hacky things/stuff that will need to be fixed in the future * Cleaned up VP9 memory allocation * Remove some debug logs * Rename some VP9 structs * Remove unused struct * No need to compile Ryujinx.Graphics.Host1x with unsafe anymore * Name AsyncWorkQueue threads to make debugging easier * Make Vp9PictureInfo a ref struct * LayoutConverter no longer needs the depth argument (broken by rebase) * Pooling of VP9 buffers, plus fix a memory leak on VP9 * Really wish VS could rename projects properly... * Address feedback * Remove using * Catch OperationCanceledException * Add licensing informations * Add THIRDPARTY.md to release too Co-authored-by: Thog <me@thog.eu>
This commit is contained in:
parent
38b26cf424
commit
4d02a2d2c0
@ -112,3 +112,9 @@ If you need help with setting up Ryujinx, you can ask questions in the #support
|
||||
If you have contributions, need support, have suggestions, or just want to get in touch with the team, join our [Discord server](https://discord.gg/N2FmfVc)!
|
||||
|
||||
If you'd like to donate, please take a look at our [Patreon](https://www.patreon.com/ryujinx).
|
||||
|
||||
## License
|
||||
|
||||
This software is licensed under the terms of the MIT license.
|
||||
This project makes use of code authored by the libvpx project, licensed under BSD, and the ffmpeg project, licensed under LGPLv3.
|
||||
See [LICENSE.txt](LICENSE.txt) and [THIRDPARTY.md](Ryujinx/THIRDPARTY.md) for more details.
|
||||
|
100
Ryujinx.Common/AsyncWorkQueue.cs
Normal file
100
Ryujinx.Common/AsyncWorkQueue.cs
Normal file
@ -0,0 +1,100 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Threading;
|
||||
|
||||
namespace Ryujinx.Common
|
||||
{
|
||||
/// <summary>
/// Queue whose items are handed, one at a time, to a callback running on a dedicated background thread.
/// </summary>
/// <typeparam name="T">Type of the work items</typeparam>
public sealed class AsyncWorkQueue<T> : IDisposable
{
    private readonly Thread _workerThread;
    private readonly CancellationTokenSource _cts;
    private readonly Action<T> _workerAction;
    private readonly BlockingCollection<T> _queue;

    // 0 while the queue is usable, 1 once Dispose has been entered.
    private int _disposed;

    /// <summary>
    /// True once cancellation of the worker has been requested, false otherwise.
    /// </summary>
    public bool IsCancellationRequested => _cts.IsCancellationRequested;

    /// <summary>
    /// Creates a new work queue backed by a newly created <see cref="BlockingCollection{T}"/>.
    /// </summary>
    /// <param name="callback">Action invoked on the worker thread for every item taken from the queue</param>
    /// <param name="name">Optional name given to the worker thread</param>
    /// <exception cref="ArgumentNullException"><paramref name="callback"/> is null</exception>
    public AsyncWorkQueue(Action<T> callback, string name = null) : this(callback, name, new BlockingCollection<T>())
    {
    }

    /// <summary>
    /// Creates a new work queue on top of an existing collection and starts the worker thread.
    /// </summary>
    /// <remarks>
    /// The queue takes ownership of <paramref name="collection"/>: it is disposed together with the queue.
    /// </remarks>
    /// <param name="callback">Action invoked on the worker thread for every item taken from the queue</param>
    /// <param name="name">Name given to the worker thread, may be null</param>
    /// <param name="collection">Collection used to store the pending work items</param>
    /// <exception cref="ArgumentNullException"><paramref name="callback"/> or <paramref name="collection"/> is null</exception>
    public AsyncWorkQueue(Action<T> callback, string name, BlockingCollection<T> collection)
    {
        // Validate before the thread starts: a null here would otherwise only surface
        // as an unhandled NullReferenceException on the worker thread.
        _workerAction = callback ?? throw new ArgumentNullException(nameof(callback));
        _queue = collection ?? throw new ArgumentNullException(nameof(collection));
        _cts = new CancellationTokenSource();

        _workerThread = new Thread(DoWork) { Name = name, IsBackground = true };
        _workerThread.Start();
    }

    /// <summary>
    /// Worker thread body: consumes items until adding is completed or cancellation is requested.
    /// </summary>
    private void DoWork()
    {
        try
        {
            foreach (var item in _queue.GetConsumingEnumerable(_cts.Token))
            {
                _workerAction(item);
            }
        }
        catch (OperationCanceledException)
        {
            // Cancellation is the expected way of stopping the worker, nothing to report.
        }
    }

    /// <summary>
    /// Requests cancellation of the worker.
    /// </summary>
    public void Cancel()
    {
        _cts.Cancel();
    }

    /// <summary>
    /// Requests cancellation of the worker after the given delay.
    /// </summary>
    /// <param name="millisecondsDelay">Delay in milliseconds before cancellation is requested</param>
    public void CancelAfter(int millisecondsDelay)
    {
        _cts.CancelAfter(millisecondsDelay);
    }

    /// <summary>
    /// Requests cancellation of the worker after the given delay.
    /// </summary>
    /// <param name="delay">Delay before cancellation is requested</param>
    public void CancelAfter(TimeSpan delay)
    {
        _cts.CancelAfter(delay);
    }

    /// <summary>
    /// Adds a work item to the queue.
    /// </summary>
    /// <param name="workItem">Item to be processed by the worker</param>
    public void Add(T workItem)
    {
        _queue.Add(workItem);
    }

    /// <summary>
    /// Adds a work item to the queue, observing a cancellation token while doing so.
    /// </summary>
    /// <param name="workItem">Item to be processed by the worker</param>
    /// <param name="cancellationToken">Token that can cancel the add operation</param>
    public void Add(T workItem, CancellationToken cancellationToken)
    {
        _queue.Add(workItem, cancellationToken);
    }

    /// <summary>
    /// Tries to add a work item to the queue.
    /// </summary>
    /// <param name="workItem">Item to be processed by the worker</param>
    /// <returns>True if the item was added, false otherwise</returns>
    public bool TryAdd(T workItem)
    {
        return _queue.TryAdd(workItem);
    }

    /// <summary>
    /// Tries to add a work item to the queue within the given time.
    /// </summary>
    /// <param name="workItem">Item to be processed by the worker</param>
    /// <param name="millisecondsDelay">Maximum time to wait, in milliseconds</param>
    /// <returns>True if the item was added, false otherwise</returns>
    public bool TryAdd(T workItem, int millisecondsDelay)
    {
        return _queue.TryAdd(workItem, millisecondsDelay);
    }

    /// <summary>
    /// Tries to add a work item to the queue within the given time, observing a cancellation token.
    /// </summary>
    /// <param name="workItem">Item to be processed by the worker</param>
    /// <param name="millisecondsDelay">Maximum time to wait, in milliseconds</param>
    /// <param name="cancellationToken">Token that can cancel the add operation</param>
    /// <returns>True if the item was added, false otherwise</returns>
    public bool TryAdd(T workItem, int millisecondsDelay, CancellationToken cancellationToken)
    {
        return _queue.TryAdd(workItem, millisecondsDelay, cancellationToken);
    }

    /// <summary>
    /// Tries to add a work item to the queue within the given time.
    /// </summary>
    /// <param name="workItem">Item to be processed by the worker</param>
    /// <param name="timeout">Maximum time to wait</param>
    /// <returns>True if the item was added, false otherwise</returns>
    public bool TryAdd(T workItem, TimeSpan timeout)
    {
        return _queue.TryAdd(workItem, timeout);
    }

    /// <summary>
    /// Stops the worker thread, waits for it to exit and releases the queue resources.
    /// </summary>
    /// <remarks>
    /// Calling this method more than once is safe; only the first call has an effect.
    /// Items still queued when the worker observes the cancellation are not guaranteed to be processed.
    /// </remarks>
    public void Dispose()
    {
        // CompleteAdding/Cancel throw ObjectDisposedException on disposed objects,
        // so a repeated Dispose must return before touching them again.
        if (Interlocked.Exchange(ref _disposed, 1) != 0)
        {
            return;
        }

        _queue.CompleteAdding();
        _cts.Cancel();
        _workerThread.Join();

        _queue.Dispose();
        _cts.Dispose();
    }
}
|
||||
}
|
@ -9,12 +9,14 @@ namespace Ryujinx.Common.Logging
|
||||
Emulation,
|
||||
Gpu,
|
||||
Hid,
|
||||
Host1x,
|
||||
Kernel,
|
||||
KernelIpc,
|
||||
KernelScheduler,
|
||||
KernelSvc,
|
||||
Loader,
|
||||
ModLoader,
|
||||
Nvdec,
|
||||
Ptc,
|
||||
Service,
|
||||
ServiceAcc,
|
||||
@ -50,6 +52,7 @@ namespace Ryujinx.Common.Logging
|
||||
ServiceSss,
|
||||
ServiceTime,
|
||||
ServiceVi,
|
||||
SurfaceFlinger
|
||||
SurfaceFlinger,
|
||||
Vic
|
||||
}
|
||||
}
|
123
Ryujinx.Common/Memory/ArrayPtr.cs
Normal file
123
Ryujinx.Common/Memory/ArrayPtr.cs
Normal file
@ -0,0 +1,123 @@
|
||||
using System;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Common.Memory
|
||||
{
|
||||
/// <summary>
/// Pointer-and-length view over a run of unmanaged elements.
/// </summary>
/// <typeparam name="T">Type of the array elements</typeparam>
public unsafe struct ArrayPtr<T> : IEquatable<ArrayPtr<T>>, IArray<T> where T : unmanaged
{
    private IntPtr _ptr;

    /// <summary>
    /// Array whose base pointer is null and whose length is zero.
    /// </summary>
    public static ArrayPtr<T> Null => new ArrayPtr<T>(IntPtr.Zero, 0);

    /// <summary>
    /// Indicates whether the base pointer is null.
    /// </summary>
    public bool IsNull => _ptr == IntPtr.Zero;

    /// <summary>
    /// Element count of the array.
    /// </summary>
    public int Length { get; }

    /// <summary>
    /// Returns a reference to the element located <paramref name="index"/> elements away from the base pointer.
    /// </summary>
    /// <remarks>
    /// The index is not validated against <see cref="Length"/>. Negative indices are therefore
    /// possible, and it is up to the caller to stay inside valid memory.
    /// </remarks>
    /// <param name="index">Element index, relative to the base pointer</param>
    /// <returns>Reference to the addressed element</returns>
    public ref T this[int index] => ref Unsafe.AsRef<T>(ToPointer() + index);

    /// <summary>
    /// Builds an array that starts at the given element reference.
    /// </summary>
    /// <remarks>
    /// When the referenced data lives on the managed heap, it must be kept pinned for as long
    /// as the array is used; otherwise memory corruption and crashes will follow.
    /// </remarks>
    /// <param name="value">Reference to the first element</param>
    /// <param name="length">Element count of the array</param>
    public ArrayPtr(ref T value, int length)
    {
        Length = length;
        _ptr = new IntPtr(Unsafe.AsPointer(ref value));
    }

    /// <summary>
    /// Builds an array from a typed base pointer.
    /// </summary>
    /// <param name="ptr">Pointer to the first element</param>
    /// <param name="length">Element count of the array</param>
    public ArrayPtr(T* ptr, int length)
    {
        Length = length;
        _ptr = new IntPtr(ptr);
    }

    /// <summary>
    /// Builds an array from an untyped base pointer.
    /// </summary>
    /// <param name="ptr">Pointer to the first element</param>
    /// <param name="length">Element count of the array</param>
    public ArrayPtr(IntPtr ptr, int length)
    {
        Length = length;
        _ptr = ptr;
    }

    /// <summary>
    /// Returns the tail of the array that begins at the given position.
    /// </summary>
    /// <param name="start">Index of the first element of the new array</param>
    /// <returns>Array covering the elements from <paramref name="start"/> onwards</returns>
    public ArrayPtr<T> Slice(int start)
    {
        return new ArrayPtr<T>(ToPointer() + start, Length - start);
    }

    /// <summary>
    /// Exposes the array as a span.
    /// </summary>
    /// <returns>Span over the elements, or an empty span when the length is zero</returns>
    public Span<T> ToSpan()
    {
        if (Length == 0)
        {
            return Span<T>.Empty;
        }

        return MemoryMarshal.CreateSpan(ref this[0], Length);
    }

    /// <summary>
    /// Returns the base pointer of the array.
    /// </summary>
    /// <returns>Pointer to the first element</returns>
    public T* ToPointer() => (T*)_ptr;

    public override bool Equals(object obj) => obj is ArrayPtr<T> other && Equals(other);

    public bool Equals([AllowNull] ArrayPtr<T> other) => Length == other.Length && _ptr == other._ptr;

    public override int GetHashCode() => HashCode.Combine(_ptr, Length);

    public static bool operator ==(ArrayPtr<T> left, ArrayPtr<T> right) => left.Equals(right);

    public static bool operator !=(ArrayPtr<T> left, ArrayPtr<T> right) => !left.Equals(right);
}
|
||||
}
|
21
Ryujinx.Common/Memory/IArray.cs
Normal file
21
Ryujinx.Common/Memory/IArray.cs
Normal file
@ -0,0 +1,21 @@
|
||||
namespace Ryujinx.Common.Memory
|
||||
{
|
||||
/// <summary>
/// Contract shared by index-addressable collections of unmanaged elements.
/// </summary>
/// <typeparam name="T">Type of the elements</typeparam>
public interface IArray<T> where T : unmanaged
{
    /// <summary>
    /// Element count of the array.
    /// </summary>
    int Length { get; }

    /// <summary>
    /// Accesses an element of the array by position.
    /// </summary>
    /// <param name="index">Position of the element</param>
    /// <returns>Reference to the element at the specified index</returns>
    ref T this[int index] { get; }
}
|
||||
}
|
68
Ryujinx.Common/Memory/Ptr.cs
Normal file
68
Ryujinx.Common/Memory/Ptr.cs
Normal file
@ -0,0 +1,68 @@
|
||||
using System;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace Ryujinx.Common.Memory
|
||||
{
|
||||
/// <summary>
/// Typed wrapper around the address of a single unmanaged value.
/// </summary>
/// <typeparam name="T">Type of the value being pointed to</typeparam>
public unsafe struct Ptr<T> : IEquatable<Ptr<T>> where T : unmanaged
{
    private IntPtr _ptr;

    /// <summary>
    /// Pointer whose address is zero.
    /// </summary>
    public static Ptr<T> Null => default;

    /// <summary>
    /// Indicates whether the stored address is zero.
    /// </summary>
    public bool IsNull => _ptr == IntPtr.Zero;

    /// <summary>
    /// Reference to the value located at the stored address.
    /// </summary>
    public ref T Value => ref Unsafe.AsRef<T>(_ptr.ToPointer());

    /// <summary>
    /// Captures the address of the given value.
    /// </summary>
    /// <remarks>
    /// When the value lives on the managed heap, it must be kept pinned for as long
    /// as the pointer is used; otherwise memory corruption and crashes will follow.
    /// </remarks>
    /// <param name="value">Value whose address is captured</param>
    public Ptr(ref T value)
    {
        _ptr = new IntPtr(Unsafe.AsPointer(ref value));
    }

    public override bool Equals(object obj) => obj is Ptr<T> other && Equals(other);

    public bool Equals([AllowNull] Ptr<T> other) => _ptr == other._ptr;

    public override int GetHashCode() => _ptr.GetHashCode();

    public static bool operator ==(Ptr<T> left, Ptr<T> right) => left.Equals(right);

    public static bool operator !=(Ptr<T> left, Ptr<T> right) => !left.Equals(right);
}
|
||||
}
|
518
Ryujinx.Common/Memory/StructArrayHelpers.cs
Normal file
518
Ryujinx.Common/Memory/StructArrayHelpers.cs
Normal file
@ -0,0 +1,518 @@
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Common.Memory
{
    // Fixed-size inline arrays, built recursively: ArrayN<T> keeps its first element in _e0
    // and reserves the room for the remaining N - 1 elements with a field of the previous
    // array type. That _other field is never accessed by name; it only makes the struct
    // large enough for ToSpan() to expose all N elements starting at _e0, which is why
    // CS0169 ("field is never used") is disabled for this block.
    // NOTE(review): this relies on _other being laid out directly after _e0 with no padding
    // in between - confirm before applying any non-default StructLayout to these types.
    //
    // Every struct exposes the same members:
    //   Length    - element count (the N in the type name).
    //   this[int] - reference to the element at the given index, obtained through ToSpan().
    //   ToSpan()  - span of Length elements that starts at _e0, i.e. it aliases the
    //               struct's own storage instead of copying it.
#pragma warning disable CS0169

    /// <summary>Inline array of 1 element of type <typeparamref name="T"/>.</summary>
    public struct Array1<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        public int Length => 1;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 2 elements of type <typeparamref name="T"/>.</summary>
    public struct Array2<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array1<T> _other;
        public int Length => 2;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 3 elements of type <typeparamref name="T"/>.</summary>
    public struct Array3<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array2<T> _other;
        public int Length => 3;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 4 elements of type <typeparamref name="T"/>.</summary>
    public struct Array4<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array3<T> _other;
        public int Length => 4;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 5 elements of type <typeparamref name="T"/>.</summary>
    public struct Array5<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array4<T> _other;
        public int Length => 5;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 6 elements of type <typeparamref name="T"/>.</summary>
    public struct Array6<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array5<T> _other;
        public int Length => 6;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 7 elements of type <typeparamref name="T"/>.</summary>
    public struct Array7<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array6<T> _other;
        public int Length => 7;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 8 elements of type <typeparamref name="T"/>.</summary>
    public struct Array8<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array7<T> _other;
        public int Length => 8;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 9 elements of type <typeparamref name="T"/>.</summary>
    public struct Array9<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array8<T> _other;
        public int Length => 9;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 10 elements of type <typeparamref name="T"/>.</summary>
    public struct Array10<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array9<T> _other;
        public int Length => 10;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 11 elements of type <typeparamref name="T"/>.</summary>
    public struct Array11<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array10<T> _other;
        public int Length => 11;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 12 elements of type <typeparamref name="T"/>.</summary>
    public struct Array12<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array11<T> _other;
        public int Length => 12;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 13 elements of type <typeparamref name="T"/>.</summary>
    public struct Array13<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array12<T> _other;
        public int Length => 13;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 14 elements of type <typeparamref name="T"/>.</summary>
    public struct Array14<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array13<T> _other;
        public int Length => 14;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 15 elements of type <typeparamref name="T"/>.</summary>
    public struct Array15<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array14<T> _other;
        public int Length => 15;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 16 elements of type <typeparamref name="T"/>.</summary>
    public struct Array16<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array15<T> _other;
        public int Length => 16;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 17 elements of type <typeparamref name="T"/>.</summary>
    public struct Array17<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array16<T> _other;
        public int Length => 17;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 18 elements of type <typeparamref name="T"/>.</summary>
    public struct Array18<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array17<T> _other;
        public int Length => 18;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 19 elements of type <typeparamref name="T"/>.</summary>
    public struct Array19<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array18<T> _other;
        public int Length => 19;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 20 elements of type <typeparamref name="T"/>.</summary>
    public struct Array20<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array19<T> _other;
        public int Length => 20;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 21 elements of type <typeparamref name="T"/>.</summary>
    public struct Array21<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array20<T> _other;
        public int Length => 21;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 22 elements of type <typeparamref name="T"/>.</summary>
    public struct Array22<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array21<T> _other;
        public int Length => 22;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 23 elements of type <typeparamref name="T"/>.</summary>
    public struct Array23<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array22<T> _other;
        public int Length => 23;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 24 elements of type <typeparamref name="T"/>.</summary>
    public struct Array24<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array23<T> _other;
        public int Length => 24;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 25 elements of type <typeparamref name="T"/>.</summary>
    public struct Array25<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array24<T> _other;
        public int Length => 25;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 26 elements of type <typeparamref name="T"/>.</summary>
    public struct Array26<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array25<T> _other;
        public int Length => 26;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 27 elements of type <typeparamref name="T"/>.</summary>
    public struct Array27<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array26<T> _other;
        public int Length => 27;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 28 elements of type <typeparamref name="T"/>.</summary>
    public struct Array28<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array27<T> _other;
        public int Length => 28;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 29 elements of type <typeparamref name="T"/>.</summary>
    public struct Array29<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array28<T> _other;
        public int Length => 29;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 30 elements of type <typeparamref name="T"/>.</summary>
    public struct Array30<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array29<T> _other;
        public int Length => 30;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 31 elements of type <typeparamref name="T"/>.</summary>
    public struct Array31<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array30<T> _other;
        public int Length => 31;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 32 elements of type <typeparamref name="T"/>.</summary>
    public struct Array32<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array31<T> _other;
        public int Length => 32;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 33 elements of type <typeparamref name="T"/>.</summary>
    public struct Array33<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array32<T> _other;
        public int Length => 33;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 34 elements of type <typeparamref name="T"/>.</summary>
    public struct Array34<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array33<T> _other;
        public int Length => 34;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 35 elements of type <typeparamref name="T"/>.</summary>
    public struct Array35<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array34<T> _other;
        public int Length => 35;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 36 elements of type <typeparamref name="T"/>.</summary>
    public struct Array36<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array35<T> _other;
        public int Length => 36;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 37 elements of type <typeparamref name="T"/>.</summary>
    public struct Array37<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array36<T> _other;
        public int Length => 37;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 38 elements of type <typeparamref name="T"/>.</summary>
    public struct Array38<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array37<T> _other;
        public int Length => 38;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 39 elements of type <typeparamref name="T"/>.</summary>
    public struct Array39<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array38<T> _other;
        public int Length => 39;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 40 elements of type <typeparamref name="T"/>.</summary>
    public struct Array40<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array39<T> _other;
        public int Length => 40;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 41 elements of type <typeparamref name="T"/>.</summary>
    public struct Array41<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array40<T> _other;
        public int Length => 41;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 42 elements of type <typeparamref name="T"/>.</summary>
    public struct Array42<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array41<T> _other;
        public int Length => 42;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 43 elements of type <typeparamref name="T"/>.</summary>
    public struct Array43<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array42<T> _other;
        public int Length => 43;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 44 elements of type <typeparamref name="T"/>.</summary>
    public struct Array44<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array43<T> _other;
        public int Length => 44;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 45 elements of type <typeparamref name="T"/>.</summary>
    public struct Array45<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array44<T> _other;
        public int Length => 45;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 46 elements of type <typeparamref name="T"/>.</summary>
    public struct Array46<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array45<T> _other;
        public int Length => 46;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 47 elements of type <typeparamref name="T"/>.</summary>
    public struct Array47<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array46<T> _other;
        public int Length => 47;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 48 elements of type <typeparamref name="T"/>.</summary>
    public struct Array48<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array47<T> _other;
        public int Length => 48;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 49 elements of type <typeparamref name="T"/>.</summary>
    public struct Array49<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array48<T> _other;
        public int Length => 49;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 50 elements of type <typeparamref name="T"/>.</summary>
    public struct Array50<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array49<T> _other;
        public int Length => 50;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 51 elements of type <typeparamref name="T"/>.</summary>
    public struct Array51<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array50<T> _other;
        public int Length => 51;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 52 elements of type <typeparamref name="T"/>.</summary>
    public struct Array52<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array51<T> _other;
        public int Length => 52;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 53 elements of type <typeparamref name="T"/>.</summary>
    public struct Array53<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array52<T> _other;
        public int Length => 53;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 54 elements of type <typeparamref name="T"/>.</summary>
    public struct Array54<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array53<T> _other;
        public int Length => 54;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 55 elements of type <typeparamref name="T"/>.</summary>
    public struct Array55<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array54<T> _other;
        public int Length => 55;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 56 elements of type <typeparamref name="T"/>.</summary>
    public struct Array56<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array55<T> _other;
        public int Length => 56;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 57 elements of type <typeparamref name="T"/>.</summary>
    public struct Array57<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array56<T> _other;
        public int Length => 57;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 58 elements of type <typeparamref name="T"/>.</summary>
    public struct Array58<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array57<T> _other;
        public int Length => 58;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 59 elements of type <typeparamref name="T"/>.</summary>
    public struct Array59<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array58<T> _other;
        public int Length => 59;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 60 elements of type <typeparamref name="T"/>.</summary>
    public struct Array60<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array59<T> _other;
        public int Length => 60;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 61 elements of type <typeparamref name="T"/>.</summary>
    public struct Array61<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array60<T> _other;
        public int Length => 61;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 62 elements of type <typeparamref name="T"/>.</summary>
    public struct Array62<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array61<T> _other;
        public int Length => 62;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 63 elements of type <typeparamref name="T"/>.</summary>
    public struct Array63<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array62<T> _other;
        public int Length => 63;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }
    /// <summary>Inline array of 64 elements of type <typeparamref name="T"/>.</summary>
    public struct Array64<T> : IArray<T> where T : unmanaged
    {
        private T _e0;
        private Array63<T> _other;
        public int Length => 64;
        public ref T this[int index] => ref ToSpan()[index];
        public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, Length);
    }

#pragma warning restore CS0169
}
|
@ -193,6 +193,38 @@ namespace Ryujinx.Cpu
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Obtains a region of memory that the caller may write into.
/// </summary>
/// <remarks>
/// When the requested range is contiguous, the returned region wraps the backing memory
/// directly. Otherwise a temporary buffer is allocated and filled with the current contents
/// of the range, and the buffer is written back to guest memory when the region is disposed.
/// </remarks>
/// <param name="va">Virtual address where the region starts</param>
/// <param name="size">Size of the region in bytes</param>
/// <returns>Writable region covering the requested range</returns>
public WritableRegion GetWritableRegion(ulong va, int size)
{
    // Nothing to map for an empty request.
    if (size == 0)
    {
        return new WritableRegion(null, va, Memory<byte>.Empty);
    }

    if (!IsContiguous(va, size))
    {
        // Stage the data in a private buffer; passing "this" makes the region
        // write the buffer back on disposal.
        Memory<byte> staging = new byte[size];

        GetSpan(va, size).CopyTo(staging.Span);

        return new WritableRegion(this, va, staging);
    }

    // Contiguous range: hand out the backing memory itself, no write back needed.
    return new WritableRegion(null, va, _backingMemory.GetMemory(GetPhysicalAddressInternal(va), size));
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets a reference for the given type at the specified virtual memory address.
|
||||
/// </summary>
|
||||
|
29
Ryujinx.Cpu/WritableRegion.cs
Normal file
29
Ryujinx.Cpu/WritableRegion.cs
Normal file
@ -0,0 +1,29 @@
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Cpu
|
||||
{
|
||||
/// <summary>
/// Writable block of guest memory that is optionally written back to the
/// owning memory manager when disposed.
/// </summary>
public sealed class WritableRegion : IDisposable
{
    // Memory manager to write the data back to; null when no write-back is required.
    private readonly MemoryManager _mm;

    // Virtual address the data is written back to.
    private readonly ulong _va;

    // Set by the first Dispose call so that the write-back happens at most once.
    private bool _disposed;

    private bool NeedsWriteback => _mm != null;

    /// <summary>
    /// Memory that callers read from and write to.
    /// </summary>
    public Memory<byte> Memory { get; }

    /// <summary>
    /// Creates a new writable region.
    /// </summary>
    /// <param name="mm">Memory manager used for the write-back, or null to disable it</param>
    /// <param name="va">Virtual address of the region</param>
    /// <param name="memory">Memory exposed through <see cref="Memory"/></param>
    internal WritableRegion(MemoryManager mm, ulong va, Memory<byte> memory)
    {
        _mm = mm;
        _va = va;
        Memory = memory;
    }

    /// <summary>
    /// Writes the data back to the memory manager when a write-back was requested.
    /// Calls after the first one are ignored, so a repeated Dispose cannot
    /// overwrite guest memory a second time with the same buffer.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        if (NeedsWriteback)
        {
            _mm.Write(_va, Memory.Span);
        }
    }
}
|
||||
}
|
10
Ryujinx.Graphics.Device/AccessControl.cs
Normal file
10
Ryujinx.Graphics.Device/AccessControl.cs
Normal file
@ -0,0 +1,10 @@
|
||||
namespace Ryujinx.Graphics.Device
|
||||
{
|
||||
/// <summary>
/// Access permissions of a device register.
/// The values are bit flags: <see cref="ReadWrite"/> is the combination of
/// <see cref="ReadOnly"/> and <see cref="WriteOnly"/>, and individual
/// permissions are tested with <c>HasFlag</c>.
/// </summary>
[System.Flags]
public enum AccessControl
{
    /// <summary>No access.</summary>
    None = 0,

    /// <summary>The register can be read.</summary>
    ReadOnly = 1 << 0,

    /// <summary>The register can be written.</summary>
    WriteOnly = 1 << 1,

    /// <summary>The register can be read and written.</summary>
    ReadWrite = ReadOnly | WriteOnly
}
|
||||
}
|
124
Ryujinx.Graphics.Device/DeviceState.cs
Normal file
124
Ryujinx.Graphics.Device/DeviceState.cs
Normal file
@ -0,0 +1,124 @@
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using System.Reflection;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace Ryujinx.Graphics.Device
|
||||
{
|
||||
/// <summary>
/// Register file backed by an unmanaged structure, with per-register access
/// control and optional read/write callbacks.
/// </summary>
/// <typeparam name="TState">Unmanaged structure whose public fields describe the registers</typeparam>
public class DeviceState<TState> : IDeviceState where TState : unmanaged
{
    private const int RegisterSize = sizeof(int);

    /// <summary>
    /// Raw register values.
    /// </summary>
    public TState State;

    // One bit per 32-bit register, indexed by (byte offset / RegisterSize).
    private readonly BitArray _readableRegisters;
    private readonly BitArray _writableRegisters;

    // Callbacks keyed by the byte offset of the field they were registered for.
    private readonly Dictionary<int, Func<int>> _readCallbacks;
    private readonly Dictionary<int, Action<int>> _writeCallbacks;

    /// <summary>
    /// Creates a new device state, deriving the register map from the public fields of
    /// <typeparamref name="TState"/>.
    /// </summary>
    /// <remarks>
    /// Fields without a <see cref="RegisterAttribute"/> are both readable and writable.
    /// NOTE(review): field offsets are computed by summing field sizes in the order returned by
    /// reflection, which is assumed to be declaration order; the assertion at the end only
    /// verifies the total size.
    /// </remarks>
    /// <param name="callbacks">Optional callbacks, keyed by the name of the register field</param>
    public DeviceState(IReadOnlyDictionary<string, RwCallback> callbacks = null)
    {
        // Number of 32-bit registers, rounding a partial trailing register up.
        int size = (Unsafe.SizeOf<TState>() + RegisterSize - 1) / RegisterSize;

        _readableRegisters = new BitArray(size);
        _writableRegisters = new BitArray(size);

        _readCallbacks = new Dictionary<int, Func<int>>();
        _writeCallbacks = new Dictionary<int, Action<int>>();

        var fields = typeof(TState).GetFields();
        int offset = 0;

        for (int fieldIndex = 0; fieldIndex < fields.Length; fieldIndex++)
        {
            var field = fields[fieldIndex];
            var regAttr = field.GetCustomAttributes<RegisterAttribute>(false).FirstOrDefault();

            int sizeOfField = SizeCalculator.SizeOf(field.FieldType);

            // Apply the field permissions to every register the field covers.
            for (int i = 0; i < ((sizeOfField + 3) & ~3); i += 4)
            {
                _readableRegisters[(offset + i) / RegisterSize] = regAttr?.AccessControl.HasFlag(AccessControl.ReadOnly) ?? true;
                _writableRegisters[(offset + i) / RegisterSize] = regAttr?.AccessControl.HasFlag(AccessControl.WriteOnly) ?? true;
            }

            if (callbacks != null && callbacks.TryGetValue(field.Name, out var cb))
            {
                if (cb.Read != null)
                {
                    _readCallbacks.Add(offset, cb.Read);
                }

                if (cb.Write != null)
                {
                    _writeCallbacks.Add(offset, cb.Write);
                }
            }

            offset += sizeOfField;
        }

        Debug.Assert(offset == Unsafe.SizeOf<TState>());
    }

    /// <summary>
    /// Reads a register.
    /// </summary>
    /// <param name="offset">Byte offset of the register; it is aligned down to a register boundary</param>
    /// <returns>
    /// The value produced by the read callback when one is registered at the aligned offset,
    /// the stored value otherwise, or 0 when the offset is out of range or the register is not readable
    /// </returns>
    public virtual int Read(int offset)
    {
        if (Check(offset) && _readableRegisters[offset / RegisterSize])
        {
            int alignedOffset = Align(offset);

            if (_readCallbacks.TryGetValue(alignedOffset, out Func<int> read))
            {
                return read();
            }
            else
            {
                return GetRef<int>(alignedOffset);
            }
        }

        return 0;
    }

    /// <summary>
    /// Writes a register. The write is ignored when the offset is out of range
    /// or the register is not writable.
    /// </summary>
    /// <remarks>
    /// When a write callback is registered at the aligned offset it receives the data
    /// and the stored value is left unchanged.
    /// </remarks>
    /// <param name="offset">Byte offset of the register; it is aligned down to a register boundary</param>
    /// <param name="data">Value to write</param>
    public virtual void Write(int offset, int data)
    {
        if (Check(offset) && _writableRegisters[offset / RegisterSize])
        {
            int alignedOffset = Align(offset);

            if (_writeCallbacks.TryGetValue(alignedOffset, out Action<int> write))
            {
                write(data);
            }
            else
            {
                GetRef<int>(alignedOffset) = data;
            }
        }
    }

    // True when the aligned offset lies inside the state structure.
    // The unsigned cast makes negative offsets fail the comparison.
    private bool Check(int offset)
    {
        return (uint)Align(offset) < Unsafe.SizeOf<TState>();
    }

    /// <summary>
    /// Gets a reference to a value of type <typeparamref name="T"/> located at a byte offset
    /// inside <see cref="State"/>.
    /// </summary>
    /// <typeparam name="T">Type of the value</typeparam>
    /// <param name="offset">Byte offset of the value from the start of the state</param>
    /// <returns>A reference to the value</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="offset"/> is negative, or the value does not fit inside the state
    /// </exception>
    public ref T GetRef<T>(int offset) where T : unmanaged
    {
        // The end of the value is computed in 64 bits so the sum cannot wrap, and negative offsets
        // are rejected explicitly: a small negative offset would otherwise produce a small,
        // seemingly valid end position and yield a reference before the start of State.
        if (offset < 0 || (long)offset + Unsafe.SizeOf<T>() > Unsafe.SizeOf<TState>())
        {
            throw new ArgumentOutOfRangeException(nameof(offset));
        }

        return ref Unsafe.As<TState, T>(ref Unsafe.AddByteOffset(ref State, (IntPtr)offset));
    }

    // Rounds a byte offset down to the start of its 32-bit register.
    private static int Align(int offset)
    {
        return offset & ~(RegisterSize - 1);
    }
}
|
||||
}
|
8
Ryujinx.Graphics.Device/IDeviceState.cs
Normal file
8
Ryujinx.Graphics.Device/IDeviceState.cs
Normal file
@ -0,0 +1,8 @@
|
||||
namespace Ryujinx.Graphics.Device
|
||||
{
|
||||
/// <summary>
/// Register-level access to a device.
/// </summary>
public interface IDeviceState
|
||||
{
|
||||
/// <summary>
/// Reads a register.
/// </summary>
/// <param name="offset">Offset of the register (a byte offset in the implementations in this project)</param>
/// <returns>The register value</returns>
int Read(int offset);
|
||||
/// <summary>
/// Writes a register.
/// </summary>
/// <param name="offset">Offset of the register (a byte offset in the implementations in this project)</param>
/// <param name="data">Value to write</param>
void Write(int offset, int data);
|
||||
}
|
||||
}
|
15
Ryujinx.Graphics.Device/RegisterAttribute.cs
Normal file
15
Ryujinx.Graphics.Device/RegisterAttribute.cs
Normal file
@ -0,0 +1,15 @@
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Graphics.Device
|
||||
{
|
||||
/// <summary>
/// Declares the access permissions of a register field.
/// </summary>
[AttributeUsage(AttributeTargets.Field, AllowMultiple = false)]
public sealed class RegisterAttribute : Attribute
{
    /// <summary>
    /// Permissions supplied when the attribute was applied.
    /// </summary>
    public AccessControl AccessControl { get; }

    /// <summary>
    /// Creates the attribute.
    /// </summary>
    /// <param name="ac">Permissions of the register</param>
    public RegisterAttribute(AccessControl ac) => AccessControl = ac;
}
|
||||
}
|
16
Ryujinx.Graphics.Device/RwCallback.cs
Normal file
16
Ryujinx.Graphics.Device/RwCallback.cs
Normal file
@ -0,0 +1,16 @@
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Graphics.Device
|
||||
{
|
||||
/// <summary>
/// Pair of optional callbacks invoked when a register is written or read.
/// The structure is immutable: both callbacks are fixed at construction.
/// </summary>
public readonly struct RwCallback
{
    /// <summary>
    /// Callback that receives the written value, or null when there is none.
    /// </summary>
    public Action<int> Write { get; }

    /// <summary>
    /// Callback that produces the value to return for a read, or null when there is none.
    /// </summary>
    public Func<int> Read { get; }

    /// <summary>
    /// Creates a new callback pair.
    /// </summary>
    /// <param name="write">Write callback, may be null</param>
    /// <param name="read">Read callback, may be null</param>
    public RwCallback(Action<int> write, Func<int> read)
    {
        Write = write;
        Read = read;
    }
}
|
||||
}
|
7
Ryujinx.Graphics.Device/Ryujinx.Graphics.Device.csproj
Normal file
7
Ryujinx.Graphics.Device/Ryujinx.Graphics.Device.csproj
Normal file
@ -0,0 +1,7 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>netcoreapp3.1</TargetFramework>
|
||||
</PropertyGroup>
|
||||
|
||||
</Project>
|
63
Ryujinx.Graphics.Device/SizeCalculator.cs
Normal file
63
Ryujinx.Graphics.Device/SizeCalculator.cs
Normal file
@ -0,0 +1,63 @@
|
||||
using System;
|
||||
using System.Reflection;
|
||||
|
||||
namespace Ryujinx.Graphics.Device
|
||||
{
|
||||
/// <summary>
/// Computes the size in bytes of a type through reflection.
/// </summary>
static class SizeCalculator
{
    /// <summary>
    /// Gets the size in bytes of a type.
    /// </summary>
    /// <remarks>
    /// Structures without an explicit layout size are measured as the plain sum of the sizes
    /// of their instance fields; no padding or alignment is added.
    /// </remarks>
    /// <param name="type">Enum, pointer, structure or primitive type to measure</param>
    /// <returns>Size of the type in bytes</returns>
    /// <exception cref="ArgumentException">The size of the type can not be determined</exception>
    public static int SizeOf(Type type)
    {
        // Enums are measured through their underlying integral type.
        Type target = type.IsEnum ? type.GetEnumUnderlyingType() : type;

        bool isPointerSized = target.IsPointer || target == typeof(IntPtr) || target == typeof(UIntPtr);

        if (isPointerSized)
        {
            return IntPtr.Size;
        }

        bool isStruct = target.IsValueType && !target.IsPrimitive;

        if (isStruct)
        {
            // An explicit layout size wins over the field sum.
            int explicitSize = target.StructLayoutAttribute.Size;

            if (explicitSize != 0)
            {
                return explicitSize;
            }

            int total = 0;

            foreach (FieldInfo member in target.GetFields(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance))
            {
                total += SizeOf(member.FieldType);
            }

            return total;
        }

        // Everything left is expected to be a primitive type.
        switch (Type.GetTypeCode(target))
        {
            case TypeCode.SByte:
                return sizeof(sbyte);
            case TypeCode.Byte:
                return sizeof(byte);
            case TypeCode.Int16:
                return sizeof(short);
            case TypeCode.UInt16:
                return sizeof(ushort);
            case TypeCode.Int32:
                return sizeof(int);
            case TypeCode.UInt32:
                return sizeof(uint);
            case TypeCode.Int64:
                return sizeof(long);
            case TypeCode.UInt64:
                return sizeof(ulong);
            case TypeCode.Char:
                return sizeof(char);
            case TypeCode.Single:
                return sizeof(float);
            case TypeCode.Double:
                return sizeof(double);
            case TypeCode.Decimal:
                return sizeof(decimal);
            case TypeCode.Boolean:
                return sizeof(bool);
            default:
                throw new ArgumentException($"Length for type \"{target.Name}\" is unknown.");
        }
    }
}
|
||||
}
|
@ -63,7 +63,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
|
||||
else
|
||||
{
|
||||
evt.Flush();
|
||||
return (_context.MemoryAccessor.ReadUInt64(gpuVa) != 0) ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
|
||||
return (_context.MemoryAccessor.Read<ulong>(gpuVa) != 0) ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
|
||||
}
|
||||
}
|
||||
|
||||
@ -87,11 +87,11 @@ namespace Ryujinx.Graphics.Gpu.Engine
|
||||
|
||||
if (evt != null && evt2 == null)
|
||||
{
|
||||
useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt, _context.MemoryAccessor.ReadUInt64(gpuVa + 16), isEqual);
|
||||
useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt, _context.MemoryAccessor.Read<ulong>(gpuVa + 16), isEqual);
|
||||
}
|
||||
else if (evt == null && evt2 != null)
|
||||
{
|
||||
useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt2, _context.MemoryAccessor.ReadUInt64(gpuVa), isEqual);
|
||||
useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt2, _context.MemoryAccessor.Read<ulong>(gpuVa), isEqual);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -107,8 +107,8 @@ namespace Ryujinx.Graphics.Gpu.Engine
|
||||
evt?.Flush();
|
||||
evt2?.Flush();
|
||||
|
||||
ulong x = _context.MemoryAccessor.ReadUInt64(gpuVa);
|
||||
ulong y = _context.MemoryAccessor.ReadUInt64(gpuVa + 16);
|
||||
ulong x = _context.MemoryAccessor.Read<ulong>(gpuVa);
|
||||
ulong y = _context.MemoryAccessor.Read<ulong>(gpuVa + 16);
|
||||
|
||||
return (isEqual ? x == y : x != y) ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
|
||||
}
|
||||
|
@ -77,7 +77,7 @@ namespace Ryujinx.Graphics.Gpu
|
||||
{
|
||||
Renderer = renderer;
|
||||
|
||||
MemoryManager = new MemoryManager();
|
||||
MemoryManager = new MemoryManager(this);
|
||||
|
||||
MemoryAccessor = new MemoryAccessor(this);
|
||||
|
||||
|
@ -643,6 +643,8 @@ namespace Ryujinx.Graphics.Gpu.Image
|
||||
overlap.ChangeSize(info.Width, info.Height, info.DepthOrLayers);
|
||||
}
|
||||
|
||||
overlap.SynchronizeMemory();
|
||||
|
||||
return overlap;
|
||||
}
|
||||
}
|
||||
|
@ -58,42 +58,6 @@ namespace Ryujinx.Graphics.Gpu.Memory
|
||||
return MemoryMarshal.Cast<byte, T>(_context.PhysicalMemory.GetSpan(processVa, Unsafe.SizeOf<T>()))[0];
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads a signed 32-bit integer from GPU mapped memory.
/// </summary>
/// <param name="gpuVa">GPU virtual address of the value</param>
/// <returns>The integer stored at that address</returns>
public int ReadInt32(ulong gpuVa)
{
    // The GPU address is translated first; the read itself goes through physical memory.
    return _context.PhysicalMemory.Read<int>(_context.MemoryManager.Translate(gpuVa));
}
|
||||
|
||||
/// <summary>
/// Reads an unsigned 64-bit integer from GPU mapped memory.
/// </summary>
/// <param name="gpuVa">GPU virtual address of the value</param>
/// <returns>The integer stored at that address</returns>
public ulong ReadUInt64(ulong gpuVa)
{
    // The GPU address is translated first; the read itself goes through physical memory.
    return _context.PhysicalMemory.Read<ulong>(_context.MemoryManager.Translate(gpuVa));
}
|
||||
|
||||
/// <summary>
/// Writes an unsigned 8-bit integer to GPU mapped memory.
/// </summary>
/// <param name="gpuVa">GPU virtual address to write the value into</param>
/// <param name="value">The value to be written</param>
public void WriteByte(ulong gpuVa, byte value)
{
    ulong translatedVa = _context.MemoryManager.Translate(gpuVa);

    // View the single byte argument as a one-element span so it can go through the span-based write.
    var singleByte = MemoryMarshal.CreateSpan(ref value, 1);

    _context.PhysicalMemory.Write(translatedVa, singleByte);
}
|
||||
|
||||
/// <summary>
|
||||
/// Writes a 32-bits signed integer to GPU mapped memory.
|
||||
/// </summary>
|
||||
|
@ -1,4 +1,7 @@
|
||||
using Ryujinx.Cpu;
|
||||
using System;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Graphics.Gpu.Memory
|
||||
{
|
||||
@ -33,14 +36,69 @@ namespace Ryujinx.Graphics.Gpu.Memory
|
||||
|
||||
public event EventHandler<UnmapEventArgs> MemoryUnmapped;
|
||||
|
||||
private GpuContext _context;
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new instance of the GPU memory manager.
|
||||
/// </summary>
|
||||
public MemoryManager()
|
||||
public MemoryManager(GpuContext context)
|
||||
{
|
||||
_context = context;
|
||||
_pageTable = new ulong[PtLvl0Size][];
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads a value of an unmanaged type from GPU mapped memory.
/// </summary>
/// <typeparam name="T">Type of the value</typeparam>
/// <param name="gpuVa">GPU virtual address of the value</param>
/// <returns>The value stored at that address</returns>
public T Read<T>(ulong gpuVa) where T : unmanaged
{
    // Fetch exactly sizeof(T) bytes and reinterpret them as a single T.
    ReadOnlySpan<byte> raw = _context.PhysicalMemory.GetSpan(Translate(gpuVa), Unsafe.SizeOf<T>());

    return MemoryMarshal.Cast<byte, T>(raw)[0];
}
|
||||
|
||||
/// <summary>
/// Gets a read-only span of data from GPU mapped memory.
/// </summary>
/// <remarks>
/// NOTE(review): only the start address is translated here; how ranges that are not
/// contiguous after translation behave depends on the physical memory implementation - confirm.
/// </remarks>
/// <param name="gpuVa">GPU virtual address where the data is located</param>
/// <param name="size">Size of the data</param>
/// <returns>The span of the data at the specified memory location</returns>
public ReadOnlySpan<byte> GetSpan(ulong gpuVa, int size)
    => _context.PhysicalMemory.GetSpan(Translate(gpuVa), size);
|
||||
|
||||
/// <summary>
/// Gets a writable region from GPU mapped memory.
/// </summary>
/// <param name="gpuVa">GPU virtual address of the start of the range</param>
/// <param name="size">Size of the range in bytes</param>
/// <returns>A writable region with the data at the specified memory location</returns>
public WritableRegion GetWritableRegion(ulong gpuVa, int size)
    => _context.PhysicalMemory.GetWritableRegion(Translate(gpuVa), size);
|
||||
|
||||
/// <summary>
/// Writes data to GPU mapped memory.
/// </summary>
/// <param name="gpuVa">GPU virtual address to write the data into</param>
/// <param name="data">The data to be written</param>
public void Write(ulong gpuVa, ReadOnlySpan<byte> data)
    => _context.PhysicalMemory.Write(Translate(gpuVa), data);
|
||||
|
||||
/// <summary>
|
||||
/// Maps a given range of pages to the specified CPU virtual address.
|
||||
/// </summary>
|
||||
|
@ -1,3 +1,4 @@
|
||||
using Ryujinx.Cpu;
|
||||
using System;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
@ -34,6 +35,17 @@ namespace Ryujinx.Graphics.Gpu.Memory
|
||||
return _cpuMemory.GetSpan(address, size);
|
||||
}
|
||||
|
||||
/// <summary>
/// Gets a writable region from the application process.
/// </summary>
/// <param name="address">Start address of the range</param>
/// <param name="size">Size of the range in bytes</param>
/// <returns>A writable region with the data at the specified memory location</returns>
public WritableRegion GetWritableRegion(ulong address, int size)
    => _cpuMemory.GetWritableRegion(address, size);
|
||||
|
||||
/// <summary>
|
||||
/// Reads data from the application process.
|
||||
/// </summary>
|
||||
|
20
Ryujinx.Graphics.Host1x/ClassId.cs
Normal file
20
Ryujinx.Graphics.Host1x/ClassId.cs
Normal file
@ -0,0 +1,20 @@
|
||||
namespace Ryujinx.Graphics.Host1x
|
||||
{
|
||||
/// <summary>
/// Identifiers of the device classes a host1x command stream can select.
/// </summary>
public enum ClassId
{
    Host1x = 0x01,
    Mpeg   = 0x20,
    Nvenc  = 0x21,
    Vi     = 0x30,
    Isp    = 0x32,
    Ispb   = 0x34,
    Vii2c  = 0x36,
    Vic    = 0x5D,
    Gr3d   = 0x60,
    Gpu    = 0x61,
    Tsec   = 0xE0,
    Tsecb  = 0xE1,
    Nvjpg  = 0xC0,
    Nvdec  = 0xF0
}
|
||||
}
|
32
Ryujinx.Graphics.Host1x/Devices.cs
Normal file
32
Ryujinx.Graphics.Host1x/Devices.cs
Normal file
@ -0,0 +1,32 @@
|
||||
using Ryujinx.Graphics.Device;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.Graphics.Host1x
|
||||
{
|
||||
/// <summary>
/// Registry that maps class identifiers to device states.
/// </summary>
class Devices : IDisposable
{
    private readonly Dictionary<ClassId, IDeviceState> _devices = new Dictionary<ClassId, IDeviceState>();

    /// <summary>
    /// Registers a device for a class, replacing any device previously registered for it.
    /// </summary>
    /// <param name="classId">Class the device handles</param>
    /// <param name="device">Device state</param>
    public void RegisterDevice(ClassId classId, IDeviceState device)
    {
        _devices[classId] = device;
    }

    /// <summary>
    /// Looks up the device registered for a class.
    /// </summary>
    /// <param name="classId">Class to look up</param>
    /// <returns>The registered device, or null when there is none</returns>
    public IDeviceState GetDevice(ClassId classId)
    {
        // On a miss TryGetValue leaves the out value at its default, which is null.
        _devices.TryGetValue(classId, out IDeviceState found);

        return found;
    }

    /// <summary>
    /// Disposes every registered <see cref="ThiDevice"/>; other device kinds are left untouched.
    /// </summary>
    public void Dispose()
    {
        foreach (IDeviceState registered in _devices.Values)
        {
            (registered as ThiDevice)?.Dispose();
        }
    }
}
|
||||
}
|
33
Ryujinx.Graphics.Host1x/Host1xClass.cs
Normal file
33
Ryujinx.Graphics.Host1x/Host1xClass.cs
Normal file
@ -0,0 +1,33 @@
|
||||
using Ryujinx.Graphics.Device;
|
||||
using Ryujinx.Graphics.Gpu.Synchronization;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
|
||||
namespace Ryujinx.Graphics.Host1x
|
||||
{
|
||||
/// <summary>
/// Register state of the host1x class, with syncpoint wait support.
/// </summary>
public class Host1xClass : IDeviceState
{
    private readonly SynchronizationManager _syncMgr;
    private readonly DeviceState<Host1xClassRegisters> _state;

    /// <summary>
    /// Creates the host1x class state.
    /// </summary>
    /// <param name="syncMgr">Synchronization manager used to wait on syncpoints</param>
    public Host1xClass(SynchronizationManager syncMgr)
    {
        _syncMgr = syncMgr;

        // Writes to WaitSyncpt32 are routed to the wait handler instead of being stored.
        var callbacks = new Dictionary<string, RwCallback>
        {
            [nameof(Host1xClassRegisters.WaitSyncpt32)] = new RwCallback(WaitSyncpt32, null)
        };

        _state = new DeviceState<Host1xClassRegisters>(callbacks);
    }

    /// <summary>Reads a register of the class.</summary>
    public int Read(int offset)
    {
        return _state.Read(offset);
    }

    /// <summary>Writes a register of the class.</summary>
    public void Write(int offset, int data)
    {
        _state.Write(offset, data);
    }

    // Waits, without timeout, until the syncpoint selected by the low 8 bits of the written value
    // reaches the threshold currently held in the LoadSyncptPayload32 register.
    private void WaitSyncpt32(int data)
    {
        uint threshold = _state.State.LoadSyncptPayload32;

        _syncMgr.WaitOnSyncpoint((uint)(data & 0xFF), threshold, Timeout.InfiniteTimeSpan);
    }
}
|
||||
}
|
41
Ryujinx.Graphics.Host1x/Host1xClassRegisters.cs
Normal file
41
Ryujinx.Graphics.Host1x/Host1xClassRegisters.cs
Normal file
@ -0,0 +1,41 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
|
||||
namespace Ryujinx.Graphics.Host1x
|
||||
{
|
||||
/// <summary>
/// Register layout of the host1x class.
/// </summary>
/// <remarks>
/// The order of the fields defines the layout: each field is one 32-bit register, except for the
/// array fields which span several. The reserved field names encode their byte offset in
/// hexadecimal (assuming an ArrayN of uint occupies N * 4 bytes).
/// </remarks>
struct Host1xClassRegisters
|
||||
{
|
||||
public uint IncrSyncpt;
|
||||
public uint IncrSyncptCntrl;
|
||||
public uint IncrSyncptError;
|
||||
// Padding up to WaitSyncpt.
public Array5<uint> ReservedC;
|
||||
public uint WaitSyncpt;
|
||||
public uint WaitSyncptBase;
|
||||
public uint WaitSyncptIncr;
|
||||
public uint LoadSyncptBase;
|
||||
public uint IncrSyncptBase;
|
||||
public uint Clear;
|
||||
public uint Wait;
|
||||
public uint WaitWithIntr;
|
||||
public uint DelayUsec;
|
||||
public uint TickcountHi;
|
||||
public uint TickcountLo;
|
||||
public uint Tickctrl;
|
||||
// Padding up to Indctrl.
public Array23<uint> Reserved50;
|
||||
public uint Indctrl;
|
||||
public uint Indoff2;
|
||||
public uint Indoff;
|
||||
public Array31<uint> Inddata;
|
||||
public uint Reserved134;
|
||||
// Threshold that Host1xClass reads when WaitSyncpt32 is written.
public uint LoadSyncptPayload32;
|
||||
public uint Stallctrl;
|
||||
// Host1xClass registers a write callback under this field name.
public uint WaitSyncpt32;
|
||||
public uint WaitSyncptBase32;
|
||||
public uint LoadSyncptBase32;
|
||||
public uint IncrSyncptBase32;
|
||||
public uint StallcountHi;
|
||||
public uint StallcountLo;
|
||||
public uint Xrefctrl;
|
||||
public uint ChannelXrefHi;
|
||||
public uint ChannelXrefLo;
|
||||
}
|
||||
}
|
123
Ryujinx.Graphics.Host1x/Host1xDevice.cs
Normal file
123
Ryujinx.Graphics.Host1x/Host1xDevice.cs
Normal file
@ -0,0 +1,123 @@
|
||||
using Ryujinx.Common;
|
||||
using Ryujinx.Common.Logging;
|
||||
using Ryujinx.Graphics.Device;
|
||||
using Ryujinx.Graphics.Gpu.Synchronization;
|
||||
using System;
|
||||
using System.Numerics;
|
||||
|
||||
namespace Ryujinx.Graphics.Host1x
|
||||
{
|
||||
/// <summary>
/// Decodes host1x command buffers and forwards the resulting register writes
/// to the device selected by the command stream.
/// </summary>
public sealed class Host1xDevice : IDisposable
{
    private readonly SyncptIncrManager _syncptIncrMgr;
    private readonly AsyncWorkQueue<int[]> _commandQueue;

    private readonly Devices _devices = new Devices();

    /// <summary>
    /// State of the host1x class itself.
    /// </summary>
    public Host1xClass Class { get; }

    // Device selected by the last SetClass opcode; null when nothing is registered for that class.
    private IDeviceState _device;

    // Decoder state carried from one command word to the next.
    private int _count;
    private int _offset;
    private int _mask;
    private bool _incrementing;

    /// <summary>
    /// Creates the host1x device and registers the host1x class on it.
    /// </summary>
    /// <param name="syncMgr">Synchronization manager shared with the GPU</param>
    public Host1xDevice(SynchronizationManager syncMgr)
    {
        _syncptIncrMgr = new SyncptIncrManager(syncMgr);
        _commandQueue = new AsyncWorkQueue<int[]>(Process, "Ryujinx.Host1xProcessor");

        Class = new Host1xClass(syncMgr);

        _devices.RegisterDevice(ClassId.Host1x, Class);
    }

    /// <summary>
    /// Registers a device for a class. The device is wrapped in a <see cref="ThiDevice"/>.
    /// </summary>
    /// <param name="classId">Class handled by the device</param>
    /// <param name="device">Device state, must not be null</param>
    /// <exception cref="ArgumentNullException"><paramref name="device"/> is null</exception>
    public void RegisterDevice(ClassId classId, IDeviceState device)
    {
        if (device == null)
        {
            throw new ArgumentNullException(nameof(device));
        }

        _devices.RegisterDevice(classId, new ThiDevice(classId, device, _syncptIncrMgr));
    }

    /// <summary>
    /// Queues a command buffer for processing. The buffer is copied, so the caller
    /// may reuse its memory once this method returns.
    /// </summary>
    /// <param name="commandBuffer">Command words to process</param>
    public void Submit(ReadOnlySpan<int> commandBuffer)
    {
        int[] copy = commandBuffer.ToArray();

        _commandQueue.Add(copy);
    }

    // Queue callback: feeds each word of the buffer to the decoder, in order.
    private void Process(int[] commandBuffer)
    {
        foreach (int word in commandBuffer)
        {
            Step(word);
        }
    }

    // Consumes one command word: either an argument of the opcode currently in flight,
    // or a new opcode.
    private void Step(int value)
    {
        // Arguments of a pending masked write: one word per set bit, lowest bit first.
        if (_mask != 0)
        {
            int bit = BitOperations.TrailingZeroCount(_mask);

            _mask &= ~(1 << bit);

            DeviceWrite(_offset + bit, value);

            return;
        }

        // Arguments of a pending Incr/NonIncr write.
        if (_count != 0)
        {
            _count--;

            DeviceWrite(_offset, value);

            if (_incrementing)
            {
                _offset++;
            }

            return;
        }

        // Otherwise the word is an opcode: the top 4 bits select it and,
        // for the handled opcodes, bits 16-27 hold the register offset.
        OpCode opCode = (OpCode)((value >> 28) & 0xf);
        int registerOffset = (value >> 16) & 0xfff;

        switch (opCode)
        {
            case OpCode.SetClass:
                _mask = value & 0x3f;
                _offset = registerOffset;
                _device = _devices.GetDevice((ClassId)((value >> 6) & 0x3ff));
                break;

            case OpCode.Incr:
            case OpCode.NonIncr:
                _count = value & 0xffff;
                _offset = registerOffset;
                _incrementing = opCode == OpCode.Incr;
                break;

            case OpCode.Mask:
                _mask = value & 0xffff;
                _offset = registerOffset;
                break;

            case OpCode.Imm:
                _offset = registerOffset;
                DeviceWrite(_offset, value & 0xfff);
                break;

            default:
                Logger.PrintError(LogClass.Host1x, $"Unsupported opcode \"{opCode}\".");
                break;
        }
    }

    // Writes a register of the selected device. Offsets in the command stream are in
    // 32-bit registers and are converted to bytes here. Writes are dropped when no device is selected.
    private void DeviceWrite(int offset, int data)
    {
        if (_device != null)
        {
            _device.Write(offset * 4, data);
        }
    }

    /// <summary>
    /// Disposes the command queue, then the registered devices.
    /// </summary>
    public void Dispose()
    {
        _commandQueue.Dispose();
        _devices.Dispose();
    }
}
|
||||
}
|
21
Ryujinx.Graphics.Host1x/OpCode.cs
Normal file
21
Ryujinx.Graphics.Host1x/OpCode.cs
Normal file
@ -0,0 +1,21 @@
|
||||
namespace Ryujinx.Graphics.Host1x
|
||||
{
|
||||
/// <summary>
/// Opcodes of the host1x command stream, as found in the top 4 bits of a command word.
/// </summary>
enum OpCode
{
    SetClass  = 0,
    Incr      = 1,
    NonIncr   = 2,
    Mask      = 3,
    Imm       = 4,
    Restart   = 5,
    Gather    = 6,
    SetStrmId = 7,
    SetAppId  = 8,
    SetPyld   = 9,
    IncrW     = 10,
    NonIncrW  = 11,
    GatherW   = 12,
    RestartW  = 13,
    Extend    = 14
}
|
||||
}
|
20
Ryujinx.Graphics.Host1x/Ryujinx.Graphics.Host1x.csproj
Normal file
20
Ryujinx.Graphics.Host1x/Ryujinx.Graphics.Host1x.csproj
Normal file
@ -0,0 +1,20 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>netcoreapp3.1</TargetFramework>
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
|
||||
<AllowUnsafeBlocks>false</AllowUnsafeBlocks>
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
|
||||
<AllowUnsafeBlocks>false</AllowUnsafeBlocks>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Ryujinx.Graphics.Device\Ryujinx.Graphics.Device.csproj" />
|
||||
<ProjectReference Include="..\Ryujinx.Graphics.Gpu\Ryujinx.Graphics.Gpu.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
99
Ryujinx.Graphics.Host1x/SyncptIncrManager.cs
Normal file
99
Ryujinx.Graphics.Host1x/SyncptIncrManager.cs
Normal file
@ -0,0 +1,99 @@
|
||||
using Ryujinx.Graphics.Gpu.Synchronization;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.Graphics.Host1x
|
||||
{
|
||||
/// <summary>
/// Keeps an ordered list of pending syncpoint increments and applies them,
/// in submission order, once they are marked as done.
/// </summary>
class SyncptIncrManager
{
    private readonly SynchronizationManager _syncMgr;

    // One pending increment. Instances are immutable; marking one as done replaces it in the list.
    private struct SyncptIncr
    {
        public uint Id { get; }
        public ClassId ClassId { get; }
        public uint SyncptId { get; }
        public bool Done { get; }

        public SyncptIncr(uint id, ClassId classId, uint syncptId, bool done = false)
        {
            Id = id;
            ClassId = classId;
            SyncptId = syncptId;
            Done = done;
        }
    }

    // Pending increments, oldest first. Also used as the lock object for all operations.
    private readonly List<SyncptIncr> _incrs = new List<SyncptIncr>();

    // Next handle returned by IncrementWhenDone.
    private uint _currentId;

    /// <summary>
    /// Creates the manager.
    /// </summary>
    /// <param name="syncMgr">Synchronization manager that owns the syncpoints</param>
    public SyncptIncrManager(SynchronizationManager syncMgr)
    {
        _syncMgr = syncMgr;
    }

    /// <summary>
    /// Queues an increment that is already done. It is applied as soon as
    /// every increment queued before it is done.
    /// </summary>
    /// <param name="id">Syncpoint to increment</param>
    public void Increment(uint id)
    {
        lock (_incrs)
        {
            _incrs.Add(new SyncptIncr(0, 0, id, true));

            IncrementAllDone();
        }
    }

    /// <summary>
    /// Queues an increment that is applied only after <see cref="SignalDone"/> is called
    /// with the returned handle.
    /// </summary>
    /// <param name="classId">Class that requested the increment</param>
    /// <param name="id">Syncpoint to increment</param>
    /// <returns>Handle to pass to <see cref="SignalDone"/></returns>
    public uint IncrementWhenDone(ClassId classId, uint id)
    {
        lock (_incrs)
        {
            uint handle = _currentId;

            _currentId++;

            _incrs.Add(new SyncptIncr(handle, classId, id));

            return handle;
        }
    }

    /// <summary>
    /// Marks the first pending increment with the given handle as done, then applies
    /// every increment that has become ready.
    /// </summary>
    /// <param name="handle">Handle returned by <see cref="IncrementWhenDone"/></param>
    public void SignalDone(uint handle)
    {
        lock (_incrs)
        {
            for (int index = 0; index < _incrs.Count; index++)
            {
                SyncptIncr pending = _incrs[index];

                if (pending.Id != handle)
                {
                    continue;
                }

                _incrs[index] = new SyncptIncr(pending.Id, pending.ClassId, pending.SyncptId, true);

                break;
            }

            IncrementAllDone();
        }
    }

    // Applies and removes the leading run of done increments, stopping at the first one
    // that is still pending so that increments are applied in submission order.
    private void IncrementAllDone()
    {
        lock (_incrs)
        {
            int ready = 0;

            while (ready < _incrs.Count && _incrs[ready].Done)
            {
                _syncMgr.IncrementSyncpoint(_incrs[ready].SyncptId);

                ready++;
            }

            _incrs.RemoveRange(0, ready);
        }
    }
}
|
||||
}
|
96
Ryujinx.Graphics.Host1x/ThiDevice.cs
Normal file
96
Ryujinx.Graphics.Host1x/ThiDevice.cs
Normal file
@ -0,0 +1,96 @@
|
||||
using Ryujinx.Common;
|
||||
using Ryujinx.Graphics.Device;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.Graphics.Host1x
|
||||
{
|
||||
/// <summary>
/// Wraps a device behind the THI register interface: method calls and deferred
/// syncpoint increments are placed on a per-device command queue.
/// </summary>
class ThiDevice : IDeviceState, IDisposable
{
    private readonly ClassId _classId;
    private readonly IDeviceState _device;

    private readonly SyncptIncrManager _syncptIncrMgr;

    // Base of the queued commands; Data is the method argument or the increment handle.
    private class CommandAction
    {
        public int Data { get; }

        public CommandAction(int data)
        {
            Data = data;
        }
    }

    // Register write to forward to the wrapped device.
    private class MethodCallAction : CommandAction
    {
        public int Method { get; }

        public MethodCallAction(int method, int data) : base(data)
        {
            Method = method;
        }
    }

    // Marker for a deferred syncpoint increment; Data carries the increment handle.
    private class SyncptIncrAction : CommandAction
    {
        public SyncptIncrAction(uint syncptIncrHandle) : base((int)syncptIncrHandle)
        {
        }
    }

    private readonly AsyncWorkQueue<CommandAction> _commandQueue;

    private readonly DeviceState<ThiRegisters> _state;

    /// <summary>
    /// Creates the wrapper.
    /// </summary>
    /// <param name="classId">Class of the wrapped device</param>
    /// <param name="device">Device that receives the method calls</param>
    /// <param name="syncptIncrMgr">Manager that applies the syncpoint increments</param>
    public ThiDevice(ClassId classId, IDeviceState device, SyncptIncrManager syncptIncrMgr)
    {
        _classId = classId;
        _device = device;
        _syncptIncrMgr = syncptIncrMgr;
        _commandQueue = new AsyncWorkQueue<CommandAction>(Process, $"Ryujinx.{classId}Processor");

        // Writes to these two registers trigger an action instead of being stored.
        var callbacks = new Dictionary<string, RwCallback>
        {
            [nameof(ThiRegisters.IncrSyncpt)] = new RwCallback(IncrSyncpt, null),
            [nameof(ThiRegisters.Method1)] = new RwCallback(Method1, null)
        };

        _state = new DeviceState<ThiRegisters>(callbacks);
    }

    /// <summary>Reads a THI register.</summary>
    public int Read(int offset)
    {
        return _state.Read(offset);
    }

    /// <summary>Writes a THI register.</summary>
    public void Write(int offset, int data)
    {
        _state.Write(offset, data);
    }

    // Bits 0-7 select the syncpoint, bits 8-15 the condition (0 = Immediate, 1 = Done).
    // Any non-zero condition is deferred until the commands queued so far have been processed.
    private void IncrSyncpt(int data)
    {
        uint syncpointId = (uint)(data & 0xFF);
        bool immediate = (uint)((data >> 8) & 0xFF) == 0;

        if (immediate)
        {
            _syncptIncrMgr.Increment(syncpointId);

            return;
        }

        uint handle = _syncptIncrMgr.IncrementWhenDone(_classId, syncpointId);

        _commandQueue.Add(new SyncptIncrAction(handle));
    }

    // Queues a write of the given data to the method currently held in the Method0 register,
    // converted from a register index to a byte offset.
    private void Method1(int data)
    {
        int method = (int)_state.State.Method0 * 4;

        _commandQueue.Add(new MethodCallAction(method, data));
    }

    // Queue callback: executes one queued command.
    private void Process(CommandAction cmdAction)
    {
        switch (cmdAction)
        {
            case SyncptIncrAction incrAction:
                _syncptIncrMgr.SignalDone((uint)incrAction.Data);
                break;

            case MethodCallAction callAction:
                _device.Write(callAction.Method, callAction.Data);
                break;
        }
    }

    /// <summary>Disposes the command queue.</summary>
    public void Dispose()
    {
        _commandQueue.Dispose();
    }
}
|
||||
}
|
22
Ryujinx.Graphics.Host1x/ThiRegisters.cs
Normal file
22
Ryujinx.Graphics.Host1x/ThiRegisters.cs
Normal file
@ -0,0 +1,22 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
|
||||
namespace Ryujinx.Graphics.Host1x
|
||||
{
|
||||
/// <summary>
/// Register layout of the THI interface used by ThiDevice.
/// </summary>
/// <remarks>
/// The order of the fields defines the layout: each field is one 32-bit register, except for the
/// array fields which span several. The reserved field names encode their byte offset in
/// hexadecimal (assuming an ArrayN of uint occupies N * 4 bytes).
/// </remarks>
struct ThiRegisters
|
||||
{
|
||||
// ThiDevice registers a write callback under this field name.
public uint IncrSyncpt;
|
||||
public uint Reserved4;
|
||||
public uint IncrSyncptErr;
|
||||
public uint CtxswIncrSyncpt;
|
||||
public Array4<uint> Reserved10;
|
||||
public uint Ctxsw;
|
||||
public uint Reserved24;
|
||||
public uint ContSyncptEof;
|
||||
public Array5<uint> Reserved2C;
|
||||
// Method index that ThiDevice reads when Method1 is written.
public uint Method0;
|
||||
// ThiDevice registers a write callback under this field name.
public uint Method1;
|
||||
public Array12<uint> Reserved48;
|
||||
public uint IntStatus;
|
||||
public uint IntMask;
|
||||
}
|
||||
}
|
40
Ryujinx.Graphics.Nvdec.H264/Decoder.cs
Normal file
40
Ryujinx.Graphics.Nvdec.H264/Decoder.cs
Normal file
@ -0,0 +1,40 @@
|
||||
using Ryujinx.Graphics.Video;
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.H264
|
||||
{
|
||||
/// <summary>
/// H264 decoder that reconstructs the SPS/PPS headers from the picture information
/// and decodes through an FFmpeg context.
/// </summary>
public class Decoder : IH264Decoder
{
    public bool IsHardwareAccelerated => false;

    private const int WorkBufferSize = 0x200;

    // Scratch buffer handed to the SPS/PPS reconstruction.
    private readonly byte[] _workBuffer = new byte[WorkBufferSize];

    private readonly FFmpegContext _context = new FFmpegContext();

    /// <summary>
    /// Creates a surface to decode into. The dimensions are not used by this implementation.
    /// </summary>
    /// <param name="width">Requested width</param>
    /// <param name="height">Requested height</param>
    /// <returns>A new surface</returns>
    public ISurface CreateSurface(int width, int height)
    {
        return new Surface();
    }

    /// <summary>
    /// Decodes one frame.
    /// </summary>
    /// <param name="pictureInfo">Picture information the headers are reconstructed from</param>
    /// <param name="output">Surface to decode into; must be a surface created by this decoder</param>
    /// <param name="bitstream">Bitstream of the frame, without the headers</param>
    /// <returns>True when the decode call reported success (a return code of 0)</returns>
    public bool Decode(ref H264PictureInfo pictureInfo, ISurface output, ReadOnlySpan<byte> bitstream)
    {
        ReadOnlySpan<byte> headers = SpsAndPpsReconstruction.Reconstruct(ref pictureInfo, _workBuffer);

        // The reconstructed headers go in front of the frame data.
        byte[] packet = Prepend(bitstream, headers);

        int result = _context.DecodeFrame((Surface)output, packet);

        return result == 0;
    }

    // Returns a new array holding "prep" followed by "data".
    private static byte[] Prepend(ReadOnlySpan<byte> data, ReadOnlySpan<byte> prep)
    {
        byte[] output = new byte[data.Length + prep.Length];

        Span<byte> destination = output;

        prep.CopyTo(destination);
        data.CopyTo(destination.Slice(prep.Length));

        return output;
    }

    /// <summary>Disposes the FFmpeg context.</summary>
    public void Dispose()
    {
        _context.Dispose();
    }
}
|
||||
}
|
51
Ryujinx.Graphics.Nvdec.H264/FFmpegContext.cs
Normal file
51
Ryujinx.Graphics.Nvdec.H264/FFmpegContext.cs
Normal file
@ -0,0 +1,51 @@
|
||||
using FFmpeg.AutoGen;
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.H264
|
||||
{
|
||||
/// <summary>
/// Owns an FFmpeg H264 decoder context and exposes a single frame decode operation.
/// </summary>
unsafe class FFmpegContext : IDisposable
{
    private readonly AVCodec* _codec;
    private AVCodecContext* _context;

    /// <summary>
    /// Looks up the H264 decoder, allocates a codec context for it and opens it.
    /// The result of opening the codec is not checked.
    /// </summary>
    public FFmpegContext()
    {
        AVCodec* codec = ffmpeg.avcodec_find_decoder(AVCodecID.AV_CODEC_ID_H264);
        AVCodecContext* context = ffmpeg.avcodec_alloc_context3(codec);

        _codec = codec;
        _context = context;

        ffmpeg.avcodec_open2(context, codec, null);
    }

    /// <summary>
    /// Sends <paramref name="bitstream"/> to the decoder as one packet and receives the decoded frame.
    /// </summary>
    /// <param name="output">Surface whose frame receives the decoded picture</param>
    /// <param name="bitstream">Data for the packet</param>
    /// <returns>
    /// The non-zero result of sending the packet if that failed, otherwise the result of receiving the frame
    /// </returns>
    public int DecodeFrame(Surface output, ReadOnlySpan<byte> bitstream)
    {
        AVPacket packet;

        ffmpeg.av_init_packet(&packet);

        int sendResult;

        // The bitstream only needs to stay pinned while the packet is being sent.
        fixed (byte* bitstreamPtr = bitstream)
        {
            packet.data = bitstreamPtr;
            packet.size = bitstream.Length;

            sendResult = ffmpeg.avcodec_send_packet(_context, &packet);
        }

        if (sendResult != 0)
        {
            return sendResult;
        }

        return ffmpeg.avcodec_receive_frame(_context, output.Frame);
    }

    /// <summary>
    /// Closes the codec and frees the codec context.
    /// </summary>
    public void Dispose()
    {
        ffmpeg.avcodec_close(_context);

        // avcodec_free_context takes a pointer to the context pointer, so the field has to be pinned.
        fixed (AVCodecContext** contextRef = &_context)
        {
            ffmpeg.avcodec_free_context(contextRef);
        }
    }
}
|
||||
}
|
121
Ryujinx.Graphics.Nvdec.H264/H264BitStreamWriter.cs
Normal file
121
Ryujinx.Graphics.Nvdec.H264/H264BitStreamWriter.cs
Normal file
@ -0,0 +1,121 @@
|
||||
using System;
|
||||
using System.Numerics;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.H264
|
||||
{
|
||||
/// <summary>
/// Most-significant-bit-first bit writer that accumulates bits into one byte and
/// stores completed bytes sequentially into a caller supplied buffer.
/// </summary>
struct H264BitStreamWriter
{
    // Number of bits accumulated before a byte is stored.
    private const int BufferSize = 8;

    private readonly byte[] _workBuffer;

    private int _offset;    // Next index of _workBuffer to be written.
    private int _buffer;    // Bits accumulated for the byte currently being built.
    private int _bufferPos; // Number of bits already present in _buffer.

    /// <summary>
    /// Creates a writer that stores its output at the start of <paramref name="workBuffer"/>.
    /// </summary>
    /// <param name="workBuffer">Destination buffer</param>
    public H264BitStreamWriter(byte[] workBuffer)
    {
        _workBuffer = workBuffer;
        _offset = 0;
        _buffer = 0;
        _bufferPos = 0;
    }

    /// <summary>
    /// Writes a single bit.
    /// </summary>
    /// <param name="value">True writes 1, false writes 0</param>
    public void WriteBit(bool value)
    {
        int bit = value ? 1 : 0;

        WriteBits(bit, 1);
    }

    /// <summary>
    /// Writes the low <paramref name="valueSize"/> bits of <paramref name="value"/>, most significant first.
    /// </summary>
    /// <param name="value">Bits to write</param>
    /// <param name="valueSize">Number of bits to write; nothing is written when it is zero or negative</param>
    public void WriteBits(int value, int valueSize)
    {
        int written = 0;

        while (written < valueSize)
        {
            // GetFreeBufferBits stores the pending byte first when it is full,
            // so there is always room for at least one bit here.
            int chunk = Math.Min(valueSize - written, GetFreeBufferBits());

            int chunkMask = (1 << chunk) - 1;

            int sourceShift = valueSize - written - chunk;
            int targetShift = BufferSize - _bufferPos - chunk;

            int chunkBits = (value >> sourceShift) & chunkMask;

            _buffer |= chunkBits << targetShift;

            written += chunk;
            _bufferPos += chunk;
        }
    }

    /// <summary>
    /// Returns how many bits can still be added to the pending byte, storing it first when it is full.
    /// </summary>
    private int GetFreeBufferBits()
    {
        if (_bufferPos == BufferSize)
        {
            Flush();
        }

        return BufferSize - _bufferPos;
    }

    /// <summary>
    /// Stores the pending byte, if it holds any bits, and starts a new one.
    /// Bits that were not written are left as zero.
    /// </summary>
    public void Flush()
    {
        if (_bufferPos == 0)
        {
            return;
        }

        _workBuffer[_offset] = (byte)_buffer;
        _offset++;

        _buffer = 0;
        _bufferPos = 0;
    }

    /// <summary>
    /// Writes a single 1 bit and then flushes the pending byte.
    /// </summary>
    public void End()
    {
        WriteBit(true);

        Flush();
    }

    /// <summary>
    /// Returns the bytes stored so far. Bits that were not flushed yet are not included.
    /// </summary>
    public Span<byte> AsSpan()
    {
        return new Span<byte>(_workBuffer).Slice(0, _offset);
    }

    /// <summary>
    /// Writes the low <paramref name="valueSize"/> bits of an unsigned value.
    /// </summary>
    public void WriteU(uint value, int valueSize)
    {
        WriteBits((int)value, valueSize);
    }

    /// <summary>
    /// Writes a signed Exp-Golomb coded value.
    /// </summary>
    public void WriteSe(int value)
    {
        WriteExpGolombCodedInt(value);
    }

    /// <summary>
    /// Writes an unsigned Exp-Golomb coded value.
    /// </summary>
    public void WriteUe(uint value)
    {
        WriteExpGolombCodedUInt(value);
    }

    private void WriteExpGolombCodedInt(int value)
    {
        // Positive values map to odd code numbers (2 * v - 1), zero and negatives to even ones (2 * |v|).
        int positiveAdjust = value > 0 ? 1 : 0;
        int magnitude = value < 0 ? -value : value;

        int codeNum = (magnitude << 1) - positiveAdjust;

        WriteExpGolombCodedUInt((uint)codeNum);
    }

    private void WriteExpGolombCodedUInt(uint value)
    {
        // Number of significant bits of (value + 1).
        int size = 32 - BitOperations.LeadingZeroCount(value + 1);
        int suffixSize = size - 1;

        // Prefix: suffixSize zero bits followed by a single 1 bit.
        WriteBits(1, size);

        // Suffix: the offset of value inside the range that shares this prefix.
        uint suffix = value - ((1u << suffixSize) - 1);

        WriteBits((int)suffix, suffixSize);
    }
}
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>netcoreapp3.1</TargetFramework>
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
|
||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
|
||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="FFmpeg.AutoGen" Version="4.3.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Ryujinx.Graphics.Video\Ryujinx.Graphics.Video.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
159
Ryujinx.Graphics.Nvdec.H264/SpsAndPpsReconstruction.cs
Normal file
159
Ryujinx.Graphics.Nvdec.H264/SpsAndPpsReconstruction.cs
Normal file
@ -0,0 +1,159 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Video;
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.H264
|
||||
{
|
||||
/// <summary>
/// Rebuilds the H264 sequence and picture parameter sets from picture information.
/// </summary>
static class SpsAndPpsReconstruction
{
    // ZigZag LUTs from libavcodec.
    private static readonly byte[] ZigZagDirect = new byte[]
    {
        0,   1,  8, 16,  9,  2,  3, 10,
        17, 24, 32, 25, 18, 11,  4,  5,
        12, 19, 26, 33, 40, 48, 41, 34,
        27, 20, 13,  6,  7, 14, 21, 28,
        35, 42, 49, 56, 57, 50, 43, 36,
        29, 22, 15, 23, 30, 37, 44, 51,
        58, 59, 52, 45, 38, 31, 39, 46,
        53, 60, 61, 54, 47, 55, 62, 63
    };

    // Each entry is column + row * 4.
    private static readonly byte[] ZigZagScan = new byte[]
    {
        0,  1,  4,  8,
        5,  2,  3,  6,
        9, 12, 13, 10,
        7, 11, 14, 15
    };

    /// <summary>
    /// Writes a sequence parameter set followed by a picture parameter set into <paramref name="workBuffer"/>.
    /// </summary>
    /// <param name="pictureInfo">Picture information supplying the parameter values</param>
    /// <param name="workBuffer">Buffer that receives the generated bytes</param>
    /// <returns>The part of <paramref name="workBuffer"/> that was written</returns>
    public static Span<byte> Reconstruct(ref H264PictureInfo pictureInfo, byte[] workBuffer)
    {
        H264BitStreamWriter writer = new H264BitStreamWriter(workBuffer);

        WriteSequenceParameterSet(ref writer, ref pictureInfo);
        WritePictureParameterSet(ref writer, ref pictureInfo);

        return writer.AsSpan();
    }

    /// <summary>
    /// Writes the 24-bit start code (value 1) followed by the one byte NAL unit header.
    /// </summary>
    private static void WriteNalUnitHeader(ref H264BitStreamWriter writer, uint nalUnitType)
    {
        writer.WriteU(1, 24);
        writer.WriteU(0, 1);
        writer.WriteU(3, 2);
        writer.WriteU(nalUnitType, 5);
    }

    /// <summary>
    /// Writes the sequence parameter set (NAL unit type 7).
    /// </summary>
    private static void WriteSequenceParameterSet(ref H264BitStreamWriter writer, ref H264PictureInfo pictureInfo)
    {
        WriteNalUnitHeader(ref writer, 7);

        writer.WriteU(100, 8); // Profile idc
        writer.WriteU(0, 8);   // Reserved
        writer.WriteU(31, 8);  // Level idc
        writer.WriteUe(0);     // Seq parameter set id
        writer.WriteUe(pictureInfo.ChromaFormatIdc);

        if (pictureInfo.ChromaFormatIdc == 3)
        {
            writer.WriteBit(false); // Separate colour plane flag
        }

        writer.WriteUe(0); // Bit depth luma minus 8
        writer.WriteUe(0); // Bit depth chroma minus 8
        writer.WriteBit(pictureInfo.QpprimeYZeroTransformBypassFlag);
        writer.WriteBit(false); // Scaling matrix present flag

        writer.WriteUe(pictureInfo.Log2MaxFrameNumMinus4);
        writer.WriteUe(pictureInfo.PicOrderCntType);

        // Only picture order count types 0 and 1 carry extra fields.
        if (pictureInfo.PicOrderCntType == 0)
        {
            writer.WriteUe(pictureInfo.Log2MaxPicOrderCntLsbMinus4);
        }
        else if (pictureInfo.PicOrderCntType == 1)
        {
            writer.WriteBit(pictureInfo.DeltaPicOrderAlwaysZeroFlag);

            writer.WriteSe(0); // Offset for non-ref pic
            writer.WriteSe(0); // Offset for top to bottom field
            writer.WriteUe(0); // Num ref frames in pic order cnt cycle
        }

        writer.WriteUe(16);     // Max num ref frames
        writer.WriteBit(false); // Gaps in frame num value allowed flag
        writer.WriteUe(pictureInfo.PicWidthInMbsMinus1);
        writer.WriteUe(pictureInfo.PicHeightInMapUnitsMinus1);
        writer.WriteBit(pictureInfo.FrameMbsOnlyFlag);

        if (!pictureInfo.FrameMbsOnlyFlag)
        {
            writer.WriteBit(pictureInfo.MbAdaptiveFrameFieldFlag);
        }

        writer.WriteBit(pictureInfo.Direct8x8InferenceFlag);
        writer.WriteBit(false); // Frame cropping flag
        writer.WriteBit(false); // VUI parameter present flag

        writer.End();
    }

    /// <summary>
    /// Writes the picture parameter set (NAL unit type 8).
    /// </summary>
    private static void WritePictureParameterSet(ref H264BitStreamWriter writer, ref H264PictureInfo pictureInfo)
    {
        WriteNalUnitHeader(ref writer, 8);

        writer.WriteUe(0); // Pic parameter set id
        writer.WriteUe(0); // Seq parameter set id

        writer.WriteBit(pictureInfo.EntropyCodingModeFlag);
        writer.WriteBit(false); // Bottom field pic order in frame present flag
        writer.WriteUe(0);      // Num slice groups minus 1
        writer.WriteUe(pictureInfo.NumRefIdxL0ActiveMinus1);
        writer.WriteUe(pictureInfo.NumRefIdxL1ActiveMinus1);
        writer.WriteBit(pictureInfo.WeightedPredFlag);
        writer.WriteU(pictureInfo.WeightedBipredIdc, 2);
        writer.WriteSe(pictureInfo.PicInitQpMinus26);
        writer.WriteSe(0); // Pic init qs minus 26
        writer.WriteSe(pictureInfo.ChromaQpIndexOffset);
        writer.WriteBit(pictureInfo.DeblockingFilterControlPresentFlag);
        writer.WriteBit(pictureInfo.ConstrainedIntraPredFlag);
        writer.WriteBit(pictureInfo.RedundantPicCntPresentFlag);
        writer.WriteBit(pictureInfo.Transform8x8ModeFlag);

        writer.WriteBit(pictureInfo.ScalingMatrixPresent);

        if (pictureInfo.ScalingMatrixPresent)
        {
            // Every list is flagged as present with a 1 bit, then written in full.
            for (int listIndex = 0; listIndex < 6; listIndex++)
            {
                writer.WriteBit(true);

                WriteScalingList(ref writer, pictureInfo.ScalingLists4x4[listIndex]);
            }

            if (pictureInfo.Transform8x8ModeFlag)
            {
                for (int listIndex = 0; listIndex < 2; listIndex++)
                {
                    writer.WriteBit(true);

                    WriteScalingList(ref writer, pictureInfo.ScalingLists8x8[listIndex]);
                }
            }
        }

        writer.WriteSe(pictureInfo.SecondChromaQpIndexOffset);

        writer.End();
    }

    /// <summary>
    /// Writes a scaling list as signed Exp-Golomb deltas, visiting the entries in zig-zag order.
    /// The first delta is relative to 8.
    /// </summary>
    private static void WriteScalingList(ref H264BitStreamWriter writer, IArray<byte> list)
    {
        int count = list.Length;

        // Lists with 16 entries use the 4x4 scan table, all others the 8x8 one.
        byte[] scanOrder = ZigZagDirect;

        if (count == 16)
        {
            scanOrder = ZigZagScan;
        }

        int previous = 8;

        for (int position = 0; position < count; position++)
        {
            byte current = list[scanOrder[position]];

            writer.WriteSe(current - previous);

            previous = current;
        }
    }
}
|
||||
}
|
33
Ryujinx.Graphics.Nvdec.H264/Surface.cs
Normal file
33
Ryujinx.Graphics.Nvdec.H264/Surface.cs
Normal file
@ -0,0 +1,33 @@
|
||||
using FFmpeg.AutoGen;
|
||||
using Ryujinx.Graphics.Video;
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.H264
|
||||
{
|
||||
/// <summary>
/// Surface backed by an FFmpeg frame. All dimensions and planes are read from the frame.
/// </summary>
unsafe class Surface : ISurface
{
    /// <summary>
    /// Allocates the underlying FFmpeg frame.
    /// </summary>
    public Surface()
    {
        Frame = ffmpeg.av_frame_alloc();
    }

    /// <summary>
    /// Underlying FFmpeg frame.
    /// </summary>
    public AVFrame* Frame { get; }

    /// <summary>
    /// First data plane of the frame, sized as stride times height.
    /// </summary>
    public Plane YPlane
    {
        get
        {
            int size = Stride * Height;

            return new Plane((IntPtr)Frame->data[0], size);
        }
    }

    /// <summary>
    /// Second data plane of the frame, sized as UV stride times UV height.
    /// </summary>
    public Plane UPlane
    {
        get
        {
            int size = UvStride * UvHeight;

            return new Plane((IntPtr)Frame->data[1], size);
        }
    }

    /// <summary>
    /// Third data plane of the frame, sized as UV stride times UV height.
    /// </summary>
    public Plane VPlane
    {
        get
        {
            int size = UvStride * UvHeight;

            return new Plane((IntPtr)Frame->data[2], size);
        }
    }

    public int Width
    {
        get { return Frame->width; }
    }

    public int Height
    {
        get { return Frame->height; }
    }

    public int Stride
    {
        get { return Frame->linesize[0]; }
    }

    // The UV dimensions are half of the frame dimensions, rounded up.
    public int UvWidth
    {
        get { return (Frame->width + 1) >> 1; }
    }

    public int UvHeight
    {
        get { return (Frame->height + 1) >> 1; }
    }

    public int UvStride
    {
        get { return Frame->linesize[1]; }
    }

    /// <summary>
    /// Unreferences the frame data, then frees the frame itself.
    /// </summary>
    public void Dispose()
    {
        ffmpeg.av_frame_unref(Frame);
        ffmpeg.av_free(Frame);
    }
}
|
||||
}
|
9
Ryujinx.Graphics.Nvdec.Vp9/BitDepth.cs
Normal file
9
Ryujinx.Graphics.Nvdec.Vp9/BitDepth.cs
Normal file
@ -0,0 +1,9 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
/// <summary>
/// Sample bit depth. The numeric value of each member equals the number of bits.
/// </summary>
internal enum BitDepth
{
    Bits8 = 8, /**< 8 bits */
    Bits10 = 10, /**< 10 bits */
    Bits12 = 12, /**< 12 bits */
}
|
||||
}
|
56
Ryujinx.Graphics.Nvdec.Vp9/CodecErr.cs
Normal file
56
Ryujinx.Graphics.Nvdec.Vp9/CodecErr.cs
Normal file
@ -0,0 +1,56 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
/// <summary>
/// Codec result codes.
/// </summary>
internal enum CodecErr
{
    /// <summary>
    /// Operation completed without error.
    /// </summary>
    CodecOk = 0,

    /// <summary>
    /// Unspecified error.
    /// </summary>
    CodecError = 1,

    /// <summary>
    /// Memory operation failed.
    /// </summary>
    CodecMemError = 2,

    /// <summary>
    /// ABI version mismatch.
    /// </summary>
    CodecAbiMismatch = 3,

    /// <summary>
    /// Algorithm does not have required capability.
    /// </summary>
    CodecIncapable = 4,

    /// <summary>
    /// The given bitstream is not supported.
    /// </summary>
    /// <remarks>
    /// The bitstream was unable to be parsed at the highest level. The decoder
    /// is unable to proceed. This error SHOULD be treated as fatal to the stream.
    /// </remarks>
    CodecUnsupBitstream = 5,

    /// <summary>
    /// Encoded bitstream uses an unsupported feature.
    /// </summary>
    /// <remarks>
    /// The decoder does not implement a feature required by the encoder. This
    /// return code should only be used for features that prevent future
    /// pictures from being properly decoded. This error MAY be treated as
    /// fatal to the stream or MAY be treated as fatal to the current GOP.
    /// </remarks>
    CodecUnsupFeature = 6,

    /// <summary>
    /// The coded data for this stream is corrupt or incomplete.
    /// </summary>
    /// <remarks>
    /// There was a problem decoding the current frame. This return code
    /// should only be used for failures that prevent future pictures from
    /// being properly decoded. This error MAY be treated as fatal to the
    /// stream or MAY be treated as fatal to the current GOP. If decoding
    /// is continued for the current GOP, artifacts may be present.
    /// </remarks>
    CodecCorruptFrame = 7,

    /// <summary>
    /// An application-supplied parameter is not valid.
    /// </summary>
    CodecInvalidParam = 8,

    /// <summary>
    /// An iterator reached the end of list.
    /// </summary>
    CodecListEnd = 9
}
|
||||
}
|
59
Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs
Normal file
59
Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs
Normal file
@ -0,0 +1,59 @@
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Numerics;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Common
|
||||
{
|
||||
/// <summary>
/// Small integer helpers: clamping, rounding, alignment and bit counting.
/// </summary>
internal static class BitUtils
{
    /// <summary>
    /// Clamps <paramref name="val"/> to the range 0 to 255.
    /// </summary>
    // FIXME: Enable inlining here after AVX2 gather bug is fixed.
    // [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static byte ClipPixel(int val)
    {
        if (val > 255)
        {
            return 255;
        }

        if (val < 0)
        {
            return 0;
        }

        return (byte)val;
    }

    /// <summary>
    /// Clamps <paramref name="val"/> to the range of an unsigned value with <paramref name="bd"/> bits.
    /// </summary>
    /// <param name="val">Value to clamp</param>
    /// <param name="bd">Bit depth; any value other than 10 or 12 is handled as 8 bits</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ushort ClipPixelHighbd(int val, int bd)
    {
        int maxValue;

        switch (bd)
        {
            case 10:
                maxValue = 1023;
                break;
            case 12:
                maxValue = 4095;
                break;
            default:
                maxValue = 255;
                break;
        }

        return (ushort)Math.Clamp(val, 0, maxValue);
    }

    /// <summary>
    /// Adds half of 2^<paramref name="n"/> to <paramref name="value"/> and shifts the sum right by <paramref name="n"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int RoundPowerOfTwo(int value, int n)
    {
        int half = 1 << (n - 1);

        return (value + half) >> n;
    }

    /// <summary>
    /// 64-bit variant of <see cref="RoundPowerOfTwo(int, int)"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static long RoundPowerOfTwo(long value, int n)
    {
        long half = 1L << (n - 1);

        return (value + half) >> n;
    }

    /// <summary>
    /// Rounds <paramref name="value"/> up to a multiple of 2^<paramref name="n"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int AlignPowerOfTwo(int value, int n)
    {
        int lowBits = (1 << n) - 1;

        return (value + lowBits) & ~lowBits;
    }

    /// <summary>
    /// Returns the index of the highest set bit of <paramref name="n"/>, which must not be zero.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int GetMsb(uint n)
    {
        Debug.Assert(n != 0);

        int leadingZeros = BitOperations.LeadingZeroCount(n);

        return 31 ^ leadingZeros;
    }

    /// <summary>
    /// Returns the number of bits needed to represent <paramref name="numValues"/>, or 0 when it is 0.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int GetUnsignedBits(uint numValues)
    {
        if (numValues > 0)
        {
            return GetMsb(numValues) + 1;
        }

        return 0;
    }
}
|
||||
}
|
94
Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryAllocator.cs
Normal file
94
Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryAllocator.cs
Normal file
@ -0,0 +1,94 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using System;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Common
|
||||
{
|
||||
/// <summary>
/// Unmanaged memory allocator that keeps a small pool of blocks so that blocks
/// of the same byte size can be reused instead of being allocated again.
/// </summary>
/// <remarks>
/// This type performs no synchronization.
/// </remarks>
internal class MemoryAllocator : IDisposable
{
    private const int PoolEntries = 10;

    private struct PoolItem
    {
        public IntPtr Pointer;
        public int Length;
        public bool InUse;
    }

    private PoolItem[] _pool = new PoolItem[PoolEntries];

    // Blocks handed out while every pool entry was in use. They are tracked here so
    // that Free and Dispose can still release them instead of leaking them.
    private readonly System.Collections.Generic.HashSet<IntPtr> _overflow = new System.Collections.Generic.HashSet<IntPtr>();

    /// <summary>
    /// Allocates unmanaged memory for <paramref name="length"/> elements of type <typeparamref name="T"/>.
    /// A free pooled block with exactly the same byte size is reused when there is one.
    /// </summary>
    /// <typeparam name="T">Element type</typeparam>
    /// <param name="length">Number of elements</param>
    /// <returns>Pointer to the block. Its contents are not cleared</returns>
    public ArrayPtr<T> Allocate<T>(int length) where T : unmanaged
    {
        int lengthInBytes = Unsafe.SizeOf<T>() * length;

        // Reuse a cached block of the same size. Entries without a block are skipped,
        // otherwise an empty entry could be marked as in use without holding anything.
        for (int i = 0; i < PoolEntries; i++)
        {
            ref PoolItem item = ref _pool[i];

            if (!item.InUse && item.Pointer != IntPtr.Zero && item.Length == lengthInBytes)
            {
                item.InUse = true;

                return new ArrayPtr<T>(item.Pointer, length);
            }
        }

        IntPtr ptr = Marshal.AllocHGlobal(lengthInBytes);

        // Track the new block in the first entry that is not in use,
        // releasing the cached block that entry was holding, if any.
        for (int i = 0; i < PoolEntries; i++)
        {
            ref PoolItem item = ref _pool[i];

            if (!item.InUse)
            {
                if (item.Pointer != IntPtr.Zero)
                {
                    Marshal.FreeHGlobal(item.Pointer);
                }

                item.Pointer = ptr;
                item.Length = lengthInBytes;
                item.InUse = true;

                return new ArrayPtr<T>(ptr, length);
            }
        }

        // Every pool entry is in use, remember the block separately.
        _overflow.Add(ptr);

        return new ArrayPtr<T>(ptr, length);
    }

    /// <summary>
    /// Releases a block returned by <see cref="Allocate{T}(int)"/>.
    /// Pooled blocks are kept for reuse; blocks allocated while the pool was full are freed immediately.
    /// Pointers not known to this allocator are ignored.
    /// </summary>
    /// <typeparam name="T">Element type</typeparam>
    /// <param name="arr">Block to release</param>
    public unsafe void Free<T>(ArrayPtr<T> arr) where T : unmanaged
    {
        IntPtr ptr = (IntPtr)arr.ToPointer();

        if (ptr == IntPtr.Zero)
        {
            return;
        }

        for (int i = 0; i < PoolEntries; i++)
        {
            ref PoolItem item = ref _pool[i];

            if (item.Pointer == ptr)
            {
                item.InUse = false;

                return;
            }
        }

        // Only pointers recorded by Allocate are freed, so releasing the same
        // block twice never frees memory that is not owned anymore.
        if (_overflow.Remove(ptr))
        {
            Marshal.FreeHGlobal(ptr);
        }
    }

    /// <summary>
    /// Frees every block owned by the allocator, including blocks that are still in use.
    /// </summary>
    public void Dispose()
    {
        for (int i = 0; i < PoolEntries; i++)
        {
            ref PoolItem item = ref _pool[i];

            if (item.Pointer != IntPtr.Zero)
            {
                Marshal.FreeHGlobal(item.Pointer);
            }

            // Clear the whole entry so that no stale size or in-use state is left behind.
            item = default;
        }

        foreach (IntPtr ptr in _overflow)
        {
            Marshal.FreeHGlobal(ptr);
        }

        _overflow.Clear();
    }
}
|
||||
}
|
25
Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryUtil.cs
Normal file
25
Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryUtil.cs
Normal file
@ -0,0 +1,25 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using System;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Common
|
||||
{
|
||||
/// <summary>
/// Span based helpers to copy and fill memory.
/// </summary>
internal static class MemoryUtil
{
    /// <summary>
    /// Copies <paramref name="length"/> elements from <paramref name="source"/> to <paramref name="dest"/>.
    /// </summary>
    /// <typeparam name="T">Element type</typeparam>
    /// <param name="dest">Destination pointer</param>
    /// <param name="source">Source pointer</param>
    /// <param name="length">Number of elements to copy</param>
    public static unsafe void Copy<T>(T* dest, T* source, int length) where T : unmanaged
    {
        Span<T> from = new Span<T>(source, length);
        Span<T> to = new Span<T>(dest, length);

        from.CopyTo(to);
    }

    /// <summary>
    /// Copies the single value referenced by <paramref name="source"/> into <paramref name="dest"/>.
    /// </summary>
    /// <typeparam name="T">Value type</typeparam>
    /// <param name="dest">Destination reference</param>
    /// <param name="source">Source reference</param>
    public static void Copy<T>(ref T dest, ref T source) where T : unmanaged
    {
        Span<T> from = MemoryMarshal.CreateSpan(ref source, 1);
        Span<T> to = MemoryMarshal.CreateSpan(ref dest, 1);

        from.CopyTo(to);
    }

    /// <summary>
    /// Sets <paramref name="length"/> elements starting at <paramref name="ptr"/> to <paramref name="value"/>.
    /// </summary>
    /// <typeparam name="T">Element type</typeparam>
    /// <param name="ptr">Start of the region to fill</param>
    /// <param name="value">Value to store in every element</param>
    /// <param name="length">Number of elements to fill</param>
    public static unsafe void Fill<T>(T* ptr, T value, int length) where T : unmanaged
    {
        Span<T> target = new Span<T>(ptr, length);

        target.Fill(value);
    }
}
|
||||
}
|
71
Ryujinx.Graphics.Nvdec.Vp9/Constants.cs
Normal file
71
Ryujinx.Graphics.Nvdec.Vp9/Constants.cs
Normal file
@ -0,0 +1,71 @@
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Types;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
/// <summary>
/// Compile-time constants used by the VP9 decoder code.
/// </summary>
internal static class Constants
{
    public const int Vp9InterpExtend = 4;

    public const int MaxMbPlane = 3;

    public const int None = -1;
    public const int IntraFrame = 0;
    public const int LastFrame = 1;
    public const int GoldenFrame = 2;
    public const int AltRefFrame = 3;
    public const int MaxRefFrames = 4;

    public const int MiSizeLog2 = 3;
    // Evaluates to 3.
    public const int MiBlockSizeLog2 = 6 - MiSizeLog2; // 64 = 2^6

    // Evaluates to 8.
    public const int MiSize = 1 << MiSizeLog2; // pixels per mi-unit
    // Evaluates to 8.
    public const int MiBlockSize = 1 << MiBlockSizeLog2; // mi-units per max block
    // Evaluates to 7.
    public const int MiMask = MiBlockSize - 1;

    public const int PartitionPloffset = 4; // number of probability models per block size

    /* Segment Feature Masks */
    public const int MaxMvRefCandidates = 2;

    public const int CompInterContexts = 5;
    public const int RefContexts = 5;

    public const int EightTap = 0;
    public const int EightTapSmooth = 1;
    public const int EightTapSharp = 2;
    public const int SwitchableFilters = 3; /* Number of switchable filters */
    // Shares the value 3 with SwitchableFilters.
    public const int Bilinear = 3;
    public const int Switchable = 4; /* should be the last one */

    // Frame
    public const int RefsPerFrame = 3;

    public const int NumPingPongBuffers = 2;

    public const int Class0Bits = 1; /* bits at integer precision for class 0 */
    // Evaluates to 2.
    public const int Class0Size = 1 << Class0Bits;

    public const int MvInUseBits = 14;
    // Evaluates to 16383.
    public const int MvUpp = (1 << MvInUseBits) - 1;
    // Evaluates to -16384.
    public const int MvLow = -(1 << MvInUseBits);

    // Coefficient token alphabet
    public const int ZeroToken = 0; // 0 Extra Bits 0+0
    public const int OneToken = 1; // 1 Extra Bits 0+1
    public const int TwoToken = 2; // 2 Extra Bits 0+1

    public const int PivotNode = 2;

    public const int Cat1MinVal = 5;
    public const int Cat2MinVal = 7;
    public const int Cat3MinVal = 11;
    public const int Cat4MinVal = 19;
    public const int Cat5MinVal = 35;
    public const int Cat6MinVal = 67;

    public const int EobModelToken = 3;

    public const int SegmentAbsData = 1;
    public const int MaxSegments = 8;
}
|
||||
}
|
1190
Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs
Normal file
1190
Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs
Normal file
File diff suppressed because it is too large
Load Diff
1159
Ryujinx.Graphics.Nvdec.Vp9/DecodeMv.cs
Normal file
1159
Ryujinx.Graphics.Nvdec.Vp9/DecodeMv.cs
Normal file
File diff suppressed because it is too large
Load Diff
164
Ryujinx.Graphics.Nvdec.Vp9/Decoder.cs
Normal file
164
Ryujinx.Graphics.Nvdec.Vp9/Decoder.cs
Normal file
@ -0,0 +1,164 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Types;
|
||||
using Ryujinx.Graphics.Video;
|
||||
using System;
|
||||
using Vp9MvRef = Ryujinx.Graphics.Video.Vp9MvRef;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
/// <summary>
/// Software VP9 decoder.
/// </summary>
public class Decoder : IVp9Decoder
{
    /// <summary>
    /// Always false for this implementation.
    /// </summary>
    public bool IsHardwareAccelerated => false;

    private readonly MemoryAllocator _allocator = new MemoryAllocator();

    /// <summary>
    /// Creates a surface with the given dimensions.
    /// </summary>
    /// <param name="width">Surface width</param>
    /// <param name="height">Surface height</param>
    /// <returns>The new surface</returns>
    public ISurface CreateSurface(int width, int height) => new Surface(width, height);

    // Maps the interpolation filter value of the picture info to the filter constant used by the decoder.
    private static readonly byte[] LiteralToFilter = new byte[]
    {
        Constants.EightTapSmooth,
        Constants.EightTap,
        Constants.EightTapSharp,
        Constants.Bilinear
    };

    /// <summary>
    /// Decodes one frame into <paramref name="output"/>.
    /// </summary>
    /// <param name="pictureInfo">Frame parameters; its entropy and backward update count data is referenced during decoding</param>
    /// <param name="output">Destination surface; must be a surface created by this decoder</param>
    /// <param name="bitstream">Frame bitstream</param>
    /// <param name="mvsIn">Motion vectors of the previous frame</param>
    /// <param name="mvsOut">Receives the motion vectors of the decoded frame</param>
    /// <returns>True on success, false when decoding failed with an internal error</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="mvsIn"/> or <paramref name="mvsOut"/> is longer than the matching motion vector buffer
    /// </exception>
    public unsafe bool Decode(
        ref Vp9PictureInfo pictureInfo,
        ISurface output,
        ReadOnlySpan<byte> bitstream,
        ReadOnlySpan<Vp9MvRef> mvsIn,
        Span<Vp9MvRef> mvsOut)
    {
        Vp9Common cm = new Vp9Common();

        cm.FrameType = pictureInfo.IsKeyFrame ? FrameType.KeyFrame : FrameType.InterFrame;
        cm.IntraOnly = pictureInfo.IntraOnly;

        cm.Width = output.Width;
        cm.Height = output.Height;

        cm.UsePrevFrameMvs = pictureInfo.UsePrevInFindMvRefs;

        cm.RefFrameSignBias = pictureInfo.RefFrameSignBias;

        cm.BaseQindex = pictureInfo.BaseQIndex;
        cm.YDcDeltaQ = pictureInfo.YDcDeltaQ;
        cm.UvAcDeltaQ = pictureInfo.UvAcDeltaQ;
        cm.UvDcDeltaQ = pictureInfo.UvDcDeltaQ;

        cm.Mb.Lossless = pictureInfo.Lossless;

        cm.TxMode = (TxMode)pictureInfo.TransformMode;

        cm.AllowHighPrecisionMv = pictureInfo.AllowHighPrecisionMv;

        cm.InterpFilter = (byte)pictureInfo.InterpFilter;

        // The switchable value is kept as is, every other value goes through the lookup table.
        if (cm.InterpFilter != Constants.Switchable)
        {
            cm.InterpFilter = LiteralToFilter[cm.InterpFilter];
        }

        cm.ReferenceMode = (ReferenceMode)pictureInfo.ReferenceMode;

        cm.CompFixedRef = pictureInfo.CompFixedRef;
        cm.CompVarRef = pictureInfo.CompVarRef;

        cm.Log2TileCols = pictureInfo.Log2TileCols;
        cm.Log2TileRows = pictureInfo.Log2TileRows;

        cm.Seg.Enabled = pictureInfo.SegmentEnabled;
        cm.Seg.UpdateMap = pictureInfo.SegmentMapUpdate;
        cm.Seg.TemporalUpdate = pictureInfo.SegmentMapTemporalUpdate;
        cm.Seg.AbsDelta = (byte)pictureInfo.SegmentAbsDelta;
        cm.Seg.FeatureMask = pictureInfo.SegmentFeatureEnable;
        cm.Seg.FeatureData = pictureInfo.SegmentFeatureData;

        cm.Lf.ModeRefDeltaEnabled = pictureInfo.ModeRefDeltaEnabled;
        cm.Lf.RefDeltas = pictureInfo.RefDeltas;
        cm.Lf.ModeDeltas = pictureInfo.ModeDeltas;

        cm.Fc = new Ptr<Vp9EntropyProbs>(ref pictureInfo.Entropy);
        cm.Counts = new Ptr<Vp9BackwardUpdates>(ref pictureInfo.BackwardUpdateCounts);

        cm.FrameRefs[0].Buf = (Surface)pictureInfo.LastReference;
        cm.FrameRefs[1].Buf = (Surface)pictureInfo.GoldenReference;
        cm.FrameRefs[2].Buf = (Surface)pictureInfo.AltReference;
        cm.Mb.CurBuf = (Surface)output;

        cm.Mb.SetupBlockPlanes(1, 1);

        cm.AllocTileWorkerData(_allocator, 1 << pictureInfo.Log2TileCols, 1 << pictureInfo.Log2TileRows);
        cm.AllocContextBuffers(_allocator, output.Width, output.Height);

        // From here on the buffers are released on every exit path. Without this, a failed
        // decode would leave them marked as in use in the allocator for its whole lifetime.
        try
        {
            cm.InitContextBuffers();
            cm.SetupSegmentationDequant();
            cm.SetupScaleFactors();

            SetMvs(ref cm, mvsIn);

            fixed (byte* dataPtr = bitstream)
            {
                try
                {
                    DecodeFrame.DecodeTiles(ref cm, new ArrayPtr<byte>(dataPtr, bitstream.Length));
                }
                catch (InternalErrorException)
                {
                    return false;
                }
            }

            GetMvs(ref cm, mvsOut);

            return true;
        }
        finally
        {
            cm.FreeTileWorkerData(_allocator);
            cm.FreeContextBuffers(_allocator);
        }
    }

    /// <summary>
    /// Copies <paramref name="mvs"/> into the previous frame motion vector buffer of <paramref name="cm"/>.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="mvs"/> is longer than the buffer</exception>
    private static void SetMvs(ref Vp9Common cm, ReadOnlySpan<Vp9MvRef> mvs)
    {
        if (mvs.Length > cm.PrevFrameMvs.Length)
        {
            throw new ArgumentException($"Size mismatch, expected: {cm.PrevFrameMvs.Length}, but got: {mvs.Length}.");
        }

        for (int i = 0; i < mvs.Length; i++)
        {
            ref var mv = ref cm.PrevFrameMvs[i];

            mv.Mv[0].Row = mvs[i].Mvs[0].Row;
            mv.Mv[0].Col = mvs[i].Mvs[0].Col;
            mv.Mv[1].Row = mvs[i].Mvs[1].Row;
            mv.Mv[1].Col = mvs[i].Mvs[1].Col;

            mv.RefFrame[0] = (sbyte)mvs[i].RefFrames[0];
            mv.RefFrame[1] = (sbyte)mvs[i].RefFrames[1];
        }
    }

    /// <summary>
    /// Copies the current frame motion vector buffer of <paramref name="cm"/> into <paramref name="mvs"/>.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="mvs"/> is longer than the buffer</exception>
    private static void GetMvs(ref Vp9Common cm, Span<Vp9MvRef> mvs)
    {
        if (mvs.Length > cm.CurFrameMvs.Length)
        {
            throw new ArgumentException($"Size mismatch, expected: {cm.CurFrameMvs.Length}, but got: {mvs.Length}.");
        }

        for (int i = 0; i < mvs.Length; i++)
        {
            ref var mv = ref cm.CurFrameMvs[i];

            mvs[i].Mvs[0].Row = mv.Mv[0].Row;
            mvs[i].Mvs[0].Col = mv.Mv[0].Col;
            mvs[i].Mvs[1].Row = mv.Mv[1].Row;
            mvs[i].Mvs[1].Col = mv.Mv[1].Col;

            mvs[i].RefFrames[0] = mv.RefFrame[0];
            mvs[i].RefFrames[1] = mv.RefFrame[1];
        }
    }

    /// <summary>
    /// Disposes the memory allocator owned by this decoder.
    /// </summary>
    public void Dispose() => _allocator.Dispose();
}
|
||||
}
|
325
Ryujinx.Graphics.Nvdec.Vp9/Detokenize.cs
Normal file
325
Ryujinx.Graphics.Nvdec.Vp9/Detokenize.cs
Normal file
@ -0,0 +1,325 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Dsp;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Types;
|
||||
using Ryujinx.Graphics.Video;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.InteropServices;
|
||||
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
internal static class Detokenize
|
||||
{
|
||||
private const int EobContextNode = 0;
|
||||
private const int ZeroContextNode = 1;
|
||||
private const int OneContextNode = 2;
|
||||
|
||||
/// <summary>
/// Computes the context for coefficient <paramref name="c"/> from the cached tokens of its two neighbors.
/// </summary>
/// <param name="neighbors">Neighbor table holding two token cache indices per coefficient</param>
/// <param name="tokenCache">Token values indexed by the entries of <paramref name="neighbors"/></param>
/// <param name="c">Coefficient index</param>
/// <returns>(1 + first neighbor token + second neighbor token) shifted right by one</returns>
private static int GetCoefContext(ReadOnlySpan<short> neighbors, ReadOnlySpan<byte> tokenCache, int c)
{
    const int maxNeighbors = 2;

    int firstEntry = maxNeighbors * c;

    int firstToken = tokenCache[neighbors[firstEntry]];
    int secondToken = tokenCache[neighbors[firstEntry + 1]];

    return (1 + firstToken + secondToken) >> 1;
}
|
||||
|
||||
/// <summary>
/// Reads <paramref name="n"/> bits from the reader, most significant bit first,
/// using one probability from <paramref name="probs"/> per bit.
/// </summary>
/// <param name="r">Reader to read from</param>
/// <param name="probs">Probabilities, one per bit</param>
/// <param name="n">Number of bits to read</param>
/// <param name="value">Reader value state, passed through to every read</param>
/// <param name="count">Reader count state, passed through to every read</param>
/// <param name="range">Reader range state, passed through to every read</param>
/// <returns>The value assembled from the bits that were read</returns>
private static int ReadCoeff(
    ref Reader r,
    ReadOnlySpan<byte> probs,
    int n,
    ref ulong value,
    ref int count,
    ref uint range)
{
    int result = 0;

    for (int bit = 0; bit < n; bit++)
    {
        var decoded = r.ReadBool(probs[bit], ref value, ref count, ref range);

        result = (result << 1) | decoded;
    }

    return result;
}
|
||||
|
||||
// Decodes the coefficient tokens of a single transform block.
//
// Tokens are read from r with the probabilities found in xd.Fc, multiplied by the
// dequantizer (dq[0] while c is still 0, dq[1] afterwards), shifted right by one for
// Tx32x32, and stored into dqcoeff at the position given by scan. When xd.Counts is not
// null the matching EobBranch / Coef counters are incremented along the way.
//
// Returns the scan index reached when decoding stopped: either an end-of-block token was
// read or maxEob positions were consumed.
private static int DecodeCoefs(
    ref MacroBlockD xd,
    PlaneType type,
    Span<int> dqcoeff,
    TxSize txSize,
    ref Array2<short> dq,
    int ctx,
    ReadOnlySpan<short> scan,
    ReadOnlySpan<short> nb,
    ref Reader r)
{
    ref Vp9BackwardUpdates counts = ref xd.Counts.Value;
    int txIdx = (int)txSize;
    int typeIdx = (int)type;
    int maxEob = 16 << (txIdx << 1);
    ref Vp9EntropyProbs fc = ref xd.Fc.Value;
    int refr = xd.Mi[0].Value.IsInterBlock() ? 1 : 0;
    ref Array6<Array6<Array3<byte>>> coefProbs = ref fc.CoefProbs[txIdx][typeIdx][refr];
    Span<byte> tokenCache = stackalloc byte[32 * 32];

    // The band table is walked with a cursor; one entry is consumed per decoded position.
    ReadOnlySpan<byte> bandTranslate = Luts.get_band_translate(txSize);
    int bandPos = 0;

    int dqShift = txSize == TxSize.Tx32x32 ? 1 : 0;
    short dqv = dq[0];

    // Category 6 probabilities and bit count depend on the bit depth.
    ReadOnlySpan<byte> cat6Prob;
    int cat6Bits;
    if (xd.Bd == 12)
    {
        cat6Prob = Luts.Vp9Cat6ProbHigh12;
        cat6Bits = 18;
    }
    else if (xd.Bd == 10)
    {
        cat6Prob = new ReadOnlySpan<byte>(Luts.Vp9Cat6ProbHigh12).Slice(2);
        cat6Bits = 16;
    }
    else
    {
        cat6Prob = Luts.Vp9Cat6Prob;
        cat6Bits = 14;
    }

    // Counters are only touched when a counts structure was supplied.
    bool trackCounts = !xd.Counts.IsNull;

    // The reader state is kept in locals while decoding (the original authors note the
    // compiler produces better results this way) and written back before every return.
    ulong value = r.Value;
    uint range = r.Range;
    int count = r.Count;

    int c = 0;

    while (c < maxEob)
    {
        int val = -1;
        int v;
        int band = bandTranslate[bandPos++];
        ref Array3<byte> prob = ref coefProbs[band][ctx];

        if (trackCounts)
        {
            ++counts.EobBranch[txIdx][typeIdx][refr][band][ctx];
        }

        if (r.ReadBool(prob[EobContextNode], ref value, ref count, ref range) == 0)
        {
            if (trackCounts)
            {
                ++counts.Coef[txIdx][typeIdx][refr][band][ctx][Constants.EobModelToken];
            }

            break;
        }

        // Consume a run of zero tokens.
        while (r.ReadBool(prob[ZeroContextNode], ref value, ref count, ref range) == 0)
        {
            if (trackCounts)
            {
                ++counts.Coef[txIdx][typeIdx][refr][band][ctx][Constants.ZeroToken];
            }

            dqv = dq[1];
            tokenCache[scan[c]] = 0;
            c++;

            if (c >= maxEob)
            {
                // Zero tokens at the end (no eob token).
                r.Value = value;
                r.Range = range;
                r.Count = count;

                return c;
            }

            ctx = GetCoefContext(nb, tokenCache, c);
            band = bandTranslate[bandPos++];
            prob = ref coefProbs[band][ctx];
        }

        if (r.ReadBool(prob[OneContextNode], ref value, ref count, ref range) != 0)
        {
            ReadOnlySpan<byte> p = Luts.Vp9Pareto8Full[prob[Constants.PivotNode] - 1];

            if (trackCounts)
            {
                ++counts.Coef[txIdx][typeIdx][refr][band][ctx][Constants.TwoToken];
            }

            if (r.ReadBool(p[0], ref value, ref count, ref range) != 0)
            {
                // Category tokens: a base value plus extra bits read by ReadCoeff.
                if (r.ReadBool(p[3], ref value, ref count, ref range) != 0)
                {
                    tokenCache[scan[c]] = 5;

                    if (r.ReadBool(p[5], ref value, ref count, ref range) != 0)
                    {
                        if (r.ReadBool(p[7], ref value, ref count, ref range) != 0)
                        {
                            val = Constants.Cat6MinVal + ReadCoeff(ref r, cat6Prob, cat6Bits, ref value, ref count, ref range);
                        }
                        else
                        {
                            val = Constants.Cat5MinVal + ReadCoeff(ref r, Luts.Vp9Cat5Prob, 5, ref value, ref count, ref range);
                        }
                    }
                    else if (r.ReadBool(p[6], ref value, ref count, ref range) != 0)
                    {
                        val = Constants.Cat4MinVal + ReadCoeff(ref r, Luts.Vp9Cat4Prob, 4, ref value, ref count, ref range);
                    }
                    else
                    {
                        val = Constants.Cat3MinVal + ReadCoeff(ref r, Luts.Vp9Cat3Prob, 3, ref value, ref count, ref range);
                    }
                }
                else
                {
                    tokenCache[scan[c]] = 4;

                    if (r.ReadBool(p[4], ref value, ref count, ref range) != 0)
                    {
                        val = Constants.Cat2MinVal + ReadCoeff(ref r, Luts.Vp9Cat2Prob, 2, ref value, ref count, ref range);
                    }
                    else
                    {
                        val = Constants.Cat1MinVal + ReadCoeff(ref r, Luts.Vp9Cat1Prob, 1, ref value, ref count, ref range);
                    }
                }

                // val may use 18 bits, so the product is formed in 64-bit arithmetic.
                v = (int)(((long)val * dqv) >> dqShift);
            }
            else if (r.ReadBool(p[1], ref value, ref count, ref range) != 0)
            {
                tokenCache[scan[c]] = 3;
                v = ((3 + r.ReadBool(p[2], ref value, ref count, ref range)) * dqv) >> dqShift;
            }
            else
            {
                tokenCache[scan[c]] = 2;
                v = (2 * dqv) >> dqShift;
            }
        }
        else
        {
            if (trackCounts)
            {
                ++counts.Coef[txIdx][typeIdx][refr][band][ctx][Constants.OneToken];
            }

            tokenCache[scan[c]] = 1;
            v = dqv >> dqShift;
        }

        // One more value read with probability 128 selects the sign.
        dqcoeff[scan[c]] = (int)HighbdCheckRange(r.ReadBool(128, ref value, ref count, ref range) != 0 ? -v : v, xd.Bd);
        c++;
        ctx = GetCoefContext(nb, tokenCache, c);
        dqv = dq[1];
    }

    r.Value = value;
    r.Range = range;
    r.Count = count;

    return c;
}
|
||||
|
||||
// Computes the bit shifts applied to the above/left context masks when a transform of
// txSizeInBlocks blocks starting at (x, y) extends past xd.MaxBlocksWide / xd.MaxBlocksHigh.
// Each block past the limit contributes 8 bits. A limit of zero disables the check, and
// an output is left untouched when the transform fits.
private static void GetCtxShift(ref MacroBlockD xd, ref int ctxShiftA, ref int ctxShiftL, int x, int y, uint txSizeInBlocks)
{
    if (xd.MaxBlocksWide != 0 && txSizeInBlocks + x > xd.MaxBlocksWide)
    {
        ctxShiftA = (int)(txSizeInBlocks - (xd.MaxBlocksWide - x)) * 8;
    }

    if (xd.MaxBlocksHigh != 0 && txSizeInBlocks + y > xd.MaxBlocksHigh)
    {
        ctxShiftL = (int)(txSizeInBlocks - (xd.MaxBlocksHigh - y)) * 8;
    }
}
|
||||
|
||||
// Maps a plane index to a PlaneType: any positive index yields the value 1, everything
// else yields the value 0.
private static PlaneType GetPlaneType(int plane) => plane > 0 ? (PlaneType)1 : (PlaneType)0;
|
||||
|
||||
// Decodes the coefficient tokens of the transform block at (x, y) in the given plane and
// updates the plane's above/left entropy contexts.
//
// The initial context is the number of non-zero neighbor contexts (above and left,
// 0..2), where each side covers 1, 2, 4 or 8 context bytes depending on txSize. After
// decoding, those bytes are set to 1 when at least one coefficient was decoded (eob > 0)
// and to 0 otherwise; for sizes above 4x4 the mask is shifted right by the amounts
// GetCtxShift reports.
//
// Returns the eob value produced by DecodeCoefs, or 0 for an invalid transform size.
public static int DecodeBlockTokens(
    ref TileWorkerData twd,
    int plane,
    Luts.ScanOrder sc,
    int x,
    int y,
    TxSize txSize,
    int segId)
{
    ref Reader r = ref twd.BitReader;
    ref MacroBlockD xd = ref twd.Xd;
    ref MacroBlockDPlane pd = ref xd.Plane[plane];
    ref Array2<short> dequant = ref pd.SegDequant[segId];
    Span<sbyte> above = pd.AboveContext.ToSpan().Slice(x);
    Span<sbyte> left = pd.LeftContext.ToSpan().Slice(y);
    int shiftAbove = 0;
    int shiftLeft = 0;
    int eob;
    int ctx;

    switch (txSize)
    {
        case TxSize.Tx4x4:
        {
            ctx = (above[0] != 0 ? 1 : 0) + (left[0] != 0 ? 1 : 0);
            eob = DecodeCoefs(
                ref xd,
                GetPlaneType(plane),
                pd.DqCoeff.ToSpan(),
                txSize,
                ref dequant,
                ctx,
                sc.Scan,
                sc.Neighbors,
                ref r);

            sbyte flag = (sbyte)(eob > 0 ? 1 : 0);
            left[0] = flag;
            above[0] = flag;
            break;
        }

        case TxSize.Tx8x8:
        {
            GetCtxShift(ref xd, ref shiftAbove, ref shiftLeft, x, y, 1 << (int)TxSize.Tx8x8);

            // Two context bytes per side are handled as one 16-bit value.
            Span<ushort> above16 = MemoryMarshal.Cast<sbyte, ushort>(above);
            Span<ushort> left16 = MemoryMarshal.Cast<sbyte, ushort>(left);

            ctx = (above16[0] != 0 ? 1 : 0) + (left16[0] != 0 ? 1 : 0);
            eob = DecodeCoefs(
                ref xd,
                GetPlaneType(plane),
                pd.DqCoeff.ToSpan(),
                txSize,
                ref dequant,
                ctx,
                sc.Scan,
                sc.Neighbors,
                ref r);

            int mask16 = eob > 0 ? 0x0101 : 0;
            above16[0] = (ushort)(mask16 >> shiftAbove);
            left16[0] = (ushort)(mask16 >> shiftLeft);
            break;
        }

        case TxSize.Tx16x16:
        {
            GetCtxShift(ref xd, ref shiftAbove, ref shiftLeft, x, y, 1 << (int)TxSize.Tx16x16);

            // Four context bytes per side are handled as one 32-bit value.
            Span<uint> above32 = MemoryMarshal.Cast<sbyte, uint>(above);
            Span<uint> left32 = MemoryMarshal.Cast<sbyte, uint>(left);

            ctx = (above32[0] != 0 ? 1 : 0) + (left32[0] != 0 ? 1 : 0);
            eob = DecodeCoefs(
                ref xd,
                GetPlaneType(plane),
                pd.DqCoeff.ToSpan(),
                txSize,
                ref dequant,
                ctx,
                sc.Scan,
                sc.Neighbors,
                ref r);

            int mask32 = eob > 0 ? 0x01010101 : 0;
            above32[0] = (uint)(mask32 >> shiftAbove);
            left32[0] = (uint)(mask32 >> shiftLeft);
            break;
        }

        case TxSize.Tx32x32:
        {
            GetCtxShift(ref xd, ref shiftAbove, ref shiftLeft, x, y, 1 << (int)TxSize.Tx32x32);

            // NOTE (from the original authors): casting to ulong here is safe because the
            // default memory alignment is at least 8 bytes and the Tx32x32 is aligned on
            // 8 byte boundaries.
            Span<ulong> above64 = MemoryMarshal.Cast<sbyte, ulong>(above);
            Span<ulong> left64 = MemoryMarshal.Cast<sbyte, ulong>(left);

            ctx = (above64[0] != 0 ? 1 : 0) + (left64[0] != 0 ? 1 : 0);
            eob = DecodeCoefs(
                ref xd,
                GetPlaneType(plane),
                pd.DqCoeff.ToSpan(),
                txSize,
                ref dequant,
                ctx,
                sc.Scan,
                sc.Neighbors,
                ref r);

            ulong mask64 = eob > 0 ? 0x0101010101010101UL : 0UL;
            above64[0] = mask64 >> shiftAbove;
            left64[0] = mask64 >> shiftLeft;
            break;
        }

        default:
            Debug.Assert(false, "Invalid transform size.");
            eob = 0;
            break;
    }

    return eob;
}
|
||||
}
|
||||
}
|
949
Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs
Normal file
949
Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs
Normal file
@ -0,0 +1,949 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Filter;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
internal static class Convolve
|
||||
{
|
||||
// Compile-time switch for the SIMD code paths: ConvolveHoriz and ConvolveVert only
// dispatch to their SSE4.1 / AVX2 variants when this is true.
private const bool UseIntrinsics = true;
|
||||
|
||||
// Computes four 8-element dot products at once: lane N of the result holds the sum of
// the element-wise products of vsrcN and vfilter.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<int> MultiplyAddAdjacent(
    Vector128<short> vsrc0,
    Vector128<short> vsrc1,
    Vector128<short> vsrc2,
    Vector128<short> vsrc3,
    Vector128<short> vfilter,
    Vector128<int> zero)
{
    // For each source: multiply-add gives four pair sums, and two horizontal adds against
    // zero collapse them into the lowest lane (the other three lanes become zero).
    Vector128<int> total0 = Ssse3.HorizontalAdd(Ssse3.HorizontalAdd(Sse2.MultiplyAddAdjacent(vsrc0, vfilter), zero), zero);
    Vector128<int> total1 = Ssse3.HorizontalAdd(Ssse3.HorizontalAdd(Sse2.MultiplyAddAdjacent(vsrc1, vfilter), zero), zero);
    Vector128<int> total2 = Ssse3.HorizontalAdd(Ssse3.HorizontalAdd(Sse2.MultiplyAddAdjacent(vsrc2, vfilter), zero), zero);
    Vector128<int> total3 = Ssse3.HorizontalAdd(Ssse3.HorizontalAdd(Sse2.MultiplyAddAdjacent(vsrc3, vfilter), zero), zero);

    // Lowest two lanes: (total0, total1) and (total2, total3).
    Vector128<int> lowPair = Sse2.UnpackLow(total0, total1);
    Vector128<int> highPair = Sse2.UnpackLow(total2, total3);

    // Concatenate the two pairs into (total0, total1, total2, total3).
    return Sse.MoveLowToHigh(lowPair.AsSingle(), highPair.AsSingle()).AsInt32();
}
|
||||
|
||||
// Adds const64 to every lane and then arithmetically shifts each lane right by FilterBits.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<int> RoundShift(Vector128<int> value, Vector128<int> const64)
{
    Vector128<int> biased = Sse2.Add(value, const64);

    return Sse2.ShiftRightArithmetic(biased, FilterBits);
}
|
||||
|
||||
// Narrows the four 32-bit lanes of value to bytes in two saturating pack steps
// (32 -> 16 bits, then 16 -> 8 bits). The four results occupy the lowest four bytes;
// the remaining bytes come from zero.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<byte> PackUnsignedSaturate(Vector128<int> value, Vector128<int> zero)
{
    Vector128<ushort> words = Sse41.PackUnsignedSaturate(value, zero);

    return Sse2.PackUnsignedSaturate(words.AsInt16(), zero.AsInt16());
}
|
||||
|
||||
// SSE4.1 horizontal filter for an unscaled step: one filter (selected by
// x0Q4 & SubpelMask) is applied to every pixel, and four output pixels are produced and
// stored per inner iteration.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void ConvolveHorizSse41(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] xFilters,
    int x0Q4,
    int w,
    int h)
{
    Vector128<int> zero = Vector128<int>.Zero;
    Vector128<int> const64 = Vector128.Create(64);

    ulong width = (uint)w;
    ulong height = (uint)h;

    // Whole-pixel part of the start position; it is the same for every row.
    ulong srcOffset = (uint)x0Q4 >> SubpelBits;

    src -= SubpelTaps / 2 - 1;

    fixed (Array8<short>* xFilter = xFilters)
    {
        Vector128<short> vfilter = Sse2.LoadVector128((short*)xFilter + (uint)(x0Q4 & SubpelMask) * 8);

        for (ulong row = 0; row < height; row++)
        {
            for (ulong col = 0; col < width; col += 4)
            {
                // Four overlapping 8-byte windows, one per output pixel, widened to 16 bits.
                Vector128<short> window0 = Sse41.ConvertToVector128Int16(&src[srcOffset + col]);
                Vector128<short> window1 = Sse41.ConvertToVector128Int16(&src[srcOffset + col + 1]);
                Vector128<short> window2 = Sse41.ConvertToVector128Int16(&src[srcOffset + col + 2]);
                Vector128<short> window3 = Sse41.ConvertToVector128Int16(&src[srcOffset + col + 3]);

                Vector128<int> sums = MultiplyAddAdjacent(window0, window1, window2, window3, vfilter, zero);
                Vector128<byte> pixels = PackUnsignedSaturate(RoundShift(sums, const64), zero);

                // Store the lowest four bytes.
                Sse.StoreScalar((float*)&dst[col], pixels.AsSingle());
            }

            src += srcStride;
            dst += dstStride;
        }
    }
}
|
||||
|
||||
// Horizontal filter pass over a w x h area.
//
// For every output pixel the filter is chosen by the sub-pixel phase (xQ4 & SubpelMask)
// and applied to SubpelTaps source pixels starting at src[(xQ4 >> SubpelBits) -
// (SubpelTaps / 2 - 1)]. The sum is passed through BitUtils.RoundPowerOfTwo(sum,
// FilterBits) and BitUtils.ClipPixel. xQ4 starts at x0Q4 on every row and advances by
// xStepQ4 per pixel.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void ConvolveHoriz(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] xFilters,
    int x0Q4,
    int xStepQ4,
    int w,
    int h)
{
    // The SSE4.1 path uses a single filter phase and always stores four pixels per
    // iteration. It is therefore only used for an unscaled step and a width that is a
    // multiple of four; any other width would write past the requested area, so it is
    // handled by the scalar loop below, which honours w exactly.
    if (Sse41.IsSupported && UseIntrinsics && xStepQ4 == 1 << SubpelBits && (w & 3) == 0)
    {
        ConvolveHorizSse41(src, srcStride, dst, dstStride, xFilters, x0Q4, w, h);

        return;
    }

    src -= SubpelTaps / 2 - 1;

    for (int y = 0; y < h; ++y)
    {
        int xQ4 = x0Q4;

        for (int x = 0; x < w; ++x)
        {
            byte* srcX = &src[xQ4 >> SubpelBits];
            ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
            int sum = 0;

            for (int k = 0; k < SubpelTaps; ++k)
            {
                sum += srcX[k] * xFilter[k];
            }

            dst[x] = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits));
            xQ4 += xStepQ4;
        }

        src += srcStride;
        dst += dstStride;
    }
}
|
||||
|
||||
// Horizontal filter pass that blends with the destination: each filtered, clipped pixel
// is combined with the existing dst pixel through BitUtils.RoundPowerOfTwo(dst + value, 1).
private static unsafe void ConvolveAvgHoriz(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] xFilters,
    int x0Q4,
    int xStepQ4,
    int w,
    int h)
{
    src -= SubpelTaps / 2 - 1;

    for (int row = 0; row < h; row++)
    {
        int xQ4 = x0Q4;

        for (int col = 0; col < w; col++)
        {
            byte* window = src + (xQ4 >> SubpelBits);
            ref Array8<short> kernel = ref xFilters[xQ4 & SubpelMask];

            int acc = 0;
            for (int tap = 0; tap < SubpelTaps; tap++)
            {
                acc += window[tap] * kernel[tap];
            }

            byte filtered = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(acc, FilterBits));
            dst[col] = (byte)BitUtils.RoundPowerOfTwo(dst[col] + filtered, 1);

            xQ4 += xStepQ4;
        }

        src += srcStride;
        dst += dstStride;
    }
}
|
||||
|
||||
// AVX2 vertical filter for an unscaled step: one filter (selected by y0Q4 & SubpelMask)
// is applied to every pixel, and four horizontally adjacent output pixels are produced
// and stored per inner iteration.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void ConvolveVertAvx2(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] yFilters,
    int y0Q4,
    int w,
    int h)
{
    Vector128<int> zero = Vector128<int>.Zero;
    Vector128<int> const64 = Vector128.Create(64);

    // Byte offsets of the eight consecutive source rows gathered for one output row.
    Vector256<int> rowOffsets = Vector256.Create(
        0,
        srcStride,
        srcStride * 2,
        srcStride * 3,
        srcStride * 4,
        srcStride * 5,
        srcStride * 6,
        srcStride * 7);

    ulong width = (uint)w;
    ulong height = (uint)h;

    src -= srcStride * (SubpelTaps / 2 - 1);

    fixed (Array8<short>* yFilter = yFilters)
    {
        Vector128<short> vfilter = Sse2.LoadVector128((short*)yFilter + (uint)(y0Q4 & SubpelMask) * 8);
        ulong firstRow = (uint)y0Q4 >> SubpelBits;

        for (ulong row = 0; row < height; row++)
        {
            ulong rowStart = (firstRow + row) * (uint)srcStride;

            for (ulong col = 0; col < width; col += 4)
            {
                // Each gathered 32-bit element holds four adjacent bytes of one source row.
                Vector256<int> gathered = Avx2.GatherVector256((uint*)&src[rowStart + col], rowOffsets, 1).AsInt32();

                Vector128<byte> rows0To3 = gathered.GetLower().AsByte();
                Vector128<byte> rows4To7 = gathered.GetUpper().AsByte();

                // Three rounds of byte interleaving transpose the 8x4 tile so that each
                // 8-byte half holds the eight vertical taps of one column.
                Vector128<byte> mixLow = Sse2.UnpackLow(rows0To3, rows4To7);
                Vector128<byte> mixHigh = Sse2.UnpackHigh(rows0To3, rows4To7);

                Vector128<byte> evenRows = Sse2.UnpackLow(mixLow, mixHigh);
                Vector128<byte> oddRows = Sse2.UnpackHigh(mixLow, mixHigh);

                Vector128<byte> columns01 = Sse2.UnpackLow(evenRows, oddRows);
                Vector128<byte> columns23 = Sse2.UnpackHigh(evenRows, oddRows);

                // Bring the upper column of each pair down to the low half.
                Vector128<byte> column1 = Sse.MoveHighToLow(columns01.AsSingle(), columns01.AsSingle()).AsByte();
                Vector128<byte> column3 = Sse.MoveHighToLow(columns23.AsSingle(), columns23.AsSingle()).AsByte();

                Vector128<short> taps0 = Sse41.ConvertToVector128Int16(columns01);
                Vector128<short> taps1 = Sse41.ConvertToVector128Int16(column1);
                Vector128<short> taps2 = Sse41.ConvertToVector128Int16(columns23);
                Vector128<short> taps3 = Sse41.ConvertToVector128Int16(column3);

                Vector128<int> sums = MultiplyAddAdjacent(taps0, taps1, taps2, taps3, vfilter, zero);
                Vector128<byte> pixels = PackUnsignedSaturate(RoundShift(sums, const64), zero);

                // Store the lowest four bytes.
                Sse.StoreScalar((float*)&dst[col], pixels.AsSingle());
            }

            dst += dstStride;
        }
    }
}
|
||||
|
||||
// Vertical filter pass over a w x h area.
//
// For every output pixel the filter is chosen by the sub-pixel phase (yQ4 & SubpelMask)
// and applied to SubpelTaps source pixels of the same column, starting
// (SubpelTaps / 2 - 1) rows above row (yQ4 >> SubpelBits). The sum is passed through
// BitUtils.RoundPowerOfTwo(sum, FilterBits) and BitUtils.ClipPixel. yQ4 starts at y0Q4 in
// every column and advances by yStepQ4 per row.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void ConvolveVert(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] yFilters,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    // The AVX2 path uses a single filter phase and always stores four pixels per
    // iteration. It is therefore only used for an unscaled step and a width that is a
    // multiple of four; any other width would write past the requested area, so it is
    // handled by the scalar loop below, which honours w exactly.
    if (Avx2.IsSupported && UseIntrinsics && yStepQ4 == 1 << SubpelBits && (w & 3) == 0)
    {
        ConvolveVertAvx2(src, srcStride, dst, dstStride, yFilters, y0Q4, w, h);

        return;
    }

    src -= srcStride * (SubpelTaps / 2 - 1);

    // Columns are processed one at a time; src and dst advance by one pixel per column.
    for (int x = 0; x < w; ++x)
    {
        int yQ4 = y0Q4;

        for (int y = 0; y < h; ++y)
        {
            byte* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
            ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
            int sum = 0;

            for (int k = 0; k < SubpelTaps; ++k)
            {
                sum += srcY[k * srcStride] * yFilter[k];
            }

            dst[y * dstStride] = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits));
            yQ4 += yStepQ4;
        }

        ++src;
        ++dst;
    }
}
|
||||
|
||||
// Vertical filter pass that blends with the destination: each filtered, clipped pixel is
// combined with the existing dst pixel through BitUtils.RoundPowerOfTwo(dst + value, 1).
private static unsafe void ConvolveAvgVert(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] yFilters,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    src -= srcStride * (SubpelTaps / 2 - 1);

    // Column by column; src and dst advance by one pixel per column.
    for (int col = 0; col < w; col++)
    {
        int yQ4 = y0Q4;

        for (int row = 0; row < h; row++)
        {
            byte* window = src + (yQ4 >> SubpelBits) * srcStride;
            ref Array8<short> kernel = ref yFilters[yQ4 & SubpelMask];

            int acc = 0;
            for (int tap = 0; tap < SubpelTaps; tap++)
            {
                acc += window[tap * srcStride] * kernel[tap];
            }

            byte filtered = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(acc, FilterBits));
            int dstIndex = row * dstStride;
            dst[dstIndex] = (byte)BitUtils.RoundPowerOfTwo(dst[dstIndex] + filtered, 1);

            yQ4 += yStepQ4;
        }

        src++;
        dst++;
    }
}
|
||||
|
||||
// Horizontal-only filtering entry point. Forwards to ConvolveHoriz; y0Q4 and yStepQ4 are
// accepted but not used.
public static unsafe void Convolve8Horiz(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h) =>
    ConvolveHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h);
|
||||
|
||||
// Horizontal-only filtering with destination blending. Forwards to ConvolveAvgHoriz;
// y0Q4 and yStepQ4 are accepted but not used.
public static unsafe void Convolve8AvgHoriz(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h) =>
    ConvolveAvgHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h);
|
||||
|
||||
// Vertical-only filtering entry point. Forwards to ConvolveVert; x0Q4 and xStepQ4 are
// accepted but not used.
public static unsafe void Convolve8Vert(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h) =>
    ConvolveVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
|
||||
|
||||
// Vertical-only filtering with destination blending. Forwards to ConvolveAvgVert;
// x0Q4 and xStepQ4 are accepted but not used.
public static unsafe void Convolve8AvgVert(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h) =>
    ConvolveAvgVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
|
||||
|
||||
// Field-less struct whose only purpose is its size: 64 * 135 bytes. Convolve8 declares a
// local of this type and uses its address as the intermediate buffer.
[StructLayout(LayoutKind.Sequential, Size = 64 * 135)]
private struct Temp
{
}
|
||||
|
||||
// Two-dimensional filtering in two passes:
//   (1) interpolate horizontally into an intermediate buffer, temp (stride 64);
//   (2) interpolate temp vertically into dst.
//
// The fixed-size intermediate buffer places limits on the parameters. Derivation of its
// 135 rows, as given by the original authors:
//   - Smallest scaling factor is x1/2 ==> yStepQ4 = 32 (normative).
//   - Largest block size is 64x64 pixels.
//   - 64 rows in the downscaled frame span (64 - 1) * 32 in the original frame
//     (in 1/16th pixel units).
//   - Must round up because the block may be located at a sub-pixel position.
//   - An additional SubpelTaps rows are needed for the filter tails.
// When called from the frame scaling function the smallest scaling factor is x1/4
// ==> yStepQ4 = 64; since w and h are at most 16 there, the buffer is still big enough.
public static unsafe void Convolve8(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    const int tempStride = 64;

    Debug.Assert(w <= 64);
    Debug.Assert(h <= 64);
    Debug.Assert(yStepQ4 <= 32 || (yStepQ4 <= 64 && h <= 32));
    Debug.Assert(xStepQ4 <= 64);

    // A local of the fixed-size struct is used as the buffer to avoid zero initialization.
    Temp tempStruct;
    byte* temp = (byte*)Unsafe.AsPointer(ref tempStruct);

    // Rows the vertical pass will read: the span covered by h output rows plus the taps.
    int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps;
    int tapsAbove = SubpelTaps / 2 - 1;

    // The horizontal pass starts tapsAbove rows early so that the vertical pass, which
    // starts tapsAbove rows into temp, has its leading taps available.
    ConvolveHoriz(src - srcStride * tapsAbove, srcStride, temp, tempStride, filter, x0Q4, xStepQ4, w, intermediateHeight);
    ConvolveVert(temp + tempStride * tapsAbove, tempStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
}
|
||||
|
||||
// Two-dimensional filtering with destination blending: the filtered block is produced by
// Convolve8 into a 64x64 stack buffer and then merged into dst by ConvolveAvg.
// The fixed-size buffer limits w and h to 64.
public static unsafe void Convolve8Avg(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    const int tempStride = 64;

    byte* temp = stackalloc byte[64 * 64];

    Debug.Assert(w <= 64);
    Debug.Assert(h <= 64);

    Convolve8(src, srcStride, temp, tempStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);

    // ConvolveAvg receives no filter and zeroed positions/steps.
    ConvolveAvg(temp, tempStride, dst, dstStride, null, 0, 0, 0, 0, w, h);
}
|
||||
|
||||
// Copies h rows from src to dst, passing w as the count to MemoryUtil.Copy for each row.
// The filter, position and step parameters are accepted but not used.
public static unsafe void ConvolveCopy(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    for (int row = 0; row < h; row++)
    {
        MemoryUtil.Copy(dst, src, w);

        src += srcStride;
        dst += dstStride;
    }
}
|
||||
|
||||
// Merges a w x h source block into dst: every dst pixel becomes
// BitUtils.RoundPowerOfTwo(dst + src, 1). The filter, position and step parameters are
// accepted but not used.
public static unsafe void ConvolveAvg(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    for (int row = 0; row < h; row++)
    {
        for (int col = 0; col < w; col++)
        {
            int pairSum = dst[col] + src[col];
            dst[col] = (byte)BitUtils.RoundPowerOfTwo(pairSum, 1);
        }

        src += srcStride;
        dst += dstStride;
    }
}
|
||||
|
||||
// Forwards to Convolve8Horiz with all arguments unchanged.
public static unsafe void ScaledHoriz(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h) =>
    Convolve8Horiz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
|
||||
|
||||
// Forwards to Convolve8Vert with all arguments unchanged.
public static unsafe void ScaledVert(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h) =>
    Convolve8Vert(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
|
||||
|
||||
// Forwards to Convolve8 with all arguments unchanged.
public static unsafe void Scaled2D(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h) =>
    Convolve8(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
|
||||
|
||||
// Forwards to Convolve8AvgHoriz with all arguments unchanged.
public static unsafe void ScaledAvgHoriz(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h) =>
    Convolve8AvgHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
|
||||
|
||||
// Forwards to Convolve8AvgVert with all arguments unchanged.
public static unsafe void ScaledAvgVert(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h) =>
    Convolve8AvgVert(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
|
||||
|
||||
// Forwards to Convolve8Avg with all arguments unchanged.
public static unsafe void ScaledAvg2D(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h) =>
    Convolve8Avg(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
|
||||
|
||||
// Horizontal filter pass for 16-bit samples. Same walk as the 8-bit scalar path: the
// filter is chosen by xQ4 & SubpelMask, applied to SubpelTaps samples, and the rounded
// sum is clipped by BitUtils.ClipPixelHighbd for bit depth bd.
private static unsafe void HighbdConvolveHoriz(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] xFilters,
    int x0Q4,
    int xStepQ4,
    int w,
    int h,
    int bd)
{
    src -= SubpelTaps / 2 - 1;

    for (int row = 0; row < h; row++)
    {
        int xQ4 = x0Q4;

        for (int col = 0; col < w; col++)
        {
            ushort* window = src + (xQ4 >> SubpelBits);
            ref Array8<short> kernel = ref xFilters[xQ4 & SubpelMask];

            int acc = 0;
            for (int tap = 0; tap < SubpelTaps; tap++)
            {
                acc += window[tap] * kernel[tap];
            }

            dst[col] = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(acc, FilterBits), bd);

            xQ4 += xStepQ4;
        }

        src += srcStride;
        dst += dstStride;
    }
}
|
||||
|
||||
// Horizontal filter pass for 16-bit samples that blends with the destination: each
// filtered, clipped sample is combined with the existing dst sample through
// BitUtils.RoundPowerOfTwo(dst + value, 1).
private static unsafe void HighbdConvolveAvgHoriz(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] xFilters,
    int x0Q4,
    int xStepQ4,
    int w,
    int h,
    int bd)
{
    src -= SubpelTaps / 2 - 1;

    for (int row = 0; row < h; row++)
    {
        int xQ4 = x0Q4;

        for (int col = 0; col < w; col++)
        {
            ushort* window = src + (xQ4 >> SubpelBits);
            ref Array8<short> kernel = ref xFilters[xQ4 & SubpelMask];

            int acc = 0;
            for (int tap = 0; tap < SubpelTaps; tap++)
            {
                acc += window[tap] * kernel[tap];
            }

            ushort filtered = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(acc, FilterBits), bd);
            dst[col] = (ushort)BitUtils.RoundPowerOfTwo(dst[col] + filtered, 1);

            xQ4 += xStepQ4;
        }

        src += srcStride;
        dst += dstStride;
    }
}
|
||||
|
||||
// Vertical filter pass for 16-bit samples. Columns are processed one at a time; for each
// output sample the filter is chosen by yQ4 & SubpelMask, applied to SubpelTaps samples
// of the column, and the rounded sum is clipped by BitUtils.ClipPixelHighbd for depth bd.
private static unsafe void HighbdConvolveVert(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] yFilters,
    int y0Q4,
    int yStepQ4,
    int w,
    int h,
    int bd)
{
    src -= srcStride * (SubpelTaps / 2 - 1);

    for (int col = 0; col < w; col++)
    {
        int yQ4 = y0Q4;

        for (int row = 0; row < h; row++)
        {
            ushort* window = src + (yQ4 >> SubpelBits) * srcStride;
            ref Array8<short> kernel = ref yFilters[yQ4 & SubpelMask];

            int acc = 0;
            for (int tap = 0; tap < SubpelTaps; tap++)
            {
                acc += window[tap * srcStride] * kernel[tap];
            }

            dst[row * dstStride] = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(acc, FilterBits), bd);

            yQ4 += yStepQ4;
        }

        src++;
        dst++;
    }
}
|
||||
|
||||
// Vertical filter pass for 16-bit samples that blends with the destination: each
// filtered, clipped sample is combined with the existing dst sample through
// BitUtils.RoundPowerOfTwo(dst + value, 1).
private static unsafe void HighConvolveAvgVert(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] yFilters,
    int y0Q4,
    int yStepQ4,
    int w,
    int h,
    int bd)
{
    src -= srcStride * (SubpelTaps / 2 - 1);

    // Column by column; src and dst advance by one sample per column.
    for (int col = 0; col < w; col++)
    {
        int yQ4 = y0Q4;

        for (int row = 0; row < h; row++)
        {
            ushort* window = src + (yQ4 >> SubpelBits) * srcStride;
            ref Array8<short> kernel = ref yFilters[yQ4 & SubpelMask];

            int acc = 0;
            for (int tap = 0; tap < SubpelTaps; tap++)
            {
                acc += window[tap * srcStride] * kernel[tap];
            }

            ushort filtered = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(acc, FilterBits), bd);
            int dstIndex = row * dstStride;
            dst[dstIndex] = (ushort)BitUtils.RoundPowerOfTwo(dst[dstIndex] + filtered, 1);

            yQ4 += yStepQ4;
        }

        src++;
        dst++;
    }
}
|
||||
|
||||
// Two-dimensional filtering for 16-bit samples, in two passes:
//   (1) interpolate horizontally into an intermediate buffer, temp (stride 64);
//   (2) interpolate temp vertically into dst.
//
// The fixed-size intermediate buffer places limits on the parameters. Derivation of its
// 135 rows, as given by the original authors:
//   - Smallest scaling factor is x1/2 ==> yStepQ4 = 32 (normative).
//   - Largest block size is 64x64 pixels.
//   - 64 rows in the downscaled frame span (64 - 1) * 32 in the original frame
//     (in 1/16th pixel units).
//   - Must round up because the block may be located at a sub-pixel position.
//   - An additional SubpelTaps rows are needed for the filter tails.
private static unsafe void HighbdConvolve(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h,
    int bd)
{
    const int tempStride = 64;

    ushort* temp = stackalloc ushort[64 * 135];

    // Rows the vertical pass will read: the span covered by h output rows plus the taps.
    int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps;
    int tapsAbove = SubpelTaps / 2 - 1;

    Debug.Assert(w <= 64);
    Debug.Assert(h <= 64);
    Debug.Assert(yStepQ4 <= 32);
    Debug.Assert(xStepQ4 <= 32);

    // The horizontal pass starts tapsAbove rows early so that the vertical pass, which
    // starts tapsAbove rows into temp, has its leading taps available.
    HighbdConvolveHoriz(src - srcStride * tapsAbove, srcStride, temp, tempStride, filter, x0Q4, xStepQ4, w, intermediateHeight, bd);
    HighbdConvolveVert(temp + tempStride * tapsAbove, tempStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd);
}
|
||||
|
||||
// Horizontal-only filtering entry point for 16-bit samples. Forwards to
// HighbdConvolveHoriz; y0Q4 and yStepQ4 are accepted but not used.
public static unsafe void HighbdConvolve8Horiz(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h,
    int bd) =>
    HighbdConvolveHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h, bd);
|
||||
|
||||
// Horizontal-only filtering with destination blending for 16-bit samples. Forwards to
// HighbdConvolveAvgHoriz; y0Q4 and yStepQ4 are accepted but not used.
public static unsafe void HighbdConvolve8AvgHoriz(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h,
    int bd) =>
    HighbdConvolveAvgHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h, bd);
|
||||
|
||||
// Vertical-only filtering entry point for 16-bit samples. Forwards to
// HighbdConvolveVert; x0Q4 and xStepQ4 are accepted but not used.
public static unsafe void HighbdConvolve8Vert(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h,
    int bd) =>
    HighbdConvolveVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd);
|
||||
|
||||
/// <summary>
/// Vertical-only averaging entry point. Forwards to <c>HighConvolveAvgVert</c> with the
/// vertical start/step (<paramref name="y0Q4"/>, <paramref name="yStepQ4"/>).
/// </summary>
/// <remarks>
/// <paramref name="x0Q4"/> and <paramref name="xStepQ4"/> are accepted but not used by this method.
/// </remarks>
public static unsafe void HighbdConvolve8AvgVert(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h,
    int bd) => HighConvolveAvgVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd);
|
||||
|
||||
/// <summary>
/// Two-dimensional entry point. Forwards every argument unchanged to <c>HighbdConvolve</c>.
/// </summary>
public static unsafe void HighbdConvolve8(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h,
    int bd) => HighbdConvolve(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h, bd);
|
||||
|
||||
/// <summary>
/// Runs <see cref="HighbdConvolve8"/> into a stack-allocated 64x64 scratch block, then
/// merges that block into <paramref name="dst"/> through <see cref="HighbdConvolveAvg"/>.
/// </summary>
/// <remarks>
/// The scratch block has a fixed size, so <paramref name="w"/> and <paramref name="h"/>
/// must not exceed 64 (checked with debug assertions only).
/// </remarks>
public static unsafe void HighbdConvolve8Avg(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h,
    int bd)
{
    const int ScratchStride = 64;

    // Fixed size intermediate buffer places limits on parameters.
    ushort* scratch = stackalloc ushort[ScratchStride * 64];

    Debug.Assert(w <= 64);
    Debug.Assert(h <= 64);

    // Pass 1: filtered result into the scratch block.
    HighbdConvolve8(src, srcStride, scratch, ScratchStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h, bd);

    // Pass 2: average the scratch block with what is already in dst (no filter needed).
    HighbdConvolveAvg(scratch, ScratchStride, dst, dstStride, null, 0, 0, 0, 0, w, h, bd);
}
|
||||
|
||||
/// <summary>
/// Copies <paramref name="h"/> rows from <paramref name="src"/> to <paramref name="dst"/>,
/// passing <paramref name="w"/> as the length of each <c>MemoryUtil.Copy</c> call and
/// advancing each pointer by its own stride between rows.
/// </summary>
/// <remarks>
/// The filter, position, step and <paramref name="bd"/> arguments are not used by this method.
/// </remarks>
public static unsafe void HighbdConvolveCopy(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h,
    int bd)
{
    for (int row = 0; row < h; row++)
    {
        MemoryUtil.Copy(dst, src, w);

        src += srcStride;
        dst += dstStride;
    }
}
|
||||
|
||||
/// <summary>
/// Replaces each sample of a <paramref name="w"/> x <paramref name="h"/> region of
/// <paramref name="dst"/> with <c>BitUtils.RoundPowerOfTwo(dst + src, 1)</c> of the
/// co-located samples.
/// </summary>
/// <remarks>
/// The filter, position, step and <paramref name="bd"/> arguments are not used by this method.
/// </remarks>
public static unsafe void HighbdConvolveAvg(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h,
    int bd)
{
    for (int row = 0; row < h; row++)
    {
        for (int col = 0; col < w; col++)
        {
            int sum = dst[col] + src[col];

            dst[col] = (ushort)BitUtils.RoundPowerOfTwo(sum, 1);
        }

        src += srcStride;
        dst += dstStride;
    }
}
|
||||
}
|
||||
}
|
12
Ryujinx.Graphics.Nvdec.Vp9/Dsp/Filter.cs
Normal file
12
Ryujinx.Graphics.Nvdec.Vp9/Dsp/Filter.cs
Normal file
@ -0,0 +1,12 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
    /// <summary>
    /// Fixed-point constants for the sub-pixel filters (the convolution code uses
    /// <see cref="SubpelBits"/> and <see cref="SubpelTaps"/> to size its intermediate rows).
    /// </summary>
    internal static class Filter
    {
        public const int FilterBits = 7;

        /// <summary>Number of fractional bits in a sub-pixel position.</summary>
        public const int SubpelBits = 4;

        /// <summary>Number of distinct sub-pixel positions (1 &lt;&lt; SubpelBits).</summary>
        public const int SubpelShifts = 1 << SubpelBits;

        /// <summary>Mask selecting the fractional part of a sub-pixel position.</summary>
        public const int SubpelMask = SubpelShifts - 1;

        public const int SubpelTaps = 8;
    }
}
|
1379
Ryujinx.Graphics.Nvdec.Vp9/Dsp/IntraPred.cs
Normal file
1379
Ryujinx.Graphics.Nvdec.Vp9/Dsp/IntraPred.cs
Normal file
File diff suppressed because it is too large
Load Diff
2868
Ryujinx.Graphics.Nvdec.Vp9/Dsp/InvTxfm.cs
Normal file
2868
Ryujinx.Graphics.Nvdec.Vp9/Dsp/InvTxfm.cs
Normal file
File diff suppressed because it is too large
Load Diff
73
Ryujinx.Graphics.Nvdec.Vp9/Dsp/Prob.cs
Normal file
73
Ryujinx.Graphics.Nvdec.Vp9/Dsp/Prob.cs
Normal file
@ -0,0 +1,73 @@
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
    /// <summary>
    /// Helpers that turn symbol counts into 8-bit probabilities and blend them with
    /// previously stored probabilities.
    /// </summary>
    internal static class Prob
    {
        public const int MaxProb = 255;

        // Counts above this value all use the last entry of CountToUpdateFactor.
        private const int ModeMvCountSat = 20;

        // MODE_MV_MAX_UPDATE_FACTOR (128) * count / MODE_MV_COUNT_SAT;
        private static readonly uint[] CountToUpdateFactor =
        {
            0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64,
            70, 76, 83, 89, 96, 102, 108, 115, 121, 128
        };

        /// <summary>
        /// Computes round(num * 256 / den) and clamps the result into [1, 255].
        /// </summary>
        private static byte GetProb(uint num, uint den)
        {
            Debug.Assert(den != 0);

            int scaled = (int)(((ulong)num * 256 + (den >> 1)) / den);

            // Branch-light form of: (p > 255) ? 255 : (p < 1) ? 1 : p;
            int saturateHigh = (255 - scaled) >> 23; // all ones in the low byte when scaled > 255
            int liftZero = scaled == 0 ? 1 : 0;

            return (byte)(scaled | saturateHigh | liftZero);
        }

        /// <summary>
        /// Blends two probabilities: (prob1 * (256 - factor) + prob2 * factor), rounded and shifted right by 8.
        /// This function assumes prob1 and prob2 are already within [1,255] range.
        /// </summary>
        public static byte WeightedProb(int prob1, int prob2, int factor)
        {
            int blended = prob1 * (256 - factor) + prob2 * factor;

            return (byte)BitUtils.RoundPowerOfTwo(blended, 8);
        }

        /// <summary>
        /// Moves <paramref name="preProb"/> towards the probability observed from the two
        /// counts. The blend weight comes from <c>CountToUpdateFactor</c>, indexed by the
        /// total count saturated at <c>ModeMvCountSat</c>. With no observations the prior
        /// is returned unchanged.
        /// </summary>
        public static byte ModeMvMergeProbs(byte preProb, uint ct0, uint ct1)
        {
            uint total = ct0 + ct1;

            if (total == 0)
            {
                return preProb;
            }

            uint saturated = Math.Min(total, ModeMvCountSat);
            int updateFactor = (int)CountToUpdateFactor[(int)saturated];
            byte observed = GetProb(ct0, total);

            return WeightedProb(preProb, observed, updateFactor);
        }

        /// <summary>
        /// Recursively merges the probabilities of the tree node at index <paramref name="i"/>.
        /// Entries of <paramref name="tree"/> that are &lt;= 0 are leaves whose negated value
        /// indexes <paramref name="counts"/>; positive entries are indices of child nodes.
        /// </summary>
        /// <returns>The total count found below this node.</returns>
        private static uint TreeMergeProbsImpl(
            uint i,
            sbyte[] tree,
            ReadOnlySpan<byte> preProbs,
            ReadOnlySpan<uint> counts,
            Span<byte> probs)
        {
            int left = tree[i];
            uint leftCount = left > 0
                ? TreeMergeProbsImpl((uint)left, tree, preProbs, counts, probs)
                : counts[-left];

            int right = tree[i + 1];
            uint rightCount = right > 0
                ? TreeMergeProbsImpl((uint)right, tree, preProbs, counts, probs)
                : counts[-right];

            // Each node occupies two tree entries, so its probability slot is i / 2.
            int slot = (int)(i >> 1);
            probs[slot] = ModeMvMergeProbs(preProbs[slot], leftCount, rightCount);

            return leftCount + rightCount;
        }

        /// <summary>
        /// Merges the probabilities of a whole tree, starting from the root at index 0.
        /// </summary>
        public static void TreeMergeProbs(sbyte[] tree, ReadOnlySpan<byte> preProbs, ReadOnlySpan<uint> counts, Span<byte> probs)
        {
            TreeMergeProbsImpl(0, tree, preProbs, counts, probs);
        }
    }
}
|
237
Ryujinx.Graphics.Nvdec.Vp9/Dsp/Reader.cs
Normal file
237
Ryujinx.Graphics.Nvdec.Vp9/Dsp/Reader.cs
Normal file
@ -0,0 +1,237 @@
|
||||
using System;
|
||||
using System.Buffers.Binary;
|
||||
using Ryujinx.Common.Memory;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
internal struct Reader
|
||||
{
|
||||
private static readonly byte[] Norm = new byte[]
|
||||
{
|
||||
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
private const int BdValueSize = sizeof(ulong) * 8;
|
||||
|
||||
// This is meant to be a large, positive constant that can still be efficiently
|
||||
// loaded as an immediate (on platforms like ARM, for example).
|
||||
// Even relatively modest values like 100 would work fine.
|
||||
private const int LotsOfBits = 0x40000000;
|
||||
|
||||
public ulong Value;
|
||||
public uint Range;
|
||||
public int Count;
|
||||
private ArrayPtr<byte> _buffer;
|
||||
|
||||
/// <summary>
/// Binds the reader to <paramref name="size"/> bytes of <paramref name="buffer"/>, resets
/// the decoder state, pre-loads bits and reads the leading marker bit.
/// </summary>
/// <returns>
/// <c>true</c> when a non-zero size is given with a null buffer, or when the marker bit
/// read from the stream is non-zero; <c>false</c> otherwise.
/// NOTE(review): presumably <c>true</c> means "error", as in libvpx - confirm with callers.
/// </returns>
public bool Init(ArrayPtr<byte> buffer, int size)
{
    if (size != 0 && buffer.IsNull)
    {
        return true;
    }

    _buffer = new ArrayPtr<byte>(ref buffer[0], size);
    Value = 0;
    Count = -8;
    Range = 255;
    Fill();

    // Marker bit
    bool markerSet = ReadBit() != 0;

    return markerSet;
}
|
||||
|
||||
/// <summary>
/// Tops up <c>Value</c> with bytes from <c>_buffer</c>, advances <c>_buffer</c> past the
/// consumed bytes and updates <c>Count</c> accordingly.
/// </summary>
/// <remarks>
/// When more than <c>BdValueSize</c> bits remain, all needed bytes are taken from a single
/// big-endian 64-bit read. Otherwise bytes are consumed one at a time, and
/// <c>LotsOfBits</c> is added to <c>Count</c> once the remaining data cannot fill the window.
/// </remarks>
private void Fill()
{
    ReadOnlySpan<byte> remaining = _buffer.ToSpan();
    int lengthAtEntry = remaining.Length;

    ulong acc = Value;
    int bitCount = Count;
    ulong bitsLeft = (ulong)remaining.Length * 8;
    int shift = BdValueSize - 8 - (bitCount + 8);

    if (bitsLeft > BdValueSize)
    {
        // Whole bytes needed to fill the window, expressed in bits.
        int bits = (shift & unchecked((int)0xfffffff8)) + 8;
        ulong nv = BinaryPrimitives.ReadUInt64BigEndian(remaining) >> (BdValueSize - bits);

        bitCount += bits;
        remaining = remaining.Slice(bits >> 3);
        acc = Value | (nv << (shift & 0x7));
    }
    else
    {
        int bitsOver = shift + 8 - (int)bitsLeft;
        int loopEnd = 0;

        if (bitsOver >= 0)
        {
            bitCount += LotsOfBits;
            loopEnd = bitsOver;
        }

        if (bitsOver < 0 || bitsLeft != 0)
        {
            for (; shift >= loopEnd; shift -= 8)
            {
                bitCount += 8;
                acc |= (ulong)remaining[0] << shift;
                remaining = remaining.Slice(1);
            }
        }
    }

    // NOTE: The local span may not relate to '_buffer' after decryption,
    // so we increase '_buffer' by the amount that the span moved, rather than
    // assign the span to '_buffer'.
    _buffer = _buffer.Slice(lengthAtEntry - remaining.Length);
    Value = acc;
    Count = bitCount;
}
|
||||
|
||||
/// <summary>
/// Reports whether decoding has run past the end of the supplied data.
/// </summary>
/// <returns>
/// <c>true</c> if bits were requested after the end of the stream was reached,
/// <c>false</c> if there is no error.
/// </returns>
public bool HasError()
{
    // 'Count' stores the number of bits in the 'Value' buffer, minus 8. The top
    // byte is part of the algorithm, and the remainder is buffered to be shifted
    // into it. So if Count == 8, the top 16 bits of 'Value' are occupied, 8 for
    // the algorithm and 8 in the buffer.
    //
    // When reading a byte from the user's buffer, Count is filled with 8 and one
    // byte is filled into the value buffer. When we reach the end of the data,
    // Count is additionally filled with LotsOfBits. So when
    // Count == LotsOfBits - 1, the user's data has been exhausted.
    bool aboveWindow = Count > BdValueSize;

    return aboveWindow && Count < LotsOfBits;
}
|
||||
|
||||
/// <summary>
/// Decodes one binary symbol. The split point is derived from <c>Range</c> and
/// <paramref name="prob"/>; the result is 1 when <c>Value</c> lies at or above the split
/// and 0 otherwise. <c>Range</c>, <c>Value</c> and <c>Count</c> are then renormalized
/// using the <c>Norm</c> shift table.
/// </summary>
/// <returns>The decoded bit (0 or 1).</returns>
public int Read(int prob)
{
    uint split = (Range * (uint)prob + (256 - (uint)prob)) >> 8;

    // Refill the bit window before looking at Value.
    if (Count < 0)
    {
        Fill();
    }

    ulong bigSplit = (ulong)split << (BdValueSize - 8);
    ulong value = Value;
    uint range;
    int bit;

    if (value >= bigSplit)
    {
        value -= bigSplit;
        range = Range - split;
        bit = 1;
    }
    else
    {
        range = split;
        bit = 0;
    }

    int shift = Norm[range];

    Range = range << shift;
    Value = value << shift;
    Count -= shift;

    return bit;
}
|
||||
|
||||
/// <summary>
/// Reads one bit using probability 128 (vpx_prob_half).
/// </summary>
public int ReadBit() => Read(128);
|
||||
|
||||
/// <summary>
/// Reads <paramref name="bits"/> bits with <see cref="ReadBit"/>, most significant bit
/// first, and returns them packed into an integer.
/// </summary>
public int ReadLiteral(int bits)
{
    int literal = 0;
    int position = bits;

    while (--position >= 0)
    {
        literal |= ReadBit() << position;
    }

    return literal;
}
|
||||
|
||||
/// <summary>
/// Walks <paramref name="tree"/> from index 0, reading one symbol per step with the
/// probability stored at <c>probs[node &gt;&gt; 1]</c>, until a non-positive entry is reached.
/// </summary>
/// <returns>The negated value of the non-positive entry that ended the walk.</returns>
public int ReadTree(ReadOnlySpan<sbyte> tree, ReadOnlySpan<byte> probs)
{
    sbyte node = 0;

    do
    {
        // The decoded bit selects the first or second entry of the current pair.
        node = tree[node + Read(probs[node >> 1])];
    }
    while (node > 0);

    return -node;
}
|
||||
|
||||
/// <summary>
/// Variant of <see cref="Read"/> that works on caller-held copies of the decoder state.
/// The struct fields are only synchronized with the copies around a refill.
/// </summary>
/// <param name="prob">Probability used to compute the split point.</param>
/// <param name="value">Caller's copy of the bit window; updated in place.</param>
/// <param name="count">Caller's copy of the bit count; updated in place.</param>
/// <param name="range">Caller's copy of the range; updated in place.</param>
/// <returns>The decoded bit (0 or 1).</returns>
public int ReadBool(int prob, ref ulong value, ref int count, ref uint range)
{
    uint split = (range * (uint)prob + (256 - (uint)prob)) >> 8;
    ulong bigSplit = (ulong)split << (BdValueSize - 8);

    if (count < 0)
    {
        // Fill() operates on the fields, so publish the local state, refill, and read it back.
        Value = value;
        Count = count;
        Fill();
        value = Value;
        count = Count;
    }

    int bit;

    if (value >= bigSplit)
    {
        range -= split;
        value -= bigSplit;
        bit = 1;
    }
    else
    {
        range = split;
        bit = 0;
    }

    int shift = Norm[range];

    range <<= shift;
    value <<= shift;
    count -= shift;

    return bit;
}
|
||||
|
||||
/// <summary>
/// Steps <c>_buffer</c> back one byte for every 8 bits by which <c>Count</c> is reduced,
/// for as long as <c>Count</c> is above 8 and below <c>BdValueSize</c>.
/// </summary>
/// <returns>The adjusted buffer position, i.e. the end of the coded data.</returns>
public ArrayPtr<byte> FindEnd()
{
    // Find the end of the coded buffer
    for (; Count > 8 && Count < BdValueSize; Count -= 8)
    {
        _buffer = _buffer.Slice(-1);
    }

    return _buffer;
}
|
||||
}
|
||||
}
|
54
Ryujinx.Graphics.Nvdec.Vp9/Dsp/TxfmCommon.cs
Normal file
54
Ryujinx.Graphics.Nvdec.Vp9/Dsp/TxfmCommon.cs
Normal file
@ -0,0 +1,54 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
    /// <summary>
    /// Fixed-point constants shared by the idct/dct functions.
    /// </summary>
    internal static class TxfmCommon
    {
        /// <summary>Number of fractional bits in the trigonometric constants below.</summary>
        public const int DctConstBits = 14;

        /// <summary>Half of one unit at <see cref="DctConstBits"/> precision.</summary>
        public const int DctConstRounding = 1 << (DctConstBits - 1);

        public const int UnitQuantShift = 2;
        public const int UnitQuantFactor = 1 << UnitQuantShift;

        // CosPi{i}_64 = round(16384 * cos(i * Pi / 64)), generated with:
        //   for (int i = 1; i < 32; ++i)
        //       Console.WriteLine("public const short CosPi{0}_64 = {1};", i, MathF.Round(16384 * MathF.Cos(i * MathF.PI / 64)));
        // Note: sin(k * Pi / 64) = cos((32 - k) * Pi / 64)
        public const short CosPi1_64 = 16364;
        public const short CosPi2_64 = 16305;
        public const short CosPi3_64 = 16207;
        public const short CosPi4_64 = 16069;
        public const short CosPi5_64 = 15893;
        public const short CosPi6_64 = 15679;
        public const short CosPi7_64 = 15426;
        public const short CosPi8_64 = 15137;
        public const short CosPi9_64 = 14811;
        public const short CosPi10_64 = 14449;
        public const short CosPi11_64 = 14053;
        public const short CosPi12_64 = 13623;
        public const short CosPi13_64 = 13160;
        public const short CosPi14_64 = 12665;
        public const short CosPi15_64 = 12140;
        public const short CosPi16_64 = 11585;
        public const short CosPi17_64 = 11003;
        public const short CosPi18_64 = 10394;
        public const short CosPi19_64 = 9760;
        public const short CosPi20_64 = 9102;
        public const short CosPi21_64 = 8423;
        public const short CosPi22_64 = 7723;
        public const short CosPi23_64 = 7005;
        public const short CosPi24_64 = 6270;
        public const short CosPi25_64 = 5520;
        public const short CosPi26_64 = 4756;
        public const short CosPi27_64 = 3981;
        public const short CosPi28_64 = 3196;
        public const short CosPi29_64 = 2404;
        public const short CosPi30_64 = 1606;
        public const short CosPi31_64 = 804;

        // SinPi{k}_9 = 16384 * sqrt(2) * sin(kPi / 9) * 2 / 3
        public const short SinPi1_9 = 5283;
        public const short SinPi2_9 = 9929;
        public const short SinPi3_9 = 13377;
        public const short SinPi4_9 = 15212;
    }
}
|
536
Ryujinx.Graphics.Nvdec.Vp9/Idct.cs
Normal file
536
Ryujinx.Graphics.Nvdec.Vp9/Idct.cs
Normal file
@ -0,0 +1,536 @@
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Types;
|
||||
using System;
|
||||
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
internal static class Idct
|
||||
{
|
||||
// One-dimensional inverse transform pass: reads coefficients from 'input', writes to 'output'.
private delegate void Transform1D(ReadOnlySpan<int> input, Span<int> output);

// Same as Transform1D, with the bit depth passed along.
private delegate void HighbdTransform1D(ReadOnlySpan<int> input, Span<int> output, int bd);

// Pair of 1-D passes that together make up a 2-D inverse transform.
private struct Transform2D
{
    public Transform1D Cols; // Vertical
    public Transform1D Rows; // Horizontal

    public Transform2D(Transform1D cols, Transform1D rows)
    {
        Cols = cols;
        Rows = rows;
    }
}

// Bit-depth-aware counterpart of Transform2D.
private struct HighbdTransform2D
{
    public HighbdTransform1D Cols; // Vertical
    public HighbdTransform1D Rows; // Horizontal

    public HighbdTransform2D(HighbdTransform1D cols, HighbdTransform1D rows)
    {
        Cols = cols;
        Rows = rows;
    }
}
|
||||
|
||||
// 4-point transform pairs, indexed by transform type.
private static readonly Transform2D[] Iht4 =
{
    new Transform2D(Idct4, Idct4),   // DCT_DCT = 0
    new Transform2D(Iadst4, Idct4),  // ADST_DCT = 1
    new Transform2D(Idct4, Iadst4),  // DCT_ADST = 2
    new Transform2D(Iadst4, Iadst4)  // ADST_ADST = 3
};

/// <summary>
/// Inverse 4x4 hybrid transform: applies the row pass and then the column pass selected
/// by <paramref name="txType"/>, and adds the rounded result (shift by 4) to
/// <paramref name="dest"/> through <c>ClipPixelAdd</c>.
/// </summary>
/// <param name="input">Coefficients, one row of 4 after another.</param>
/// <param name="dest">Destination pixels, updated in place.</param>
/// <param name="stride">Distance in elements between consecutive destination rows.</param>
/// <param name="txType">Index into the <c>Iht4</c> table.</param>
public static void Iht4x416Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
{
    const int Size = 4;

    Span<int> output = stackalloc int[Size * Size];
    Span<int> tempIn = stackalloc int[Size];
    Span<int> tempOut = stackalloc int[Size];
    Transform2D ht = Iht4[txType];

    // Inverse transform row vectors
    Span<int> rowOut = output;
    for (int row = 0; row < Size; row++)
    {
        ht.Rows(input, rowOut);
        input = input.Slice(Size);
        rowOut = rowOut.Slice(Size);
    }

    // Inverse transform column vectors
    for (int col = 0; col < Size; col++)
    {
        // Gather the column into a contiguous scratch vector.
        for (int row = 0; row < Size; row++)
        {
            tempIn[row] = output[row * Size + col];
        }

        ht.Cols(tempIn, tempOut);

        for (int row = 0; row < Size; row++)
        {
            int pos = row * stride + col;
            dest[pos] = ClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[row], 4));
        }
    }
}
|
||||
|
||||
// 8-point transform pairs, indexed by transform type.
private static readonly Transform2D[] Iht8 =
{
    new Transform2D(Idct8, Idct8),   // DCT_DCT = 0
    new Transform2D(Iadst8, Idct8),  // ADST_DCT = 1
    new Transform2D(Idct8, Iadst8),  // DCT_ADST = 2
    new Transform2D(Iadst8, Iadst8)  // ADST_ADST = 3
};

/// <summary>
/// Inverse 8x8 hybrid transform: applies the row pass and then the column pass selected
/// by <paramref name="txType"/>, and adds the rounded result (shift by 5) to
/// <paramref name="dest"/> through <c>ClipPixelAdd</c>.
/// </summary>
/// <param name="input">Coefficients, one row of 8 after another.</param>
/// <param name="dest">Destination pixels, updated in place.</param>
/// <param name="stride">Distance in elements between consecutive destination rows.</param>
/// <param name="txType">Index into the <c>Iht8</c> table.</param>
public static void Iht8x864Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
{
    const int Size = 8;

    Span<int> output = stackalloc int[Size * Size];
    Span<int> tempIn = stackalloc int[Size];
    Span<int> tempOut = stackalloc int[Size];
    Transform2D ht = Iht8[txType];

    // Inverse transform row vectors
    Span<int> rowOut = output;
    for (int row = 0; row < Size; row++)
    {
        ht.Rows(input, rowOut);
        input = input.Slice(Size);
        rowOut = rowOut.Slice(Size);
    }

    // Inverse transform column vectors
    for (int col = 0; col < Size; col++)
    {
        // Gather the column into a contiguous scratch vector.
        for (int row = 0; row < Size; row++)
        {
            tempIn[row] = output[row * Size + col];
        }

        ht.Cols(tempIn, tempOut);

        for (int row = 0; row < Size; row++)
        {
            int pos = row * stride + col;
            dest[pos] = ClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[row], 5));
        }
    }
}
|
||||
|
||||
// 16-point transform pairs, indexed by transform type.
private static readonly Transform2D[] Iht16 =
{
    new Transform2D(Idct16, Idct16),   // DCT_DCT = 0
    new Transform2D(Iadst16, Idct16),  // ADST_DCT = 1
    new Transform2D(Idct16, Iadst16),  // DCT_ADST = 2
    new Transform2D(Iadst16, Iadst16)  // ADST_ADST = 3
};

/// <summary>
/// Inverse 16x16 hybrid transform: applies the row pass and then the column pass selected
/// by <paramref name="txType"/>, and adds the rounded result (shift by 6) to
/// <paramref name="dest"/> through <c>ClipPixelAdd</c>.
/// </summary>
/// <param name="input">Coefficients, one row of 16 after another.</param>
/// <param name="dest">Destination pixels, updated in place.</param>
/// <param name="stride">Distance in elements between consecutive destination rows.</param>
/// <param name="txType">Index into the <c>Iht16</c> table.</param>
public static void Iht16x16256Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
{
    const int Size = 16;

    Span<int> output = stackalloc int[Size * Size];
    Span<int> tempIn = stackalloc int[Size];
    Span<int> tempOut = stackalloc int[Size];
    Transform2D ht = Iht16[txType];

    // Rows
    Span<int> rowOut = output;
    for (int row = 0; row < Size; row++)
    {
        ht.Rows(input, rowOut);
        input = input.Slice(Size);
        rowOut = rowOut.Slice(Size);
    }

    // Columns
    for (int col = 0; col < Size; col++)
    {
        // Gather the column into a contiguous scratch vector.
        for (int row = 0; row < Size; row++)
        {
            tempIn[row] = output[row * Size + col];
        }

        ht.Cols(tempIn, tempOut);

        for (int row = 0; row < Size; row++)
        {
            int pos = row * stride + col;
            dest[pos] = ClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[row], 6));
        }
    }
}
|
||||
|
||||
// Idct
|
||||
/// <summary>
/// Chooses the 4x4 inverse DCT variant from <paramref name="eob"/>: <c>Idct4x416Add</c>
/// when it is greater than 1, <c>Idct4x41Add</c> otherwise.
/// </summary>
public static void Idct4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
{
    if (eob <= 1)
    {
        Idct4x41Add(input, dest, stride);

        return;
    }

    Idct4x416Add(input, dest, stride);
}
|
||||
|
||||
/// <summary>
/// Chooses the 4x4 inverse WHT variant from <paramref name="eob"/>: <c>Iwht4x416Add</c>
/// when it is greater than 1, <c>Iwht4x41Add</c> otherwise.
/// </summary>
public static void Iwht4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
{
    if (eob <= 1)
    {
        Iwht4x41Add(input, dest, stride);

        return;
    }

    Iwht4x416Add(input, dest, stride);
}
|
||||
|
||||
/// <summary>
/// Chooses the 8x8 inverse DCT variant from <paramref name="eob"/>.
/// </summary>
/// <remarks>
/// If dc is 1, then input[0] is the reconstructed value and needs no dequantization.
/// Also, when dc is 1, dc is counted in eobs, namely eobs >= 1.
/// The calculation can be simplified if there are not many non-zero dct coefficients,
/// so eob decides which variant runs.
/// </remarks>
public static void Idct8x8Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
{
    if (eob == 1)
    {
        // DC only DCT coefficient
        Idct8x81Add(input, dest, stride);

        return;
    }

    if (eob <= 12)
    {
        Idct8x812Add(input, dest, stride);

        return;
    }

    Idct8x864Add(input, dest, stride);
}
|
||||
|
||||
/// <summary>
/// Chooses the 16x16 inverse DCT variant from <paramref name="eob"/>.
/// </summary>
/// <remarks>
/// The calculation can be simplified if there are not many non-zero dct coefficients,
/// so eob separates the cases (1, up to 10, up to 38, everything else).
/// </remarks>
public static void Idct16x16Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
{
    if (eob == 1)
    {
        // DC only DCT coefficient.
        Idct16x161Add(input, dest, stride);

        return;
    }

    if (eob <= 10)
    {
        Idct16x1610Add(input, dest, stride);

        return;
    }

    if (eob <= 38)
    {
        Idct16x1638Add(input, dest, stride);

        return;
    }

    Idct16x16256Add(input, dest, stride);
}
|
||||
|
||||
/// <summary>
/// Chooses the 32x32 inverse DCT variant from <paramref name="eob"/>
/// (1, up to 34, up to 135, everything else).
/// </summary>
public static void Idct32x32Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
{
    if (eob == 1)
    {
        Idct32x321Add(input, dest, stride);

        return;
    }

    if (eob <= 34)
    {
        // Non-zero coeff only in upper-left 8x8
        Idct32x3234Add(input, dest, stride);

        return;
    }

    if (eob <= 135)
    {
        // Non-zero coeff only in upper-left 16x16
        Idct32x32135Add(input, dest, stride);

        return;
    }

    Idct32x321024Add(input, dest, stride);
}
|
||||
|
||||
// Iht
|
||||
/// <summary>
/// 4x4 inverse transform entry point: <c>TxType.DctDct</c> goes through
/// <see cref="Idct4x4Add"/> (which uses <paramref name="eob"/>); every other type goes
/// through <see cref="Iht4x416Add"/> with the type cast to its table index.
/// </summary>
public static void Iht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
{
    if (txType != TxType.DctDct)
    {
        Iht4x416Add(input, dest, stride, (int)txType);

        return;
    }

    Idct4x4Add(input, dest, stride, eob);
}
|
||||
|
||||
/// <summary>
/// 8x8 inverse transform entry point: <c>TxType.DctDct</c> goes through
/// <see cref="Idct8x8Add"/> (which uses <paramref name="eob"/>); every other type goes
/// through <see cref="Iht8x864Add"/> with the type cast to its table index.
/// </summary>
public static void Iht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
{
    if (txType != TxType.DctDct)
    {
        Iht8x864Add(input, dest, stride, (int)txType);

        return;
    }

    Idct8x8Add(input, dest, stride, eob);
}
|
||||
|
||||
/// <summary>
/// 16x16 inverse transform entry point: <c>TxType.DctDct</c> goes through
/// <see cref="Idct16x16Add"/> (which uses <paramref name="eob"/>); every other type goes
/// through <see cref="Iht16x16256Add"/> with the type cast to its table index.
/// </summary>
public static void Iht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
{
    if (txType != TxType.DctDct)
    {
        Iht16x16256Add(input, dest, stride, (int)txType);

        return;
    }

    Idct16x16Add(input, dest, stride, eob);
}
|
||||
|
||||
// 4-point high bit depth transform pairs, indexed by transform type.
private static readonly HighbdTransform2D[] HighbdIht4 =
{
    new HighbdTransform2D(HighbdIdct4, HighbdIdct4),   // DCT_DCT = 0
    new HighbdTransform2D(HighbdIadst4, HighbdIdct4),  // ADST_DCT = 1
    new HighbdTransform2D(HighbdIdct4, HighbdIadst4),  // DCT_ADST = 2
    new HighbdTransform2D(HighbdIadst4, HighbdIadst4)  // ADST_ADST = 3
};

/// <summary>
/// High bit depth inverse 4x4 hybrid transform: applies the row pass and then the column
/// pass selected by <paramref name="txType"/>, and adds the rounded result (shift by 4)
/// to <paramref name="dest"/> through <c>HighbdClipPixelAdd</c>.
/// </summary>
/// <param name="input">Coefficients, one row of 4 after another.</param>
/// <param name="dest">Destination samples, updated in place.</param>
/// <param name="stride">Distance in elements between consecutive destination rows.</param>
/// <param name="txType">Index into the <c>HighbdIht4</c> table.</param>
/// <param name="bd">Bit depth, forwarded to the transform passes and the clipping helper.</param>
public static void HighbdIht4x416Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
{
    const int Size = 4;

    Span<int> output = stackalloc int[Size * Size];
    Span<int> tempIn = stackalloc int[Size];
    Span<int> tempOut = stackalloc int[Size];
    HighbdTransform2D ht = HighbdIht4[txType];

    // Inverse transform row vectors.
    Span<int> rowOut = output;
    for (int row = 0; row < Size; row++)
    {
        ht.Rows(input, rowOut, bd);
        input = input.Slice(Size);
        rowOut = rowOut.Slice(Size);
    }

    // Inverse transform column vectors.
    for (int col = 0; col < Size; col++)
    {
        // Gather the column into a contiguous scratch vector.
        for (int row = 0; row < Size; row++)
        {
            tempIn[row] = output[row * Size + col];
        }

        ht.Cols(tempIn, tempOut, bd);

        for (int row = 0; row < Size; row++)
        {
            int pos = row * stride + col;
            dest[pos] = HighbdClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[row], 4), bd);
        }
    }
}
|
||||
|
||||
// 8-point high bit depth transform pairs, indexed by transform type.
private static readonly HighbdTransform2D[] HighIht8 = new HighbdTransform2D[]
{
    new HighbdTransform2D(HighbdIdct8, HighbdIdct8),   // DCT_DCT = 0
    new HighbdTransform2D(HighbdIadst8, HighbdIdct8),  // ADST_DCT = 1
    new HighbdTransform2D(HighbdIdct8, HighbdIadst8),  // DCT_ADST = 2
    new HighbdTransform2D(HighbdIadst8, HighbdIadst8)  // ADST_ADST = 3
};

/// <summary>
/// High bit depth inverse 8x8 hybrid transform: applies the row pass and then the column
/// pass selected by <paramref name="txType"/>, and adds the rounded result (shift by 5)
/// to <paramref name="dest"/> through <c>HighbdClipPixelAdd</c>.
/// </summary>
/// <param name="input">Coefficients, one row of 8 after another.</param>
/// <param name="dest">Destination samples, updated in place.</param>
/// <param name="stride">Distance in elements between consecutive destination rows.</param>
/// <param name="txType">Index into the <c>HighIht8</c> table.</param>
/// <param name="bd">Bit depth, forwarded to the transform passes and the clipping helper.</param>
public static void HighbdIht8x864Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
{
    int i, j;
    Span<int> output = stackalloc int[8 * 8];
    Span<int> outptr = output;
    Span<int> tempIn = stackalloc int[8];
    Span<int> tempOut = stackalloc int[8];
    HighbdTransform2D ht = HighIht8[txType];

    // Inverse transform row vectors.
    for (i = 0; i < 8; ++i)
    {
        ht.Rows(input, outptr, bd);
        input = input.Slice(8);

        // Advance relative to the current window. Slicing 'output' here would pin the
        // window to row 1, leaving rows 2..7 of the intermediate buffer unwritten.
        outptr = outptr.Slice(8);
    }

    // Inverse transform column vectors.
    for (i = 0; i < 8; ++i)
    {
        // Gather column i into a contiguous scratch vector.
        for (j = 0; j < 8; ++j)
        {
            tempIn[j] = output[j * 8 + i];
        }

        ht.Cols(tempIn, tempOut, bd);

        for (j = 0; j < 8; ++j)
        {
            dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 5), bd);
        }
    }
}
|
||||
|
||||
// 16-point high bit depth transform pairs, indexed by transform type.
private static readonly HighbdTransform2D[] HighIht16 = new HighbdTransform2D[]
{
    new HighbdTransform2D(HighbdIdct16, HighbdIdct16),   // DCT_DCT = 0
    new HighbdTransform2D(HighbdIadst16, HighbdIdct16),  // ADST_DCT = 1
    new HighbdTransform2D(HighbdIdct16, HighbdIadst16),  // DCT_ADST = 2
    new HighbdTransform2D(HighbdIadst16, HighbdIadst16)  // ADST_ADST = 3
};

/// <summary>
/// High bit depth inverse 16x16 hybrid transform: applies the row pass and then the column
/// pass selected by <paramref name="txType"/>, and adds the rounded result (shift by 6)
/// to <paramref name="dest"/> through <c>HighbdClipPixelAdd</c>.
/// </summary>
/// <param name="input">Coefficients, one row of 16 after another.</param>
/// <param name="dest">Destination samples, updated in place.</param>
/// <param name="stride">Distance in elements between consecutive destination rows.</param>
/// <param name="txType">Index into the <c>HighIht16</c> table.</param>
/// <param name="bd">Bit depth, forwarded to the transform passes and the clipping helper.</param>
public static void HighbdIht16x16256Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
{
    int i, j;
    Span<int> output = stackalloc int[16 * 16];
    Span<int> outptr = output;
    Span<int> tempIn = stackalloc int[16];
    Span<int> tempOut = stackalloc int[16];
    HighbdTransform2D ht = HighIht16[txType];

    // Rows
    for (i = 0; i < 16; ++i)
    {
        ht.Rows(input, outptr, bd);
        input = input.Slice(16);

        // Advance relative to the current window. Slicing 'output' here would pin the
        // window to row 1, leaving rows 2..15 of the intermediate buffer unwritten.
        outptr = outptr.Slice(16);
    }

    // Columns
    for (i = 0; i < 16; ++i)
    {
        // Gather column i into a contiguous scratch vector.
        for (j = 0; j < 16; ++j)
        {
            tempIn[j] = output[j * 16 + i];
        }

        ht.Cols(tempIn, tempOut, bd);

        for (j = 0; j < 16; ++j)
        {
            dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6), bd);
        }
    }
}
|
||||
|
||||
// Idct
|
||||
/// <summary>
/// Chooses the high bit depth 4x4 inverse DCT variant from <paramref name="eob"/>:
/// <c>HighbdIdct4x416Add</c> when it is greater than 1, <c>HighbdIdct4x41Add</c> otherwise.
/// </summary>
public static void HighbdIdct4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
{
    if (eob <= 1)
    {
        HighbdIdct4x41Add(input, dest, stride, bd);

        return;
    }

    HighbdIdct4x416Add(input, dest, stride, bd);
}
|
||||
|
||||
/// <summary>
/// Chooses the high bit depth 4x4 inverse WHT variant from <paramref name="eob"/>:
/// <c>HighbdIwht4x416Add</c> when it is greater than 1, <c>HighbdIwht4x41Add</c> otherwise.
/// </summary>
public static void HighbdIwht4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
{
    if (eob <= 1)
    {
        HighbdIwht4x41Add(input, dest, stride, bd);

        return;
    }

    HighbdIwht4x416Add(input, dest, stride, bd);
}
|
||||
|
||||
/// <summary>
/// Chooses the high bit depth 8x8 inverse DCT variant from <paramref name="eob"/>.
/// </summary>
/// <remarks>
/// If dc is 1, then input[0] is the reconstructed value and needs no dequantization.
/// Also, when dc is 1, dc is counted in eobs, namely eobs >= 1.
/// The calculation can be simplified if there are not many non-zero dct coefficients,
/// so eob decides which variant runs.
/// </remarks>
public static void HighbdIdct8x8Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
{
    if (eob == 1)
    {
        // DC only DCT coefficient
        vpx_Highbdidct8x8_1_add_c(input, dest, stride, bd);

        return;
    }

    if (eob <= 12)
    {
        HighbdIdct8x812Add(input, dest, stride, bd);

        return;
    }

    HighbdIdct8x864Add(input, dest, stride, bd);
}
|
||||
|
||||
/// <summary>
/// Chooses the high bit depth 16x16 inverse DCT variant from <paramref name="eob"/>.
/// </summary>
/// <remarks>
/// The calculation can be simplified if there are not many non-zero dct coefficients,
/// so eob separates the cases (1, up to 10, up to 38, everything else).
/// </remarks>
public static void HighbdIdct16x16Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
{
    if (eob == 1)
    {
        // DC only DCT coefficient.
        HighbdIdct16x161Add(input, dest, stride, bd);

        return;
    }

    if (eob <= 10)
    {
        HighbdIdct16x1610Add(input, dest, stride, bd);

        return;
    }

    if (eob <= 38)
    {
        HighbdIdct16x1638Add(input, dest, stride, bd);

        return;
    }

    HighbdIdct16x16256Add(input, dest, stride, bd);
}
|
||||
|
||||
/// <summary>
/// Chooses the high bit depth 32x32 inverse DCT variant from <paramref name="eob"/>
/// (1, up to 34, up to 135, everything else).
/// </summary>
public static void HighbdIdct32x32Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
{
    if (eob == 1)
    {
        HighbdIdct32x321Add(input, dest, stride, bd);

        return;
    }

    if (eob <= 34)
    {
        // Non-zero coeff only in upper-left 8x8
        HighbdIdct32x3234Add(input, dest, stride, bd);

        return;
    }

    if (eob <= 135)
    {
        HighbdIdct32x32135Add(input, dest, stride, bd);

        return;
    }

    HighbdIdct32x321024Add(input, dest, stride, bd);
}
|
||||
|
||||
// Iht
|
||||
/// <summary>
/// High bit depth 4x4 inverse transform entry point: <c>TxType.DctDct</c> goes through
/// <see cref="HighbdIdct4x4Add"/> (which uses <paramref name="eob"/>); every other type goes
/// through <see cref="HighbdIht4x416Add"/> with the type cast to its table index.
/// </summary>
public static void HighbdIht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
{
    if (txType != TxType.DctDct)
    {
        HighbdIht4x416Add(input, dest, stride, (int)txType, bd);

        return;
    }

    HighbdIdct4x4Add(input, dest, stride, eob, bd);
}
|
||||
|
||||
/// <summary>
/// High bit depth 8x8 inverse transform entry point: <c>TxType.DctDct</c> goes through
/// <see cref="HighbdIdct8x8Add"/> (which uses <paramref name="eob"/>); every other type goes
/// through <see cref="HighbdIht8x864Add"/> with the type cast to its table index.
/// </summary>
public static void HighbdIht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
{
    if (txType != TxType.DctDct)
    {
        HighbdIht8x864Add(input, dest, stride, (int)txType, bd);

        return;
    }

    HighbdIdct8x8Add(input, dest, stride, eob, bd);
}
|
||||
|
||||
/// <summary>
/// High bit depth 16x16 inverse transform entry point: <c>TxType.DctDct</c> goes through
/// <see cref="HighbdIdct16x16Add"/> (which uses <paramref name="eob"/>); every other type goes
/// through <see cref="HighbdIht16x16256Add"/> with the type cast to its table index.
/// </summary>
public static void HighbdIht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
{
    if (txType != TxType.DctDct)
    {
        HighbdIht16x16256Add(input, dest, stride, (int)txType, bd);

        return;
    }

    HighbdIdct16x16Add(input, dest, stride, eob, bd);
}
|
||||
}
|
||||
}
|
15
Ryujinx.Graphics.Nvdec.Vp9/InternalErrorException.cs
Normal file
15
Ryujinx.Graphics.Nvdec.Vp9/InternalErrorException.cs
Normal file
@ -0,0 +1,15 @@
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9
{
    /// <summary>
    /// Exception carrying an internal decoder error message.
    /// </summary>
    internal class InternalErrorException : Exception
    {
        /// <summary>Creates the exception with a message.</summary>
        public InternalErrorException(string message)
            : base(message)
        {
        }

        /// <summary>Creates the exception with a message and the exception that caused it.</summary>
        public InternalErrorException(string message, Exception innerException)
            : base(message, innerException)
        {
        }
    }
}
|
14
Ryujinx.Graphics.Nvdec.Vp9/InternalErrorInfo.cs
Normal file
14
Ryujinx.Graphics.Nvdec.Vp9/InternalErrorInfo.cs
Normal file
@ -0,0 +1,14 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
// Holds the code of the most recently reported decoder error.
internal struct InternalErrorInfo
{
    // Error code stored by the last call to InternalError.
    public CodecErr ErrorCode;

    // Stores error in ErrorCode and then always throws an
    // InternalErrorException carrying message; this method never returns
    // normally.
    public void InternalError(CodecErr error, string message)
    {
        this.ErrorCode = error;

        throw new InternalErrorException(message);
    }
}
|
||||
}
|
418
Ryujinx.Graphics.Nvdec.Vp9/LoopFilter.cs
Normal file
418
Ryujinx.Graphics.Nvdec.Vp9/LoopFilter.cs
Normal file
@ -0,0 +1,418 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Types;
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
internal static class LoopFilter
|
||||
{
|
||||
// Highest loop filter level; filter levels run from 0 to MaxLoopFilter inclusive.
public const int MaxLoopFilter = 63;

// Number of reference frame deltas.
// NOTE(review): not referenced in the visible part of this class - confirm usage.
public const int MaxRefLfDeltas = 4;

// Number of mode deltas applied per reference frame.
public const int MaxModeLfDeltas = 2;
|
||||
|
||||
// 64 bit masks for the left transform size, indexed by transform size.
// Each 1 represents a position where we should apply a loop filter across
// the left border of an 8x8 block boundary.
//
// In the case of TX_16X16 (low order byte first) we end up with a mask
// that looks like this:
//
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//
// A loopfilter should be applied to every other 8x8 horizontally.
private static readonly ulong[] Left64X64TxformMask =
{
    0xffffffffffffffffUL, // TX_4X4
    0xffffffffffffffffUL, // TX_8X8
    0x5555555555555555UL, // TX_16X16
    0x1111111111111111UL, // TX_32X32
};
|
||||
|
||||
// 64 bit masks for the above transform size, indexed by transform size.
// Each 1 represents a position where we should apply a loop filter across
// the top border of an 8x8 block boundary.
//
// In the case of TX_32X32 (low order byte first) we end up with a mask
// that looks like this:
//
//    11111111
//    00000000
//    00000000
//    00000000
//    11111111
//    00000000
//    00000000
//    00000000
//
// A loopfilter should be applied to every 4th row vertically.
private static readonly ulong[] Above64X64TxformMask =
{
    0xffffffffffffffffUL, // TX_4X4
    0xffffffffffffffffUL, // TX_8X8
    0x00ff00ff00ff00ffUL, // TX_16X16
    0x000000ff000000ffUL, // TX_32X32
};
|
||||
|
||||
// 64 bit masks for prediction sizes (left), indexed by block size.
// Each 1 marks the left border of an 8x8 block. The masks are aligned to
// the right most appropriate bit, and then shifted into place.
//
// In the case of BLOCK_16X32 (low order byte first) we end up with a mask
// that looks like this:
//
//    10000000
//    10000000
//    10000000
//    10000000
//    00000000
//    00000000
//    00000000
//    00000000
private static readonly ulong[] LeftPredictionMask =
{
    0x0000000000000001UL, // BLOCK_4X4
    0x0000000000000001UL, // BLOCK_4X8
    0x0000000000000001UL, // BLOCK_8X4
    0x0000000000000001UL, // BLOCK_8X8
    0x0000000000000101UL, // BLOCK_8X16
    0x0000000000000001UL, // BLOCK_16X8
    0x0000000000000101UL, // BLOCK_16X16
    0x0000000001010101UL, // BLOCK_16X32
    0x0000000000000101UL, // BLOCK_32X16
    0x0000000001010101UL, // BLOCK_32X32
    0x0101010101010101UL, // BLOCK_32X64
    0x0000000001010101UL, // BLOCK_64X32
    0x0101010101010101UL, // BLOCK_64X64
};
|
||||
|
||||
// 64 bit masks for prediction sizes (above), indexed by block size.
// Each mask is shifted into place and OR-ed into the above mask; it has one
// bit per 8x8 column covered by the block, all in the first row.
private static readonly ulong[] AbovePredictionMask =
{
    0x0000000000000001UL, // BLOCK_4X4
    0x0000000000000001UL, // BLOCK_4X8
    0x0000000000000001UL, // BLOCK_8X4
    0x0000000000000001UL, // BLOCK_8X8
    0x0000000000000001UL, // BLOCK_8X16
    0x0000000000000003UL, // BLOCK_16X8
    0x0000000000000003UL, // BLOCK_16X16
    0x0000000000000003UL, // BLOCK_16X32
    0x000000000000000fUL, // BLOCK_32X16
    0x000000000000000fUL, // BLOCK_32X32
    0x000000000000000fUL, // BLOCK_32X64
    0x00000000000000ffUL, // BLOCK_64X32
    0x00000000000000ffUL, // BLOCK_64X64
};
|
||||
|
||||
// 64 bit mask to shift and set for each prediction size, indexed by block
// size. A bit is set for each 8x8 block that would be covered by a block of
// the given size placed at the top-left corner of the 64x64 block.
private static readonly ulong[] SizeMask =
{
    0x0000000000000001UL, // BLOCK_4X4
    0x0000000000000001UL, // BLOCK_4X8
    0x0000000000000001UL, // BLOCK_8X4
    0x0000000000000001UL, // BLOCK_8X8
    0x0000000000000101UL, // BLOCK_8X16
    0x0000000000000003UL, // BLOCK_16X8
    0x0000000000000303UL, // BLOCK_16X16
    0x0000000003030303UL, // BLOCK_16X32
    0x0000000000000f0fUL, // BLOCK_32X16
    0x000000000f0f0f0fUL, // BLOCK_32X32
    0x0f0f0f0f0f0f0f0fUL, // BLOCK_32X64
    0x00000000ffffffffUL, // BLOCK_64X32
    0xffffffffffffffffUL, // BLOCK_64X64
};
|
||||
|
||||
// Masks used for the left and above borders. They carry the same bit
// patterns as the TX_32X32 entries of the left and above transform masks.
private const ulong LeftBorder = 0x1111111111111111UL;
private const ulong AboveBorder = 0x000000ff000000ffUL;
|
||||
|
||||
// 16 bit masks for uv transform sizes (left edges), indexed by transform size.
private static readonly ushort[] Left64X64TxformMaskUv =
{
    0xffff, // TX_4X4
    0xffff, // TX_8X8
    0x5555, // TX_16X16
    0x1111, // TX_32X32
};

// 16 bit masks for uv transform sizes (above edges), indexed by transform size.
private static readonly ushort[] Above64X64TxformMaskUv =
{
    0xffff, // TX_4X4
    0xffff, // TX_8X8
    0x0f0f, // TX_16X16
    0x000f, // TX_32X32
};
|
||||
|
||||
// 16 bit left mask to shift and set for each uv prediction size, indexed
// by block size.
private static readonly ushort[] LeftPredictionMaskUv =
{
    0x0001, // BLOCK_4X4
    0x0001, // BLOCK_4X8
    0x0001, // BLOCK_8X4
    0x0001, // BLOCK_8X8
    0x0001, // BLOCK_8X16
    0x0001, // BLOCK_16X8
    0x0001, // BLOCK_16X16
    0x0011, // BLOCK_16X32
    0x0001, // BLOCK_32X16
    0x0011, // BLOCK_32X32
    0x1111, // BLOCK_32X64
    0x0011, // BLOCK_64X32
    0x1111, // BLOCK_64X64
};
|
||||
|
||||
// 16 bit above mask to shift and set for each uv prediction size, indexed
// by block size.
private static readonly ushort[] AbovePredictionMaskUv =
{
    0x0001, // BLOCK_4X4
    0x0001, // BLOCK_4X8
    0x0001, // BLOCK_8X4
    0x0001, // BLOCK_8X8
    0x0001, // BLOCK_8X16
    0x0001, // BLOCK_16X8
    0x0001, // BLOCK_16X16
    0x0001, // BLOCK_16X32
    0x0003, // BLOCK_32X16
    0x0003, // BLOCK_32X32
    0x0003, // BLOCK_32X64
    0x000f, // BLOCK_64X32
    0x000f, // BLOCK_64X64
};
|
||||
|
||||
// 16 bit mask to shift and set for each uv prediction size, indexed by
// block size.
private static readonly ushort[] SizeMaskUv =
{
    0x0001, // BLOCK_4X4
    0x0001, // BLOCK_4X8
    0x0001, // BLOCK_8X4
    0x0001, // BLOCK_8X8
    0x0001, // BLOCK_8X16
    0x0001, // BLOCK_16X8
    0x0001, // BLOCK_16X16
    0x0011, // BLOCK_16X32
    0x0003, // BLOCK_32X16
    0x0033, // BLOCK_32X32
    0x3333, // BLOCK_32X64
    0x00ff, // BLOCK_64X32
    0xffff, // BLOCK_64X64
};
|
||||
|
||||
// 16 bit uv masks for the left and above borders. They carry the same bit
// patterns as the TX_32X32 entries of the uv transform masks.
private const ushort LeftBorderUv = 0x1111;
private const ushort AboveBorderUv = 0x000f;
|
||||
|
||||
// Maps a prediction mode (cast to int) to the mode index used when reading
// the per-mode filter level in GetFilterLevel.
private static readonly int[] ModeLfLut =
{
    // INTRA_MODES
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // INTER_MODES (ZEROMV == 0)
    1, 1, 0, 1,
};
|
||||
|
||||
// Looks up the filter level for a block from its segment id, its first
// reference frame and the mode index that ModeLfLut assigns to its mode.
private static byte GetFilterLevel(ref LoopFilterInfoN lfiN, ref ModeInfo mi)
{
    int segmentIndex = mi.SegmentId;
    int refIndex = mi.RefFrame[0];
    int modeIndex = ModeLfLut[(int)mi.Mode];

    return lfiN.Lvl[segmentIndex][refIndex][modeIndex];
}
|
||||
|
||||
// Returns a reference to the LoopFilterMask that covers (miRow, miCol).
// Both coordinates are divided by 8 to get the mask's row and column, and
// rows are lf.LfmStride entries apart.
private static ref LoopFilterMask GetLfm(ref Types.LoopFilter lf, int miRow, int miCol)
{
    int maskRow = miRow >> 3;
    int maskCol = miCol >> 3;

    return ref lf.Lfm[maskCol + (maskRow * lf.LfmStride)];
}
|
||||
|
||||
// 8x8 blocks in a superblock, indexed as [row][column]. A "1" represents
// the first block in a 16x16 or greater area, i.e. the positions where both
// the row and the column are even.
private static readonly byte[][] FirstBlockIn16x16 =
{
    new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 },
    new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
    new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 },
    new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
    new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 },
    new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
    new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 },
    new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
};
|
||||
|
||||
// Sets up the loop filter bit masks for the block represented by
// miRow, miCol within its 64x64 region.
//
// bw and bh are the width and height of the area of LflY that receives the
// block's filter level; consecutive rows of LflY are 8 entries apart.
// Nothing is written when the block's filter level is 0.
public static void BuildMask(ref Vp9Common cm, ref ModeInfo mi, int miRow, int miCol, int bw, int bh)
{
    int bsize = (int)mi.SbType;
    TxSize lumaTxSize = mi.TxSize;
    int lumaTx = (int)lumaTxSize;
    byte level = GetFilterLevel(ref cm.LfInfo, ref mi);
    TxSize chromaTxSize = Luts.UvTxsizeLookup[bsize][lumaTx][1][1];
    int chromaTx = (int)chromaTxSize;

    ref LoopFilterMask lfm = ref GetLfm(ref cm.Lf, miRow, miCol);
    ref ulong leftY = ref lfm.LeftY[lumaTx];
    ref ulong aboveY = ref lfm.AboveY[lumaTx];
    ref ulong int4X4Y = ref lfm.Int4x4Y;
    ref ushort leftUv = ref lfm.LeftUv[chromaTx];
    ref ushort aboveUv = ref lfm.AboveUv[chromaTx];
    ref ushort int4X4Uv = ref lfm.Int4x4Uv;

    // Position of the block inside the 8x8 grid of the 64x64 region, and the
    // matching bit positions in the 64 bit (luma) and 16 bit (chroma) masks.
    int rowInSb = miRow & 7;
    int colInSb = miCol & 7;
    int shiftY = (rowInSb << 3) + colInSb;
    int shiftUv = ((rowInSb >> 1) << 2) + (colInSb >> 1);

    // Chroma masks are only built from the first 8x8 block of each 16x16 area.
    bool buildUv = FirstBlockIn16x16[rowInSb][colInSb] != 0;

    if (level == 0)
    {
        return;
    }

    // Store the filter level for every position covered by the block.
    for (int row = 0, offset = shiftY; row < bh; row++, offset += 8)
    {
        MemoryMarshal.CreateSpan(ref lfm.LflY[offset], 64 - offset).Slice(0, bw).Fill(level);
    }

    // These set 1 in the current block size for the block size edges.
    // For instance if the block size is 32x16, we'll set:
    //    above = 1111
    //            0000
    //    and
    //    left  = 1000
    //            1000
    // NOTE: In this example the low bit is left most (1000) and is stored as
    //       1, not 8...
    //
    // U and V set things on a 16 bit scale.
    aboveY |= AbovePredictionMask[bsize] << shiftY;
    leftY |= LeftPredictionMask[bsize] << shiftY;

    if (buildUv)
    {
        aboveUv |= (ushort)(AbovePredictionMaskUv[bsize] << shiftUv);
        leftUv |= (ushort)(LeftPredictionMaskUv[bsize] << shiftUv);
    }

    // If the block has no coefficients and is not intra we skip applying
    // the loop filter on block edges.
    if (mi.Skip != 0 && mi.IsInterBlock())
    {
        return;
    }

    // Add a mask for the transform size. The transform size mask is set to
    // be correct for a 64x64 prediction block size. Mask to match the size of
    // the block we are working on and then shift it into place.
    ulong areaY = SizeMask[bsize];

    aboveY |= (areaY & Above64X64TxformMask[lumaTx]) << shiftY;
    leftY |= (areaY & Left64X64TxformMask[lumaTx]) << shiftY;

    if (buildUv)
    {
        aboveUv |= (ushort)((SizeMaskUv[bsize] & Above64X64TxformMaskUv[chromaTx]) << shiftUv);
        leftUv |= (ushort)((SizeMaskUv[bsize] & Left64X64TxformMaskUv[chromaTx]) << shiftUv);
    }

    // Try to determine what to do with the internal 4x4 block boundaries. These
    // differ from the 4x4 boundaries on the outside edge of an 8x8 in that the
    // internal ones can be skipped and don't depend on the prediction block size.
    if (lumaTxSize == TxSize.Tx4x4)
    {
        int4X4Y |= areaY << shiftY;
    }

    if (buildUv && chromaTxSize == TxSize.Tx4x4)
    {
        int4X4Uv |= (ushort)((SizeMaskUv[bsize] & 0xffff) << shiftUv);
    }
}
|
||||
|
||||
// Resets the loop filter masks of the frame by filling cm.Lf.Lfm with
// default-constructed LoopFilterMask values. Does nothing when the frame's
// filter level is 0.
// The element count is the number of mask rows (MiRows rounded up to a
// multiple of Constants.MiBlockSize, divided by 8) times the mask stride.
public static unsafe void ResetLfm(ref Vp9Common cm)
{
    if (cm.Lf.FilterLevel == 0)
    {
        return;
    }

    int maskRows = (cm.MiRows + (Constants.MiBlockSize - 1)) >> 3;

    MemoryUtil.Fill(cm.Lf.Lfm.ToPointer(), new LoopFilterMask(), maskRows * cm.Lf.LfmStride);
}
|
||||
|
||||
// Fills the Lim and Mblim thresholds of lfi.Lfthr for every filter level
// from 0 to MaxLoopFilter, based on the sharpness level.
private static void UpdateSharpness(ref LoopFilterInfoN lfi, int sharpnessLvl)
{
    // The level is shifted right by 0, 1 or 2 bits depending on sharpness.
    int shift = 0;

    if (sharpnessLvl > 0)
    {
        shift++;
    }

    if (sharpnessLvl > 4)
    {
        shift++;
    }

    // For each possible value for the loop filter fill out limits
    for (int lvl = 0; lvl <= MaxLoopFilter; lvl++)
    {
        // Set loop filter parameters that control sharpness.
        int insideLimit = lvl >> shift;

        if (sharpnessLvl > 0)
        {
            insideLimit = Math.Min(insideLimit, 9 - sharpnessLvl);
        }

        // The inside limit is never allowed to drop below 1.
        insideLimit = Math.Max(insideLimit, 1);

        lfi.Lfthr[lvl].Lim.ToSpan().Fill((byte)insideLimit);
        lfi.Lfthr[lvl].Mblim.ToSpan().Fill((byte)(2 * (lvl + 2) + insideLimit));
    }
}
|
||||
|
||||
// Computes the filter level tables in cm.LfInfo for a frame whose base
// filter level is defaultFiltLvl: one level per segment, reference frame
// and mode index. Also refreshes the sharpness dependent limits when the
// sharpness level differs from the one last used.
public static void LoopFilterFrameInit(ref Vp9Common cm, int defaultFiltLvl)
{
    ref LoopFilterInfoN lfi = ref cm.LfInfo;
    ref Types.LoopFilter lf = ref cm.Lf;
    ref Segmentation seg = ref cm.Seg;

    // scale is the multiplier for the reference and mode deltas:
    // it is 1 when defaultFiltLvl is between 0 and 31,
    // and 2 when defaultFiltLvl is between 32 and 63.
    int scale = 1 << (defaultFiltLvl >> 5);

    // Update limits if sharpness has changed
    if (lf.SharpnessLevel != lf.LastSharpnessLevel)
    {
        UpdateSharpness(ref lfi, lf.SharpnessLevel);
        lf.LastSharpnessLevel = lf.SharpnessLevel;
    }

    for (int segId = 0; segId < Constants.MaxSegments; segId++)
    {
        int lvlSeg = defaultFiltLvl;

        if (seg.IsSegFeatureActive(segId, SegLvlFeatures.SegLvlAltLf) != 0)
        {
            // The segment either replaces the base level or offsets it.
            int data = seg.GetSegData(segId, SegLvlFeatures.SegLvlAltLf);
            bool isAbsolute = seg.AbsDelta == Constants.SegmentAbsData;

            lvlSeg = Math.Clamp(isAbsolute ? data : defaultFiltLvl + data, 0, MaxLoopFilter);
        }

        if (!lf.ModeRefDeltaEnabled)
        {
            // We could get rid of this if we assume that deltas are set to
            // zero when not in use; encoder always uses deltas
            MemoryMarshal.Cast<Array2<byte>, byte>(lfi.Lvl[segId].ToSpan()).Fill((byte)lvlSeg);

            continue;
        }

        // Intra frames only use the reference delta, stored at mode index 0.
        int intraLvl = lvlSeg + lf.RefDeltas[Constants.IntraFrame] * scale;
        lfi.Lvl[segId][Constants.IntraFrame][0] = (byte)Math.Clamp(intraLvl, 0, MaxLoopFilter);

        for (int refr = Constants.LastFrame; refr < Constants.MaxRefFrames; ++refr)
        {
            int refLvl = lvlSeg + lf.RefDeltas[refr] * scale;

            for (int mode = 0; mode < MaxModeLfDeltas; ++mode)
            {
                int interLvl = refLvl + lf.ModeDeltas[mode] * scale;
                lfi.Lvl[segId][refr][mode] = (byte)Math.Clamp(interLvl, 0, MaxLoopFilter);
            }
        }
    }
}
|
||||
}
|
||||
}
|
1612
Ryujinx.Graphics.Nvdec.Vp9/Luts.cs
Normal file
1612
Ryujinx.Graphics.Nvdec.Vp9/Luts.cs
Normal file
File diff suppressed because it is too large
Load Diff
389
Ryujinx.Graphics.Nvdec.Vp9/PredCommon.cs
Normal file
389
Ryujinx.Graphics.Nvdec.Vp9/PredCommon.cs
Normal file
@ -0,0 +1,389 @@
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Types;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
internal static class PredCommon
|
||||
{
|
||||
// Returns the context (0 to 4) derived from whether the above and left
// neighbours use compound prediction and whether their first reference is
// cm.CompFixedRef.
public static int GetReferenceModeContext(ref Vp9Common cm, ref MacroBlockD xd)
{
    // Note:
    // The mode info data structure has a one element border above and to the
    // left of the entries corresponding to real macroblocks.
    // The prediction flags in these dummy entries are initialized to 0.
    bool hasAbove = !xd.AboveMi.IsNull;
    bool hasLeft = !xd.LeftMi.IsNull;
    int ctx;

    if (hasAbove && hasLeft)
    {
        // Both edges available
        ref ModeInfo above = ref xd.AboveMi.Value;
        ref ModeInfo left = ref xd.LeftMi.Value;
        bool aboveComp = above.HasSecondRef();
        bool leftComp = left.HasSecondRef();

        if (aboveComp && leftComp)
        {
            // Both edges use comp pred (4)
            ctx = 4;
        }
        else if (aboveComp)
        {
            // Only above uses comp pred, classify by the left edge (2/3)
            bool leftMatches = left.RefFrame[0] == cm.CompFixedRef || !left.IsInterBlock();
            ctx = leftMatches ? 3 : 2;
        }
        else if (leftComp)
        {
            // Only left uses comp pred, classify by the above edge (2/3)
            bool aboveMatches = above.RefFrame[0] == cm.CompFixedRef || !above.IsInterBlock();
            ctx = aboveMatches ? 3 : 2;
        }
        else
        {
            // Neither edge uses comp pred (0/1): 1 when exactly one edge
            // references the fixed reference frame.
            bool aboveFixed = above.RefFrame[0] == cm.CompFixedRef;
            bool leftFixed = left.RefFrame[0] == cm.CompFixedRef;
            ctx = aboveFixed != leftFixed ? 1 : 0;
        }
    }
    else if (hasAbove || hasLeft)
    {
        // One edge available
        ref ModeInfo edgeMi = ref hasAbove ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;

        if (edgeMi.HasSecondRef())
        {
            // Edge uses comp pred (3)
            ctx = 3;
        }
        else
        {
            // Edge does not use comp pred (0/1)
            ctx = edgeMi.RefFrame[0] == cm.CompFixedRef ? 1 : 0;
        }
    }
    else
    {
        // No edges available (1)
        ctx = 1;
    }

    Debug.Assert(ctx >= 0 && ctx < Constants.CompInterContexts);

    return ctx;
}
|
||||
|
||||
// Returns a context number (0 to 4) for the given MB prediction signal,
// derived from how the above and left neighbours relate to
// cm.CompVarRef and cm.CompFixedRef.
public static int GetPredContextCompRefP(ref Vp9Common cm, ref MacroBlockD xd)
{
    // Note:
    // The mode info data structure has a one element border above and to the
    // left of the entries corresponding to real macroblocks.
    // The prediction flags in these dummy entries are initialized to 0.
    int fixRefIdx = cm.RefFrameSignBias[cm.CompFixedRef];
    int varRefIdx = fixRefIdx == 0 ? 1 : 0;
    bool hasAbove = !xd.AboveMi.IsNull;
    bool hasLeft = !xd.LeftMi.IsNull;
    int predContext;

    if (hasAbove && hasLeft)
    {
        // Both edges available
        ref ModeInfo above = ref xd.AboveMi.Value;
        ref ModeInfo left = ref xd.LeftMi.Value;
        bool aboveIntra = !above.IsInterBlock();
        bool leftIntra = !left.IsInterBlock();

        if (aboveIntra && leftIntra)
        {
            // Intra/Intra (2)
            predContext = 2;
        }
        else if (aboveIntra || leftIntra)
        {
            // Intra/Inter: only the inter edge matters. Single and comp
            // prediction both give 1 or 3, they differ in which reference
            // slot is compared.
            ref ModeInfo edgeMi = ref aboveIntra ? ref left : ref above;
            int refSlot = edgeMi.HasSecondRef() ? varRefIdx : 0;

            predContext = edgeMi.RefFrame[refSlot] != cm.CompVarRef[1] ? 3 : 1;
        }
        else
        {
            // Inter/Inter
            bool leftSingle = !left.HasSecondRef();
            bool aboveSingle = !above.HasSecondRef();
            sbyte vrfa = above.RefFrame[aboveSingle ? 0 : varRefIdx];
            sbyte vrfl = left.RefFrame[leftSingle ? 0 : varRefIdx];

            if (vrfa == vrfl && cm.CompVarRef[1] == vrfa)
            {
                predContext = 0;
            }
            else if (leftSingle && aboveSingle)
            {
                // Single/Single
                bool fixedAndVar0 = (vrfa == cm.CompFixedRef && vrfl == cm.CompVarRef[0]) ||
                                    (vrfl == cm.CompFixedRef && vrfa == cm.CompVarRef[0]);

                if (fixedAndVar0)
                {
                    predContext = 4;
                }
                else
                {
                    predContext = vrfa == vrfl ? 3 : 1;
                }
            }
            else if (leftSingle || aboveSingle)
            {
                // Single/Comp
                sbyte vrfc = leftSingle ? vrfa : vrfl;
                sbyte rfs = aboveSingle ? vrfa : vrfl;
                bool compIsVar1 = vrfc == cm.CompVarRef[1];
                bool singleIsVar1 = rfs == cm.CompVarRef[1];

                if (compIsVar1 && !singleIsVar1)
                {
                    predContext = 1;
                }
                else if (singleIsVar1 && !compIsVar1)
                {
                    predContext = 2;
                }
                else
                {
                    predContext = 4;
                }
            }
            else
            {
                // Comp/Comp
                predContext = vrfa == vrfl ? 4 : 2;
            }
        }
    }
    else if (hasAbove || hasLeft)
    {
        // One edge available
        ref ModeInfo edgeMi = ref hasAbove ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;

        if (!edgeMi.IsInterBlock())
        {
            predContext = 2;
        }
        else if (edgeMi.HasSecondRef())
        {
            predContext = edgeMi.RefFrame[varRefIdx] != cm.CompVarRef[1] ? 4 : 0;
        }
        else
        {
            predContext = edgeMi.RefFrame[0] != cm.CompVarRef[1] ? 3 : 0;
        }
    }
    else
    {
        // No edges available (2)
        predContext = 2;
    }

    Debug.Assert(predContext >= 0 && predContext < Constants.RefContexts);

    return predContext;
}
|
||||
|
||||
// Returns a context number (0 to 4) derived from how the above and left
// neighbours use Constants.LastFrame as a reference.
public static int GetPredContextSingleRefP1(ref MacroBlockD xd)
{
    // Note:
    // The mode info data structure has a one element border above and to the
    // left of the entries corresponding to real macroblocks.
    // The prediction flags in these dummy entries are initialized to 0.
    bool hasAbove = !xd.AboveMi.IsNull;
    bool hasLeft = !xd.LeftMi.IsNull;
    int predContext;

    if (hasAbove && hasLeft)
    {
        // Both edges available
        ref ModeInfo above = ref xd.AboveMi.Value;
        ref ModeInfo left = ref xd.LeftMi.Value;
        bool aboveIntra = !above.IsInterBlock();
        bool leftIntra = !left.IsInterBlock();

        if (aboveIntra && leftIntra)
        {
            // Intra/Intra
            predContext = 2;
        }
        else if (aboveIntra || leftIntra)
        {
            // Intra/Inter or Inter/Intra: only the inter edge matters.
            ref ModeInfo edgeMi = ref aboveIntra ? ref left : ref above;

            if (edgeMi.HasSecondRef())
            {
                bool usesLast = edgeMi.RefFrame[0] == Constants.LastFrame ||
                                edgeMi.RefFrame[1] == Constants.LastFrame;

                predContext = usesLast ? 2 : 1;
            }
            else
            {
                predContext = edgeMi.RefFrame[0] == Constants.LastFrame ? 4 : 0;
            }
        }
        else
        {
            // Inter/Inter
            bool aboveComp = above.HasSecondRef();
            bool leftComp = left.HasSecondRef();
            sbyte above0 = above.RefFrame[0];
            sbyte above1 = above.RefFrame[1];
            sbyte left0 = left.RefFrame[0];
            sbyte left1 = left.RefFrame[1];

            if (aboveComp && leftComp)
            {
                bool anyLast = above0 == Constants.LastFrame || above1 == Constants.LastFrame ||
                               left0 == Constants.LastFrame || left1 == Constants.LastFrame;

                predContext = anyLast ? 2 : 1;
            }
            else if (aboveComp || leftComp)
            {
                // rfs is the reference of the single edge, crf1 and crf2 are
                // the two references of the comp edge.
                sbyte rfs, crf1, crf2;

                if (aboveComp)
                {
                    rfs = left0;
                    crf1 = above0;
                    crf2 = above1;
                }
                else
                {
                    rfs = above0;
                    crf1 = left0;
                    crf2 = left1;
                }

                bool compUsesLast = crf1 == Constants.LastFrame || crf2 == Constants.LastFrame;

                predContext = (rfs == Constants.LastFrame ? 3 : 0) + (compUsesLast ? 1 : 0);
            }
            else
            {
                predContext = (above0 == Constants.LastFrame ? 2 : 0) + (left0 == Constants.LastFrame ? 2 : 0);
            }
        }
    }
    else if (hasAbove || hasLeft)
    {
        // One edge available
        ref ModeInfo edgeMi = ref hasAbove ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;

        if (!edgeMi.IsInterBlock())
        {
            // Intra
            predContext = 2;
        }
        else if (edgeMi.HasSecondRef())
        {
            // Inter, comp pred
            bool usesLast = edgeMi.RefFrame[0] == Constants.LastFrame ||
                            edgeMi.RefFrame[1] == Constants.LastFrame;

            predContext = usesLast ? 2 : 1;
        }
        else
        {
            // Inter, single pred
            predContext = edgeMi.RefFrame[0] == Constants.LastFrame ? 4 : 0;
        }
    }
    else
    {
        // No edges available
        predContext = 2;
    }

    Debug.Assert(predContext >= 0 && predContext < Constants.RefContexts);

    return predContext;
}
|
||||
|
||||
// Returns a context number (0 to 4) derived from how the above and left
// neighbours use Constants.LastFrame, Constants.GoldenFrame and
// Constants.AltRefFrame as references.
public static int GetPredContextSingleRefP2(ref MacroBlockD xd)
{
    // Note:
    // The mode info data structure has a one element border above and to the
    // left of the entries corresponding to real macroblocks.
    // The prediction flags in these dummy entries are initialized to 0.
    bool hasAbove = !xd.AboveMi.IsNull;
    bool hasLeft = !xd.LeftMi.IsNull;
    int predContext;

    if (hasAbove && hasLeft)
    {
        // Both edges available
        ref ModeInfo above = ref xd.AboveMi.Value;
        ref ModeInfo left = ref xd.LeftMi.Value;
        bool aboveIntra = !above.IsInterBlock();
        bool leftIntra = !left.IsInterBlock();

        if (aboveIntra && leftIntra)
        {
            // Intra/Intra
            predContext = 2;
        }
        else if (aboveIntra || leftIntra)
        {
            // Intra/Inter or Inter/Intra: only the inter edge matters.
            ref ModeInfo edgeMi = ref aboveIntra ? ref left : ref above;

            if (edgeMi.HasSecondRef())
            {
                bool usesGolden = edgeMi.RefFrame[0] == Constants.GoldenFrame ||
                                  edgeMi.RefFrame[1] == Constants.GoldenFrame;

                predContext = usesGolden ? 3 : 1;
            }
            else if (edgeMi.RefFrame[0] == Constants.LastFrame)
            {
                predContext = 3;
            }
            else
            {
                predContext = edgeMi.RefFrame[0] == Constants.GoldenFrame ? 4 : 0;
            }
        }
        else
        {
            // Inter/Inter
            bool aboveComp = above.HasSecondRef();
            bool leftComp = left.HasSecondRef();
            sbyte above0 = above.RefFrame[0];
            sbyte above1 = above.RefFrame[1];
            sbyte left0 = left.RefFrame[0];
            sbyte left1 = left.RefFrame[1];

            if (aboveComp && leftComp)
            {
                if (above0 == left0 && above1 == left1)
                {
                    bool anyGolden = above0 == Constants.GoldenFrame || above1 == Constants.GoldenFrame ||
                                     left0 == Constants.GoldenFrame || left1 == Constants.GoldenFrame;

                    predContext = anyGolden ? 3 : 0;
                }
                else
                {
                    predContext = 2;
                }
            }
            else if (aboveComp || leftComp)
            {
                // rfs is the reference of the single edge, crf1 and crf2 are
                // the two references of the comp edge.
                sbyte rfs, crf1, crf2;

                if (aboveComp)
                {
                    rfs = left0;
                    crf1 = above0;
                    crf2 = above1;
                }
                else
                {
                    rfs = above0;
                    crf1 = left0;
                    crf2 = left1;
                }

                bool compUsesGolden = crf1 == Constants.GoldenFrame || crf2 == Constants.GoldenFrame;

                if (rfs == Constants.GoldenFrame)
                {
                    predContext = compUsesGolden ? 4 : 3;
                }
                else if (rfs == Constants.AltRefFrame)
                {
                    predContext = compUsesGolden ? 1 : 0;
                }
                else
                {
                    predContext = compUsesGolden ? 3 : 1;
                }
            }
            else
            {
                // Single/Single
                bool aboveLast = above0 == Constants.LastFrame;
                bool leftLast = left0 == Constants.LastFrame;

                if (aboveLast && leftLast)
                {
                    predContext = 3;
                }
                else if (aboveLast || leftLast)
                {
                    // Classify by the edge that does not reference the last frame.
                    sbyte edge0 = aboveLast ? left0 : above0;

                    predContext = edge0 == Constants.GoldenFrame ? 4 : 0;
                }
                else
                {
                    predContext = (above0 == Constants.GoldenFrame ? 2 : 0) + (left0 == Constants.GoldenFrame ? 2 : 0);
                }
            }
        }
    }
    else if (hasAbove || hasLeft)
    {
        // One edge available
        ref ModeInfo edgeMi = ref hasAbove ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;
        bool edgeComp = edgeMi.HasSecondRef();

        if (!edgeMi.IsInterBlock() || (!edgeComp && edgeMi.RefFrame[0] == Constants.LastFrame))
        {
            predContext = 2;
        }
        else if (!edgeComp)
        {
            predContext = edgeMi.RefFrame[0] == Constants.GoldenFrame ? 4 : 0;
        }
        else
        {
            bool usesGolden = edgeMi.RefFrame[0] == Constants.GoldenFrame ||
                              edgeMi.RefFrame[1] == Constants.GoldenFrame;

            predContext = usesGolden ? 3 : 0;
        }
    }
    else
    {
        // No edges available (2)
        predContext = 2;
    }

    Debug.Assert(predContext >= 0 && predContext < Constants.RefContexts);

    return predContext;
}
|
||||
}
|
||||
}
|
203
Ryujinx.Graphics.Nvdec.Vp9/QuantCommon.cs
Normal file
203
Ryujinx.Graphics.Nvdec.Vp9/QuantCommon.cs
Normal file
@ -0,0 +1,203 @@
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Types;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
internal static class QuantCommon
|
||||
{
|
||||
// Lowest and highest values of the Q range. The Dc lookup tables in this
// class each hold MaxQ + 1 entries.
// NOTE(review): presumably these bound the quantizer index - confirm against callers.
public const int MinQ = 0;
public const int MaxQ = 255;
|
||||
|
||||
// DC lookup table with 256 entries.
// NOTE(review): presumably the 8 bit depth table indexed by quantizer index,
// going by the names of the 10 and 12 variants - confirm against the users
// of this table.
private static readonly short[] DcQlookup =
{
    4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18,
    19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30,
    31, 32, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42,
    43, 43, 44, 45, 46, 47, 48, 48, 49, 50, 51, 52, 53, 53,
    54, 55, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65,
    66, 66, 67, 68, 69, 70, 70, 71, 72, 73, 74, 74, 75, 76,
    77, 78, 78, 79, 80, 81, 81, 82, 83, 84, 85, 85, 87, 88,
    90, 92, 93, 95, 96, 98, 99, 101, 102, 104, 105, 107, 108, 110,
    111, 113, 114, 116, 117, 118, 120, 121, 123, 125, 127, 129, 131, 134,
    136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 161, 164,
    166, 169, 172, 174, 177, 180, 182, 185, 187, 190, 192, 195, 199, 202,
    205, 208, 211, 214, 217, 220, 223, 226, 230, 233, 237, 240, 243, 247,
    250, 253, 257, 261, 265, 269, 272, 276, 280, 284, 288, 292, 296, 300,
    304, 309, 313, 317, 322, 326, 330, 335, 340, 344, 349, 354, 359, 364,
    369, 374, 379, 384, 389, 395, 400, 406, 411, 417, 423, 429, 435, 441,
    447, 454, 461, 467, 475, 482, 489, 497, 505, 513, 522, 530, 539, 549,
    559, 569, 579, 590, 602, 614, 626, 640, 654, 668, 684, 700, 717, 736,
    755, 775, 796, 819, 843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139,
    1184, 1232, 1282, 1336,
};
|
||||
|
||||
// DC lookup table with 256 entries.
// NOTE(review): presumably the 10 bit depth variant of DcQlookup, going by
// its name - confirm against the users of this table.
private static readonly short[] DcQlookup10 =
{
    4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37,
    40, 43, 47, 50, 53, 57, 60, 64, 68, 71, 75, 78, 82,
    86, 90, 93, 97, 101, 105, 109, 113, 116, 120, 124, 128, 132,
    136, 140, 143, 147, 151, 155, 159, 163, 166, 170, 174, 178, 182,
    185, 189, 193, 197, 200, 204, 208, 212, 215, 219, 223, 226, 230,
    233, 237, 241, 244, 248, 251, 255, 259, 262, 266, 269, 273, 276,
    280, 283, 287, 290, 293, 297, 300, 304, 307, 310, 314, 317, 321,
    324, 327, 331, 334, 337, 343, 350, 356, 362, 369, 375, 381, 387,
    394, 400, 406, 412, 418, 424, 430, 436, 442, 448, 454, 460, 466,
    472, 478, 484, 490, 499, 507, 516, 525, 533, 542, 550, 559, 567,
    576, 584, 592, 601, 609, 617, 625, 634, 644, 655, 666, 676, 687,
    698, 708, 718, 729, 739, 749, 759, 770, 782, 795, 807, 819, 831,
    844, 856, 868, 880, 891, 906, 920, 933, 947, 961, 975, 988, 1001,
    1015, 1030, 1045, 1061, 1076, 1090, 1105, 1120, 1137, 1153, 1170, 1186, 1202,
    1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, 1398, 1416, 1436,
    1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717,
    1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088,
    2123, 2159, 2197, 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675,
    2737, 2802, 2871, 2944, 3020, 3102, 3188, 3280, 3375, 3478, 3586, 3702, 3823,
    3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347,
};
|
||||
|
||||
private static readonly short[] DcQlookup12 = new short[]
|
||||
{
|
||||
4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91,
|
||||
103, 115, 127, 140, 153, 166, 180, 194, 208, 222, 237,
|
||||
251, 266, 281, 296, 312, 327, 343, 358, 374, 390, 405,
|
||||
421, 437, 453, 469, 484, 500, 516, 532, 548, 564, 580,
|
||||
596, 611, 627, 643, 659, 674, 690, 706, 721, 737, 752,
|
||||
768, 783, 798, 814, 829, 844, 859, 874, 889, 904, 919,
|
||||
934, 949, 964, 978, 993, 1008, 1022, 1037, 1051, 1065, 1080,
|
||||
1094, 1108, 1122, 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234,
|
||||
1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, 1368, 1393, 1419,
|
||||
1444, 1469, 1494, 1519, 1544, 1569, 1594, 1618, 1643, 1668, 1692,
|
||||
1717, 1741, 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, 1957,
|
||||
1992, 2027, 2061, 2096, 2130, 2165, 2199, 2233, 2267, 2300, 2334,
|
||||
2367, 2400, 2434, 2467, 2499, 2532, 2575, 2618, 2661, 2704, 2746,
|
||||
2788, 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, 3177, 3226,
|
||||
3275, 3324, 3373, 3421, 3469, 3517, 3565, 3621, 3677, 3733, 3788,
|
||||
3843, 3897, 3951, 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420,
|
||||
4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, 5013, 5083, 5153,
|
||||
5222, 5291, 5367, 5442, 5517, 5591, 5665, 5745, 5825, 5905, 5984,
|
||||
6063, 6149, 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, 6966,
|
||||
7064, 7163, 7269, 7376, 7483, 7599, 7715, 7832, 7958, 8085, 8214,
|
||||
8352, 8492, 8635, 8788, 8945, 9104, 9275, 9450, 9639, 9832, 10031,
|
||||
10245, 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118,
|
||||
13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949,
|
||||
19718, 20521, 21387,
|
||||
};
|
||||
|
||||
private static readonly short[] AcQlookup = new short[]
|
||||
{
|
||||
4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
|
||||
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
|
||||
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
|
||||
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
|
||||
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
|
||||
72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
|
||||
85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
|
||||
98, 99, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
|
||||
120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144,
|
||||
146, 148, 150, 152, 155, 158, 161, 164, 167, 170, 173, 176, 179,
|
||||
182, 185, 188, 191, 194, 197, 200, 203, 207, 211, 215, 219, 223,
|
||||
227, 231, 235, 239, 243, 247, 251, 255, 260, 265, 270, 275, 280,
|
||||
285, 290, 295, 300, 305, 311, 317, 323, 329, 335, 341, 347, 353,
|
||||
359, 366, 373, 380, 387, 394, 401, 408, 416, 424, 432, 440, 448,
|
||||
456, 465, 474, 483, 492, 501, 510, 520, 530, 540, 550, 560, 571,
|
||||
582, 593, 604, 615, 627, 639, 651, 663, 676, 689, 702, 715, 729,
|
||||
743, 757, 771, 786, 801, 816, 832, 848, 864, 881, 898, 915, 933,
|
||||
951, 969, 988, 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, 1173, 1196,
|
||||
1219, 1243, 1267, 1292, 1317, 1343, 1369, 1396, 1423, 1451, 1479, 1508, 1537,
|
||||
1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
|
||||
};
|
||||
|
||||
private static readonly short[] AcQlookup10 = new short[]
|
||||
{
|
||||
4, 9, 11, 13, 16, 18, 21, 24, 27, 30, 33, 37, 40,
|
||||
44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 88, 92,
|
||||
96, 100, 105, 109, 114, 118, 122, 127, 131, 136, 140, 145, 149,
|
||||
154, 158, 163, 168, 172, 177, 181, 186, 190, 195, 199, 204, 208,
|
||||
213, 217, 222, 226, 231, 235, 240, 244, 249, 253, 258, 262, 267,
|
||||
271, 275, 280, 284, 289, 293, 297, 302, 306, 311, 315, 319, 324,
|
||||
328, 332, 337, 341, 345, 349, 354, 358, 362, 367, 371, 375, 379,
|
||||
384, 388, 392, 396, 401, 409, 417, 425, 433, 441, 449, 458, 466,
|
||||
474, 482, 490, 498, 506, 514, 523, 531, 539, 547, 555, 563, 571,
|
||||
579, 588, 596, 604, 616, 628, 640, 652, 664, 676, 688, 700, 713,
|
||||
725, 737, 749, 761, 773, 785, 797, 809, 825, 841, 857, 873, 889,
|
||||
905, 922, 938, 954, 970, 986, 1002, 1018, 1038, 1058, 1078, 1098, 1118,
|
||||
1138, 1158, 1178, 1198, 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, 1411,
|
||||
1435, 1463, 1491, 1519, 1547, 1575, 1603, 1631, 1663, 1695, 1727, 1759, 1791,
|
||||
1823, 1859, 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, 2199, 2239, 2283,
|
||||
2327, 2371, 2415, 2459, 2507, 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915,
|
||||
2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, 3455, 3523, 3591, 3659, 3731,
|
||||
3803, 3876, 3952, 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, 4692, 4784,
|
||||
4876, 4972, 5068, 5168, 5268, 5372, 5476, 5584, 5692, 5804, 5916, 6032, 6148,
|
||||
6268, 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312,
|
||||
};
|
||||
|
||||
private static readonly short[] AcQlookup12 = new short[]
|
||||
{
|
||||
4, 13, 19, 27, 35, 44, 54, 64, 75, 87, 99,
|
||||
112, 126, 139, 154, 168, 183, 199, 214, 230, 247, 263,
|
||||
280, 297, 314, 331, 349, 366, 384, 402, 420, 438, 456,
|
||||
475, 493, 511, 530, 548, 567, 586, 604, 623, 642, 660,
|
||||
679, 698, 716, 735, 753, 772, 791, 809, 828, 846, 865,
|
||||
884, 902, 920, 939, 957, 976, 994, 1012, 1030, 1049, 1067,
|
||||
1085, 1103, 1121, 1139, 1157, 1175, 1193, 1211, 1229, 1246, 1264,
|
||||
1282, 1299, 1317, 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457,
|
||||
1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, 1627, 1660, 1693,
|
||||
1725, 1758, 1791, 1824, 1856, 1889, 1922, 1954, 1987, 2020, 2052,
|
||||
2085, 2118, 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, 2411,
|
||||
2459, 2508, 2556, 2605, 2653, 2701, 2750, 2798, 2847, 2895, 2943,
|
||||
2992, 3040, 3088, 3137, 3185, 3234, 3298, 3362, 3426, 3491, 3555,
|
||||
3619, 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, 4230, 4310,
|
||||
4390, 4470, 4550, 4631, 4711, 4791, 4871, 4967, 5064, 5160, 5256,
|
||||
5352, 5448, 5544, 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410,
|
||||
6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, 7579, 7723, 7867,
|
||||
8011, 8155, 8315, 8475, 8635, 8795, 8956, 9132, 9308, 9484, 9660,
|
||||
9836, 10028, 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, 11885,
|
||||
12109, 12333, 12573, 12813, 13053, 13309, 13565, 13821, 14093, 14365, 14637,
|
||||
14925, 15213, 15502, 15806, 16110, 16414, 16734, 17054, 17390, 17726, 18062,
|
||||
18414, 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, 21902, 22334,
|
||||
22766, 23214, 23662, 24126, 24590, 25070, 25551, 26047, 26559, 27071, 27599,
|
||||
28143, 28687, 29247,
|
||||
};
|
||||
|
||||
/// <summary>
/// Reads the DC quantizer lookup table that corresponds to <paramref name="bitDepth"/>.
/// </summary>
/// <param name="qindex">Base index into the table.</param>
/// <param name="delta">Offset added to <paramref name="qindex"/>; the sum is clamped to [0, MaxQ] before indexing.</param>
/// <param name="bitDepth">Selects the 8-bit, 10-bit or 12-bit table.</param>
/// <returns>
/// The table entry at the clamped index, or -1 (after a failed debug assertion) when
/// <paramref name="bitDepth"/> is none of the three handled values.
/// </returns>
public static short DcQuant(int qindex, int delta, BitDepth bitDepth)
{
    short[] table;

    if (bitDepth == BitDepth.Bits8)
    {
        table = DcQlookup;
    }
    else if (bitDepth == BitDepth.Bits10)
    {
        table = DcQlookup10;
    }
    else if (bitDepth == BitDepth.Bits12)
    {
        table = DcQlookup12;
    }
    else
    {
        Debug.Assert(false, "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");

        return -1;
    }

    return table[Math.Clamp(qindex + delta, 0, MaxQ)];
}
|
||||
|
||||
/// <summary>
/// Reads the AC quantizer lookup table that corresponds to <paramref name="bitDepth"/>.
/// </summary>
/// <param name="qindex">Base index into the table.</param>
/// <param name="delta">Offset added to <paramref name="qindex"/>; the sum is clamped to [0, MaxQ] before indexing.</param>
/// <param name="bitDepth">Selects the 8-bit, 10-bit or 12-bit table.</param>
/// <returns>
/// The table entry at the clamped index, or -1 (after a failed debug assertion) when
/// <paramref name="bitDepth"/> is none of the three handled values.
/// </returns>
public static short AcQuant(int qindex, int delta, BitDepth bitDepth)
{
    short[] table;

    if (bitDepth == BitDepth.Bits8)
    {
        table = AcQlookup;
    }
    else if (bitDepth == BitDepth.Bits10)
    {
        table = AcQlookup10;
    }
    else if (bitDepth == BitDepth.Bits12)
    {
        table = AcQlookup12;
    }
    else
    {
        Debug.Assert(false, "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");

        return -1;
    }

    return table[Math.Clamp(qindex + delta, 0, MaxQ)];
}
|
||||
|
||||
/// <summary>
/// Resolves the quantizer index to use for a segment.
/// </summary>
/// <param name="seg">Segmentation state that is queried for the SegLvlAltQ feature.</param>
/// <param name="segmentId">Segment whose feature flag and data are read.</param>
/// <param name="baseQIndex">Index returned as-is when the feature is inactive for the segment.</param>
/// <returns>
/// <paramref name="baseQIndex"/> when SegLvlAltQ is inactive. Otherwise the segment data, taken
/// directly when <c>seg.AbsDelta</c> equals <c>Constants.SegmentAbsData</c> and added to
/// <paramref name="baseQIndex"/> in every other case, clamped to [0, MaxQ].
/// </returns>
public static int GetQIndex(ref Segmentation seg, int segmentId, int baseQIndex)
{
    if (seg.IsSegFeatureActive(segmentId, SegLvlFeatures.SegLvlAltQ) == 0)
    {
        return baseQIndex;
    }

    int altQ = seg.GetSegData(segmentId, SegLvlFeatures.SegLvlAltQ);
    int resolved;

    if (seg.AbsDelta == Constants.SegmentAbsData)
    {
        resolved = altQ;
    }
    else
    {
        resolved = baseQIndex + altQ;
    }

    return Math.Clamp(resolved, 0, MaxQ);
}
|
||||
}
|
||||
}
|
234
Ryujinx.Graphics.Nvdec.Vp9/ReconInter.cs
Normal file
234
Ryujinx.Graphics.Nvdec.Vp9/ReconInter.cs
Normal file
@ -0,0 +1,234 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Types;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.CompilerServices;
|
||||
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Filter;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
/// <summary>
/// Motion-vector and plane-setup helpers used when reconstructing inter-predicted blocks.
/// </summary>
internal static class ReconInter
{
    /// <summary>
    /// Forwards a prediction request for byte samples to <c>sf.InterPredict</c>.
    /// </summary>
    /// <remarks>
    /// The first two arguments handed over are 1 when the matching sub-pixel offset is non-zero
    /// and 0 otherwise; every other argument is passed through unchanged.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static unsafe void InterPredictor(
        byte* src,
        int srcStride,
        byte* dst,
        int dstStride,
        int subpelX,
        int subpelY,
        ref ScaleFactors sf,
        int w,
        int h,
        int refr,
        Array8<short>[] kernel,
        int xs,
        int ys)
    {
        int hasSubpelX = subpelX != 0 ? 1 : 0;
        int hasSubpelY = subpelY != 0 ? 1 : 0;

        sf.InterPredict(hasSubpelX, hasSubpelY, refr, src, srcStride, dst, dstStride, subpelX, subpelY, w, h, kernel, xs, ys);
    }

    /// <summary>
    /// Forwards a prediction request for 16-bit samples to <c>sf.HighbdInterPredict</c>.
    /// </summary>
    /// <remarks>
    /// The first two arguments handed over are 1 when the matching sub-pixel offset is non-zero
    /// and 0 otherwise; every other argument, including <paramref name="bd"/>, is passed through unchanged.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static unsafe void HighbdInterPredictor(
        ushort* src,
        int srcStride,
        ushort* dst,
        int dstStride,
        int subpelX,
        int subpelY,
        ref ScaleFactors sf,
        int w,
        int h,
        int refr,
        Array8<short>[] kernel,
        int xs,
        int ys,
        int bd)
    {
        int hasSubpelX = subpelX != 0 ? 1 : 0;
        int hasSubpelY = subpelY != 0 ? 1 : 0;

        sf.HighbdInterPredict(hasSubpelX, hasSubpelY, refr, src, srcStride, dst, dstStride, subpelX, subpelY, w, h, kernel, xs, ys, bd);
    }

    /// <summary>
    /// Divides <paramref name="value"/> by 4 after moving it 2 further away from zero.
    /// </summary>
    private static int RoundMvCompQ4(int value)
    {
        int bias = value < 0 ? -2 : 2;

        return (value + bias) / 4;
    }

    /// <summary>
    /// Builds a motion vector from the sums of the row and column components of
    /// <c>mi.Bmi[0..3].Mv[idx]</c>, each sum passed through <see cref="RoundMvCompQ4"/>.
    /// </summary>
    private static Mv MiMvPredQ4(ref ModeInfo mi, int idx)
    {
        int rowSum = 0;
        int colSum = 0;

        for (int b = 0; b < 4; b++)
        {
            rowSum += mi.Bmi[b].Mv[idx].Row;
            colSum += mi.Bmi[b].Mv[idx].Col;
        }

        return new Mv()
        {
            Row = (short)RoundMvCompQ4(rowSum),
            Col = (short)RoundMvCompQ4(colSum)
        };
    }

    /// <summary>
    /// Divides <paramref name="value"/> by 2 after moving it 1 further away from zero.
    /// </summary>
    private static int RoundMvCompQ2(int value)
    {
        int bias = value < 0 ? -1 : 1;

        return (value + bias) / 2;
    }

    /// <summary>
    /// Builds a motion vector from the sums of the row and column components of
    /// <c>mi.Bmi[block0].Mv[idx]</c> and <c>mi.Bmi[block1].Mv[idx]</c>, each sum passed
    /// through <see cref="RoundMvCompQ2"/>.
    /// </summary>
    private static Mv MiMvPredQ2(ref ModeInfo mi, int idx, int block0, int block1)
    {
        int rowSum = mi.Bmi[block0].Mv[idx].Row + mi.Bmi[block1].Mv[idx].Row;
        int colSum = mi.Bmi[block0].Mv[idx].Col + mi.Bmi[block1].Mv[idx].Col;

        return new Mv()
        {
            Row = (short)RoundMvCompQ2(rowSum),
            Col = (short)RoundMvCompQ2(colSum)
        };
    }

    /// <summary>
    /// Scales <paramref name="srcMv"/> by <c>1 &lt;&lt; (1 - ss)</c> per axis and clamps the result
    /// against the block edges stored in <paramref name="xd"/>, widened by an interpolation margin.
    /// </summary>
    /// <param name="xd">Provides the MbTo*Edge distances used as clamp bounds.</param>
    /// <param name="srcMv">Motion vector to scale and clamp; it is not modified.</param>
    /// <param name="bw">Block width added to the horizontal margin.</param>
    /// <param name="bh">Block height added to the vertical margin.</param>
    /// <param name="ssX">Horizontal subsampling shift; asserted to be at most 1.</param>
    /// <param name="ssY">Vertical subsampling shift; asserted to be at most 1.</param>
    /// <returns>The scaled and clamped motion vector.</returns>
    public static Mv ClampMvToUmvBorderSb(ref MacroBlockD xd, ref Mv srcMv, int bw, int bh, int ssX, int ssY)
    {
        // If the MV points so far into the UMV border that no visible pixels
        // are used for reconstruction, the subpel part of the MV can be
        // discarded and the MV limited to 16 pixels with equivalent results.
        int scaleX = 1 << (1 - ssX);
        int scaleY = 1 << (1 - ssY);

        int spelLeft = (Constants.Vp9InterpExtend + bw) << SubpelBits;
        int spelRight = spelLeft - SubpelShifts;
        int spelTop = (Constants.Vp9InterpExtend + bh) << SubpelBits;
        int spelBottom = spelTop - SubpelShifts;

        Mv clampedMv = new Mv()
        {
            Row = (short)(srcMv.Row * scaleY),
            Col = (short)(srcMv.Col * scaleX)
        };

        Debug.Assert(ssX <= 1);
        Debug.Assert(ssY <= 1);

        int leftBound = xd.MbToLeftEdge * scaleX - spelLeft;
        int rightBound = xd.MbToRightEdge * scaleX + spelRight;
        int topBound = xd.MbToTopEdge * scaleY - spelTop;
        int bottomBound = xd.MbToBottomEdge * scaleY + spelBottom;

        clampedMv.ClampMv(leftBound, rightBound, topBound, bottomBound);

        return clampedMv;
    }

    /// <summary>
    /// Picks or averages sub-block motion vectors according to the plane's subsampling.
    /// </summary>
    /// <remarks>
    /// No subsampling returns <c>mi.Bmi[block].Mv[refr]</c>. Subsampling in Y only averages
    /// <paramref name="block"/> with <c>block + 2</c>; in X only, with <c>block + 1</c>; in both,
    /// all four sub-blocks are averaged.
    /// </remarks>
    public static Mv AverageSplitMvs(ref MacroBlockDPlane pd, ref ModeInfo mi, int refr, int block)
    {
        // Bit 1 is set for horizontal subsampling, bit 0 for vertical subsampling.
        int ssIdx = 0;

        if (pd.SubsamplingX > 0)
        {
            ssIdx |= 2;
        }

        if (pd.SubsamplingY > 0)
        {
            ssIdx |= 1;
        }

        switch (ssIdx)
        {
            case 0:
                return mi.Bmi[block].Mv[refr];
            case 1:
                return MiMvPredQ2(ref mi, refr, block, block + 2);
            case 2:
                return MiMvPredQ2(ref mi, refr, block, block + 1);
            case 3:
                return MiMvPredQ4(ref mi, refr);
            default:
                Debug.Assert(ssIdx <= 3 && ssIdx >= 0);

                return new Mv();
        }
    }

    /// <summary>
    /// Converts an (x, y) position into a linear offset of <c>y * stride + x</c>, first running both
    /// coordinates through <paramref name="sf"/> when it is not null.
    /// </summary>
    private static int ScaledBufferOffset(int xOffset, int yOffset, int stride, Ptr<ScaleFactors> sf)
    {
        int x = xOffset;
        int y = yOffset;

        if (!sf.IsNull)
        {
            x = sf.Value.ScaleValueX(xOffset);
            y = sf.Value.ScaleValueY(yOffset);
        }

        return y * stride + x;
    }

    /// <summary>
    /// Points <paramref name="dst"/> at the position inside <paramref name="src"/> that corresponds to
    /// the given mode-info row and column, and records <paramref name="stride"/> on it.
    /// </summary>
    private static void SetupPredPlanes(
        ref Buf2D dst,
        ArrayPtr<byte> src,
        int stride,
        int miRow,
        int miCol,
        Ptr<ScaleFactors> scale,
        int subsamplingX,
        int subsamplingY)
    {
        int x = (Constants.MiSize * miCol) >> subsamplingX;
        int y = (Constants.MiSize * miRow) >> subsamplingY;
        int offset = ScaledBufferOffset(x, y, stride, scale);

        dst.Buf = src.Slice(offset);
        dst.Stride = stride;
    }

    /// <summary>
    /// Sets the <c>Dst</c> buffer of every plane in <paramref name="planes"/> from the Y, U and V buffers
    /// of <paramref name="src"/>, without any scale factors.
    /// </summary>
    public static void SetupDstPlanes(
        ref Array3<MacroBlockDPlane> planes,
        ref Surface src,
        int miRow,
        int miCol)
    {
        Span<ArrayPtr<byte>> planeBuffers = stackalloc ArrayPtr<byte>[Constants.MaxMbPlane];
        planeBuffers[0] = src.YBuffer;
        planeBuffers[1] = src.UBuffer;
        planeBuffers[2] = src.VBuffer;

        // Both chroma planes share the UV stride.
        Span<int> planeStrides = stackalloc int[Constants.MaxMbPlane];
        planeStrides[0] = src.Stride;
        planeStrides[1] = src.UvStride;
        planeStrides[2] = src.UvStride;

        for (int plane = 0; plane < Constants.MaxMbPlane; ++plane)
        {
            ref MacroBlockDPlane pd = ref planes[plane];

            SetupPredPlanes(
                ref pd.Dst,
                planeBuffers[plane],
                planeStrides[plane],
                miRow,
                miCol,
                Ptr<ScaleFactors>.Null,
                pd.SubsamplingX,
                pd.SubsamplingY);
        }
    }

    /// <summary>
    /// Sets the <c>Pre[idx]</c> buffer of every plane in <paramref name="xd"/> from the Y, U and V buffers
    /// of <paramref name="src"/>, applying <paramref name="sf"/> to the buffer offset.
    /// Does nothing when any of the three source buffers is null.
    /// </summary>
    public static void SetupPrePlanes(
        ref MacroBlockD xd,
        int idx,
        ref Surface src,
        int miRow,
        int miCol,
        Ptr<ScaleFactors> sf)
    {
        if (src.YBuffer.IsNull || src.UBuffer.IsNull || src.VBuffer.IsNull)
        {
            return;
        }

        Span<ArrayPtr<byte>> planeBuffers = stackalloc ArrayPtr<byte>[Constants.MaxMbPlane];
        planeBuffers[0] = src.YBuffer;
        planeBuffers[1] = src.UBuffer;
        planeBuffers[2] = src.VBuffer;

        // Both chroma planes share the UV stride.
        Span<int> planeStrides = stackalloc int[Constants.MaxMbPlane];
        planeStrides[0] = src.Stride;
        planeStrides[1] = src.UvStride;
        planeStrides[2] = src.UvStride;

        for (int plane = 0; plane < Constants.MaxMbPlane; ++plane)
        {
            ref MacroBlockDPlane pd = ref xd.Plane[plane];

            SetupPredPlanes(
                ref pd.Pre[idx],
                planeBuffers[plane],
                planeStrides[plane],
                miRow,
                miCol,
                sf,
                pd.SubsamplingX,
                pd.SubsamplingY);
        }
    }
}
|
||||
}
|
761
Ryujinx.Graphics.Nvdec.Vp9/ReconIntra.cs
Normal file
761
Ryujinx.Graphics.Nvdec.Vp9/ReconIntra.cs
Normal file
@ -0,0 +1,761 @@
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Types;
|
||||
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.IntraPred;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
internal static class ReconIntra
|
||||
{
|
||||
public static readonly TxType[] IntraModeToTxTypeLookup = new TxType[]
|
||||
{
|
||||
TxType.DctDct, // DC
|
||||
TxType.AdstDct, // V
|
||||
TxType.DctAdst, // H
|
||||
TxType.DctDct, // D45
|
||||
TxType.AdstAdst, // D135
|
||||
TxType.AdstDct, // D117
|
||||
TxType.DctAdst, // D153
|
||||
TxType.DctAdst, // D207
|
||||
TxType.AdstDct, // D63
|
||||
TxType.AdstAdst // TM
|
||||
};
|
||||
|
||||
private const int NeedLeft = 1 << 1;
|
||||
private const int NeedAbove = 1 << 2;
|
||||
private const int NeedAboveRight = 1 << 3;
|
||||
|
||||
private static readonly byte[] ExtendModes = new byte[]
|
||||
{
|
||||
NeedAbove | NeedLeft, // DC
|
||||
NeedAbove, // V
|
||||
NeedLeft, // H
|
||||
NeedAboveRight, // D45
|
||||
NeedLeft | NeedAbove, // D135
|
||||
NeedLeft | NeedAbove, // D117
|
||||
NeedLeft | NeedAbove, // D153
|
||||
NeedLeft, // D207
|
||||
NeedAboveRight, // D63
|
||||
NeedLeft | NeedAbove, // TM
|
||||
};
|
||||
|
||||
private unsafe delegate void IntraPredFn(byte* dst, int stride, byte* above, byte* left);
|
||||
|
||||
private static unsafe IntraPredFn[][] _pred = new IntraPredFn[][]
|
||||
{
|
||||
new IntraPredFn[]
|
||||
{
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null
|
||||
},
|
||||
new IntraPredFn[]
|
||||
{
|
||||
VPredictor4x4,
|
||||
VPredictor8x8,
|
||||
VPredictor16x16,
|
||||
VPredictor32x32
|
||||
},
|
||||
new IntraPredFn[]
|
||||
{
|
||||
HPredictor4x4,
|
||||
HPredictor8x8,
|
||||
HPredictor16x16,
|
||||
HPredictor32x32
|
||||
},
|
||||
new IntraPredFn[]
|
||||
{
|
||||
D45Predictor4x4,
|
||||
D45Predictor8x8,
|
||||
D45Predictor16x16,
|
||||
D45Predictor32x32
|
||||
},
|
||||
new IntraPredFn[]
|
||||
{
|
||||
D135Predictor4x4,
|
||||
D135Predictor8x8,
|
||||
D135Predictor16x16,
|
||||
D135Predictor32x32
|
||||
},
|
||||
new IntraPredFn[]
|
||||
{
|
||||
D117Predictor4x4,
|
||||
D117Predictor8x8,
|
||||
D117Predictor16x16,
|
||||
D117Predictor32x32
|
||||
},
|
||||
new IntraPredFn[]
|
||||
{
|
||||
D153Predictor4x4,
|
||||
D153Predictor8x8,
|
||||
D153Predictor16x16,
|
||||
D153Predictor32x32
|
||||
},
|
||||
new IntraPredFn[]
|
||||
{
|
||||
D207Predictor4x4,
|
||||
D207Predictor8x8,
|
||||
D207Predictor16x16,
|
||||
D207Predictor32x32
|
||||
},
|
||||
new IntraPredFn[]
|
||||
{
|
||||
D63Predictor4x4,
|
||||
D63Predictor8x8,
|
||||
D63Predictor16x16,
|
||||
D63Predictor32x32
|
||||
},
|
||||
new IntraPredFn[]
|
||||
{
|
||||
TMPredictor4x4,
|
||||
TMPredictor8x8,
|
||||
TMPredictor16x16,
|
||||
TMPredictor32x32
|
||||
}
|
||||
};
|
||||
|
||||
private static unsafe IntraPredFn[][][] _dcPred = new IntraPredFn[][][]
|
||||
{
|
||||
new IntraPredFn[][]
|
||||
{
|
||||
new IntraPredFn[]
|
||||
{
|
||||
Dc128Predictor4x4,
|
||||
Dc128Predictor8x8,
|
||||
Dc128Predictor16x16,
|
||||
Dc128Predictor32x32
|
||||
},
|
||||
new IntraPredFn[]
|
||||
{
|
||||
DcTopPredictor4x4,
|
||||
DcTopPredictor8x8,
|
||||
DcTopPredictor16x16,
|
||||
DcTopPredictor32x32
|
||||
}
|
||||
},
|
||||
new IntraPredFn[][]
|
||||
{
|
||||
new IntraPredFn[]
|
||||
{
|
||||
DcLeftPredictor4x4,
|
||||
DcLeftPredictor8x8,
|
||||
DcLeftPredictor16x16,
|
||||
DcLeftPredictor32x32
|
||||
},
|
||||
new IntraPredFn[]
|
||||
{
|
||||
DcPredictor4x4,
|
||||
DcPredictor8x8,
|
||||
DcPredictor16x16,
|
||||
DcPredictor32x32
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
private unsafe delegate void IntraHighPredFn(ushort* dst, int stride, ushort* above, ushort* left, int bd);
|
||||
|
||||
private static unsafe IntraHighPredFn[][] _predHigh = new IntraHighPredFn[][]
|
||||
{
|
||||
new IntraHighPredFn[]
|
||||
{
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null
|
||||
},
|
||||
new IntraHighPredFn[]
|
||||
{
|
||||
HighbdVPredictor4x4,
|
||||
HighbdVPredictor8x8,
|
||||
HighbdVPredictor16x16,
|
||||
HighbdVPredictor32x32
|
||||
},
|
||||
new IntraHighPredFn[]
|
||||
{
|
||||
HighbdHPredictor4x4,
|
||||
HighbdHPredictor8x8,
|
||||
HighbdHPredictor16x16,
|
||||
HighbdHPredictor32x32
|
||||
},
|
||||
new IntraHighPredFn[]
|
||||
{
|
||||
HighbdD45Predictor4x4,
|
||||
HighbdD45Predictor8x8,
|
||||
HighbdD45Predictor16x16,
|
||||
HighbdD45Predictor32x32
|
||||
},
|
||||
new IntraHighPredFn[]
|
||||
{
|
||||
HighbdD135Predictor4x4,
|
||||
HighbdD135Predictor8x8,
|
||||
HighbdD135Predictor16x16,
|
||||
HighbdD135Predictor32x32
|
||||
},
|
||||
new IntraHighPredFn[]
|
||||
{
|
||||
HighbdD117Predictor4x4,
|
||||
HighbdD117Predictor8x8,
|
||||
HighbdD117Predictor16x16,
|
||||
HighbdD117Predictor32x32
|
||||
},
|
||||
new IntraHighPredFn[]
|
||||
{
|
||||
HighbdD153Predictor4x4,
|
||||
HighbdD153Predictor8x8,
|
||||
HighbdD153Predictor16x16,
|
||||
HighbdD153Predictor32x32
|
||||
},
|
||||
new IntraHighPredFn[]
|
||||
{
|
||||
HighbdD207Predictor4x4,
|
||||
HighbdD207Predictor8x8,
|
||||
HighbdD207Predictor16x16,
|
||||
HighbdD207Predictor32x32
|
||||
},
|
||||
new IntraHighPredFn[]
|
||||
{
|
||||
HighbdD63Predictor4x4,
|
||||
HighbdD63Predictor8x8,
|
||||
HighbdD63Predictor16x16,
|
||||
HighbdD63Predictor32x32
|
||||
},
|
||||
new IntraHighPredFn[]
|
||||
{
|
||||
HighbdTMPredictor4x4,
|
||||
HighbdTMPredictor8x8,
|
||||
HighbdTMPredictor16x16,
|
||||
HighbdTMPredictor32x32
|
||||
}
|
||||
};
|
||||
|
||||
private static unsafe IntraHighPredFn[][][] _dcPredHigh = new IntraHighPredFn[][][]
|
||||
{
|
||||
new IntraHighPredFn[][]
|
||||
{
|
||||
new IntraHighPredFn[]
|
||||
{
|
||||
HighbdDc128Predictor4x4,
|
||||
HighbdDc128Predictor8x8,
|
||||
HighbdDc128Predictor16x16,
|
||||
HighbdDc128Predictor32x32
|
||||
},
|
||||
new IntraHighPredFn[]
|
||||
{
|
||||
HighbdDcTopPredictor4x4,
|
||||
HighbdDcTopPredictor8x8,
|
||||
HighbdDcTopPredictor16x16,
|
||||
HighbdDcTopPredictor32x32
|
||||
}
|
||||
},
|
||||
new IntraHighPredFn[][]
|
||||
{
|
||||
new IntraHighPredFn[]
|
||||
{
|
||||
HighbdDcLeftPredictor4x4,
|
||||
HighbdDcLeftPredictor8x8,
|
||||
HighbdDcLeftPredictor16x16,
|
||||
HighbdDcLeftPredictor32x32
|
||||
},
|
||||
new IntraHighPredFn[]
|
||||
{
|
||||
HighbdDcPredictor4x4,
|
||||
HighbdDcPredictor8x8,
|
||||
HighbdDcPredictor16x16,
|
||||
HighbdDcPredictor32x32
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// <summary>
/// Prepares the left column and above row for a block of 16-bit samples and runs the intra
/// predictor selected by <paramref name="mode"/> and <paramref name="txSize"/>.
/// </summary>
/// <remarks>
/// Edges that are not available are synthesized from <c>128 &lt;&lt; (xd.Bd - 8)</c>: the left column
/// uses that value plus one, the above row that value minus one. When the block reaches past the
/// frame, the last sample inside the frame is repeated.
/// </remarks>
/// <param name="xd">Block state: plane info, current buffer dimensions, edge distances and bit depth.</param>
/// <param name="ref8">Reference samples, reinterpreted as 16-bit values.</param>
/// <param name="refStride">Stride of the reference, in 16-bit samples.</param>
/// <param name="dst8">Destination samples, reinterpreted as 16-bit values.</param>
/// <param name="dstStride">Stride handed to the predictor.</param>
/// <param name="mode">Prediction mode; indexes the edge requirement and predictor tables.</param>
/// <param name="txSize">Transform size; the block side is <c>4 &lt;&lt; txSize</c>.</param>
/// <param name="upAvailable">Non-zero when the row above may be read; also used as a table index for DC prediction.</param>
/// <param name="leftAvailable">Non-zero when the column to the left may be read; also used as a table index for DC prediction.</param>
/// <param name="rightAvailable">Non-zero when the above-right samples may be read.</param>
/// <param name="x">Horizontal offset of the block, added to the position derived from <paramref name="xd"/>.</param>
/// <param name="y">Vertical offset of the block, added to the position derived from <paramref name="xd"/>.</param>
/// <param name="plane">Plane index; plane 0 uses the luma dimensions, any other plane the UV dimensions.</param>
private static unsafe void BuildIntraPredictorsHigh(
    ref MacroBlockD xd,
    byte* ref8,
    int refStride,
    byte* dst8,
    int dstStride,
    PredictionMode mode,
    TxSize txSize,
    int upAvailable,
    int leftAvailable,
    int rightAvailable,
    int x,
    int y,
    int plane)
{
    ushort* dst = (ushort*)dst8;
    ushort* refr = (ushort*)ref8;

    // aboveRow starts 16 entries into its backing storage so that aboveRow[-1] is addressable.
    ushort* leftCol = stackalloc ushort[32];
    ushort* aboveData = stackalloc ushort[64 + 16];
    ushort* aboveRow = aboveData + 16;
    ushort* predictorAbove = aboveRow;

    int bs = 4 << (int)txSize;
    ref MacroBlockDPlane pd = ref xd.Plane[plane];
    int edgeFlags = ExtendModes[(int)mode];
    int baseVal = 128 << (xd.Bd - 8);

    // 127 127 127 .. 127 127 127 127 127 127
    // 129  A   B  ..  Y   Z
    // 129  C   D  ..  W   X
    // 129  E   F  ..  U   V
    // 129  G   H  ..  S   T   T   T   T   T
    // For 10 bit and 12 bit, 127 and 129 are replaced by base - 1 and base + 1.

    // Dimensions of the plane being predicted.
    bool isLuma = plane == 0;
    int frameWidth = isLuma ? xd.CurBuf.Width : xd.CurBuf.UvWidth;
    int frameHeight = isLuma ? xd.CurBuf.Height : xd.CurBuf.UvHeight;

    // Block position inside the current frame.
    int x0 = (-xd.MbToLeftEdge >> (3 + pd.SubsamplingX)) + x;
    int y0 = (-xd.MbToTopEdge >> (3 + pd.SubsamplingY)) + y;

    // Left column.
    if ((edgeFlags & NeedLeft) != 0)
    {
        if (leftAvailable == 0)
        {
            MemoryUtil.Fill(leftCol, (ushort)(baseVal + 1), bs);
        }
        else
        {
            // Rows that can be read directly; the remainder repeats the last readable row.
            int readableRows = bs;

            if (xd.MbToBottomEdge < 0 && y0 + bs > frameHeight)
            {
                readableRows = frameHeight - y0;
            }

            int row = 0;

            for (; row < readableRows; ++row)
            {
                leftCol[row] = refr[row * refStride - 1];
            }

            for (; row < bs; ++row)
            {
                leftCol[row] = refr[(readableRows - 1) * refStride - 1];
            }
        }
    }

    // Above row.
    if ((edgeFlags & NeedAbove) != 0)
    {
        if (upAvailable == 0)
        {
            MemoryUtil.Fill(aboveRow, (ushort)(baseVal - 1), bs);
            aboveRow[-1] = (ushort)(baseVal - 1);
        }
        else
        {
            ushort* aboveRef = refr - refStride;

            if (xd.MbToRightEdge >= 0)
            {
                // No border extension needed.
                if (bs == 4 && rightAvailable != 0 && leftAvailable != 0)
                {
                    predictorAbove = aboveRef;
                }
                else
                {
                    MemoryUtil.Copy(aboveRow, aboveRef, bs);
                }
            }
            else if (x0 + bs <= frameWidth)
            {
                MemoryUtil.Copy(aboveRow, aboveRef, bs);
            }
            else if (x0 <= frameWidth)
            {
                int inFrame = frameWidth - x0;

                MemoryUtil.Copy(aboveRow, aboveRef, inFrame);
                MemoryUtil.Fill(aboveRow + inFrame, aboveRow[inFrame - 1], x0 + bs - frameWidth);
            }

            aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (ushort)(baseVal + 1);
        }
    }

    // Above row including the above-right extension.
    if ((edgeFlags & NeedAboveRight) != 0)
    {
        if (upAvailable == 0)
        {
            MemoryUtil.Fill(aboveRow, (ushort)(baseVal - 1), bs * 2);
            aboveRow[-1] = (ushort)(baseVal - 1);
        }
        else
        {
            ushort* aboveRef = refr - refStride;
            bool readAboveRight = rightAvailable != 0 && bs == 4;

            if (xd.MbToRightEdge < 0)
            {
                // Border extension needed.
                int inFrame = frameWidth - x0;

                if (x0 + 2 * bs <= frameWidth)
                {
                    if (readAboveRight)
                    {
                        MemoryUtil.Copy(aboveRow, aboveRef, 2 * bs);
                    }
                    else
                    {
                        MemoryUtil.Copy(aboveRow, aboveRef, bs);
                        MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
                    }
                }
                else if (x0 + bs <= frameWidth)
                {
                    if (readAboveRight)
                    {
                        MemoryUtil.Copy(aboveRow, aboveRef, inFrame);
                        MemoryUtil.Fill(aboveRow + inFrame, aboveRow[inFrame - 1], x0 + 2 * bs - frameWidth);
                    }
                    else
                    {
                        MemoryUtil.Copy(aboveRow, aboveRef, bs);
                        MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
                    }
                }
                else if (x0 <= frameWidth)
                {
                    MemoryUtil.Copy(aboveRow, aboveRef, inFrame);
                    MemoryUtil.Fill(aboveRow + inFrame, aboveRow[inFrame - 1], x0 + 2 * bs - frameWidth);
                }

                aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (ushort)(baseVal + 1);
            }
            else if (readAboveRight && leftAvailable != 0)
            {
                // Everything needed can be read straight from the reference.
                predictorAbove = aboveRef;
            }
            else
            {
                MemoryUtil.Copy(aboveRow, aboveRef, bs);

                if (readAboveRight)
                {
                    MemoryUtil.Copy(aboveRow + bs, aboveRef + bs, bs);
                }
                else
                {
                    MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
                }

                aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (ushort)(baseVal + 1);
            }
        }
    }

    // DC prediction picks its variant from the edge availability; every other mode is looked up directly.
    IntraHighPredFn predictor = mode == PredictionMode.DcPred
        ? _dcPredHigh[leftAvailable][upAvailable][(int)txSize]
        : _predHigh[(int)mode][(int)txSize];

    predictor(dst, dstStride, predictorAbove, leftCol, xd.Bd);
}
|
||||
|
||||
/// <summary>
/// Prepares the left column and above row for a block of 8-bit samples and runs the intra
/// predictor selected by <paramref name="mode"/> and <paramref name="txSize"/>.
/// </summary>
/// <remarks>
/// Edges that are not available are synthesized: the left column is filled with 129 and the above
/// row with 127. When the block reaches past the frame, the last sample inside the frame is repeated.
/// </remarks>
/// <param name="xd">Block state: plane info, current buffer dimensions and edge distances.</param>
/// <param name="refr">Reference samples the edges are read from.</param>
/// <param name="refStride">Stride of the reference, in samples.</param>
/// <param name="dst">Destination handed to the predictor.</param>
/// <param name="dstStride">Stride handed to the predictor.</param>
/// <param name="mode">Prediction mode; indexes the edge requirement and predictor tables.</param>
/// <param name="txSize">Transform size; the block side is <c>4 &lt;&lt; txSize</c>.</param>
/// <param name="upAvailable">Non-zero when the row above may be read; also used as a table index for DC prediction.</param>
/// <param name="leftAvailable">Non-zero when the column to the left may be read; also used as a table index for DC prediction.</param>
/// <param name="rightAvailable">Non-zero when the above-right samples may be read.</param>
/// <param name="x">Horizontal offset of the block, added to the position derived from <paramref name="xd"/>.</param>
/// <param name="y">Vertical offset of the block, added to the position derived from <paramref name="xd"/>.</param>
/// <param name="plane">Plane index; plane 0 uses the luma dimensions, any other plane the UV dimensions.</param>
public static unsafe void BuildIntraPredictors(
    ref MacroBlockD xd,
    byte* refr,
    int refStride,
    byte* dst,
    int dstStride,
    PredictionMode mode,
    TxSize txSize,
    int upAvailable,
    int leftAvailable,
    int rightAvailable,
    int x,
    int y,
    int plane)
{
    // aboveRow starts 16 entries into its backing storage so that aboveRow[-1] is addressable.
    byte* leftCol = stackalloc byte[32];
    byte* aboveData = stackalloc byte[64 + 16];
    byte* aboveRow = aboveData + 16;
    byte* predictorAbove = aboveRow;

    int bs = 4 << (int)txSize;
    ref MacroBlockDPlane pd = ref xd.Plane[plane];

    // 127 127 127 .. 127 127 127 127 127 127
    // 129  A   B  ..  Y   Z
    // 129  C   D  ..  W   X
    // 129  E   F  ..  U   V
    // 129  G   H  ..  S   T   T   T   T   T
    // ..

    // Dimensions of the plane being predicted.
    bool isLuma = plane == 0;
    int frameWidth = isLuma ? xd.CurBuf.Width : xd.CurBuf.UvWidth;
    int frameHeight = isLuma ? xd.CurBuf.Height : xd.CurBuf.UvHeight;

    // Block position inside the current frame.
    int x0 = (-xd.MbToLeftEdge >> (3 + pd.SubsamplingX)) + x;
    int y0 = (-xd.MbToTopEdge >> (3 + pd.SubsamplingY)) + y;

    int edgeFlags = ExtendModes[(int)mode];

    // Left column.
    if ((edgeFlags & NeedLeft) != 0)
    {
        if (leftAvailable == 0)
        {
            MemoryUtil.Fill(leftCol, (byte)129, bs);
        }
        else
        {
            // Rows that can be read directly; the remainder repeats the last readable row.
            int readableRows = bs;

            if (xd.MbToBottomEdge < 0 && y0 + bs > frameHeight)
            {
                readableRows = frameHeight - y0;
            }

            int row = 0;

            for (; row < readableRows; ++row)
            {
                leftCol[row] = refr[row * refStride - 1];
            }

            for (; row < bs; ++row)
            {
                leftCol[row] = refr[(readableRows - 1) * refStride - 1];
            }
        }
    }

    // Above row.
    if ((edgeFlags & NeedAbove) != 0)
    {
        if (upAvailable == 0)
        {
            MemoryUtil.Fill(aboveRow, (byte)127, bs);
            aboveRow[-1] = 127;
        }
        else
        {
            byte* aboveRef = refr - refStride;

            if (xd.MbToRightEdge >= 0)
            {
                // No border extension needed.
                if (bs == 4 && rightAvailable != 0 && leftAvailable != 0)
                {
                    predictorAbove = aboveRef;
                }
                else
                {
                    MemoryUtil.Copy(aboveRow, aboveRef, bs);
                }
            }
            else if (x0 + bs <= frameWidth)
            {
                MemoryUtil.Copy(aboveRow, aboveRef, bs);
            }
            else if (x0 <= frameWidth)
            {
                int inFrame = frameWidth - x0;

                MemoryUtil.Copy(aboveRow, aboveRef, inFrame);
                MemoryUtil.Fill(aboveRow + inFrame, aboveRow[inFrame - 1], x0 + bs - frameWidth);
            }

            aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (byte)129;
        }
    }

    // Above row including the above-right extension.
    if ((edgeFlags & NeedAboveRight) != 0)
    {
        if (upAvailable == 0)
        {
            MemoryUtil.Fill(aboveRow, (byte)127, bs * 2);
            aboveRow[-1] = 127;
        }
        else
        {
            byte* aboveRef = refr - refStride;
            bool readAboveRight = rightAvailable != 0 && bs == 4;

            if (xd.MbToRightEdge < 0)
            {
                // Border extension needed.
                int inFrame = frameWidth - x0;

                if (x0 + 2 * bs <= frameWidth)
                {
                    if (readAboveRight)
                    {
                        MemoryUtil.Copy(aboveRow, aboveRef, 2 * bs);
                    }
                    else
                    {
                        MemoryUtil.Copy(aboveRow, aboveRef, bs);
                        MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
                    }
                }
                else if (x0 + bs <= frameWidth)
                {
                    if (readAboveRight)
                    {
                        MemoryUtil.Copy(aboveRow, aboveRef, inFrame);
                        MemoryUtil.Fill(aboveRow + inFrame, aboveRow[inFrame - 1], x0 + 2 * bs - frameWidth);
                    }
                    else
                    {
                        MemoryUtil.Copy(aboveRow, aboveRef, bs);
                        MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
                    }
                }
                else if (x0 <= frameWidth)
                {
                    MemoryUtil.Copy(aboveRow, aboveRef, inFrame);
                    MemoryUtil.Fill(aboveRow + inFrame, aboveRow[inFrame - 1], x0 + 2 * bs - frameWidth);
                }
            }
            else if (readAboveRight && leftAvailable != 0)
            {
                // Everything needed can be read straight from the reference.
                predictorAbove = aboveRef;
            }
            else
            {
                MemoryUtil.Copy(aboveRow, aboveRef, bs);

                if (readAboveRight)
                {
                    MemoryUtil.Copy(aboveRow + bs, aboveRef + bs, bs);
                }
                else
                {
                    MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
                }
            }

            // Written on every path, including the one that predicts straight from the reference.
            aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (byte)129;
        }
    }

    // DC prediction picks its variant from the edge availability; every other mode is looked up directly.
    IntraPredFn predictor = mode == PredictionMode.DcPred
        ? _dcPred[leftAvailable][upAvailable][(int)txSize]
        : _pred[(int)mode][(int)txSize];

    predictor(dst, dstStride, predictorAbove, leftCol);
}
|
||||
|
||||
/// <summary>
/// Builds the intra prediction for one transform block, dispatching to the
/// high bit depth builder when the current frame buffer reports <c>HighBd</c>,
/// and to the regular builder otherwise.
/// </summary>
/// <param name="xd">Block decoding state; supplies the neighbour pointers and the current buffer</param>
/// <param name="bwlIn">Log2 of the block width, in the same units as <paramref name="aoff"/></param>
/// <param name="txSize">Transform size; its integer value is used as the log2 of the transform width</param>
/// <param name="mode">Prediction mode forwarded to the predictor builder</param>
/// <param name="refr">Reference sample pointer forwarded to the predictor builder</param>
/// <param name="refStride">Stride of <paramref name="refr"/></param>
/// <param name="dst">Destination pointer forwarded to the predictor builder</param>
/// <param name="dstStride">Stride of <paramref name="dst"/></param>
/// <param name="aoff">Horizontal offset of the transform block; multiplied by 4 to obtain the x coordinate</param>
/// <param name="loff">Vertical offset of the transform block; multiplied by 4 to obtain the y coordinate</param>
/// <param name="plane">Plane index forwarded to the predictor builder</param>
public static unsafe void PredictIntraBlock(
    ref MacroBlockD xd,
    int bwlIn,
    TxSize txSize,
    PredictionMode mode,
    byte* refr,
    int refStride,
    byte* dst,
    int dstStride,
    int aoff,
    int loff,
    int plane)
{
    int blockWidth = 1 << bwlIn;
    int txWidth = 1 << (int)txSize;

    // A neighbour is usable when we are not on the first row/column of the block,
    // or when the corresponding neighbouring mode info exists.
    bool topAvailable = loff != 0 || !xd.AboveMi.IsNull;
    bool leftAvailable = aoff != 0 || !xd.LeftMi.IsNull;
    bool rightAvailable = (aoff + txWidth) < blockWidth;

    int haveTop = topAvailable ? 1 : 0;
    int haveLeft = leftAvailable ? 1 : 0;
    int haveRight = rightAvailable ? 1 : 0;

    int x = aoff * 4;
    int y = loff * 4;

    if (!xd.CurBuf.HighBd)
    {
        BuildIntraPredictors(
            ref xd,
            refr,
            refStride,
            dst,
            dstStride,
            mode,
            txSize,
            haveTop,
            haveLeft,
            haveRight,
            x,
            y,
            plane);
    }
    else
    {
        BuildIntraPredictorsHigh(
            ref xd,
            refr,
            refStride,
            dst,
            dstStride,
            mode,
            txSize,
            haveTop,
            haveLeft,
            haveRight,
            x,
            y,
            plane);
    }
}
|
||||
}
|
||||
}
|
20
Ryujinx.Graphics.Nvdec.Vp9/Ryujinx.Graphics.Nvdec.Vp9.csproj
Normal file
20
Ryujinx.Graphics.Nvdec.Vp9/Ryujinx.Graphics.Nvdec.Vp9.csproj
Normal file
@ -0,0 +1,20 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFramework>netcoreapp3.1</TargetFramework>
    <!--
      This project contains unsafe code, so unsafe blocks are enabled for every
      configuration and platform instead of only Debug|AnyCPU and Release|AnyCPU.
    -->
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
    <ProjectReference Include="..\Ryujinx.Graphics.Video\Ryujinx.Graphics.Video.csproj" />
  </ItemGroup>

</Project>
|
10
Ryujinx.Graphics.Nvdec.Vp9/TileBuffer.cs
Normal file
10
Ryujinx.Graphics.Nvdec.Vp9/TileBuffer.cs
Normal file
@ -0,0 +1,10 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9
{
    /// <summary>
    /// Pairs a byte buffer with a size value.
    /// </summary>
    internal struct TileBuffer
    {
        /// <summary>
        /// Bytes of the buffer.
        /// </summary>
        public ArrayPtr<byte> Data;

        /// <summary>
        /// Size associated with <see cref="Data"/> (presumably in bytes; confirm against the code that fills it).
        /// </summary>
        public int Size;
    }
}
|
15
Ryujinx.Graphics.Nvdec.Vp9/TileWorkerData.cs
Normal file
15
Ryujinx.Graphics.Nvdec.Vp9/TileWorkerData.cs
Normal file
@ -0,0 +1,15 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Dsp;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Types;
|
||||
using Ryujinx.Graphics.Video;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9
{
    /// <summary>
    /// State bundle used while decoding a tile: a bit reader, the block decoding
    /// state and the dequantized coefficient storage.
    /// </summary>
    internal struct TileWorkerData
    {
        /// <summary>
        /// Bit reader for this worker.
        /// </summary>
        public Reader BitReader;

        /// <summary>
        /// Block decoding state for this worker.
        /// </summary>
        public MacroBlockD Xd;

        /// <summary>
        /// Dequantized coefficients (32 x 32 integers).
        /// The storage is shared by all the planes, so planes must be decoded serially.
        /// </summary>
        public Array32<Array32<int>> Dqcoeff;
    }
}
|
10
Ryujinx.Graphics.Nvdec.Vp9/Types/BModeInfo.cs
Normal file
10
Ryujinx.Graphics.Nvdec.Vp9/Types/BModeInfo.cs
Normal file
@ -0,0 +1,10 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Per sub-block mode information: a prediction mode and up to two motion vectors.
    /// </summary>
    internal struct BModeInfo
    {
        /// <summary>
        /// Prediction mode of the sub-block.
        /// </summary>
        public PredictionMode Mode;

        /// <summary>
        /// First and second inter predictor motion vectors.
        /// </summary>
        public Array2<Mv> Mv;
    }
}
|
21
Ryujinx.Graphics.Nvdec.Vp9/Types/BlockSize.cs
Normal file
21
Ryujinx.Graphics.Nvdec.Vp9/Types/BlockSize.cs
Normal file
@ -0,0 +1,21 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Block sizes, named after their two dimensions. The numeric values are used
    /// as indices into lookup tables, so they must not be changed.
    /// </summary>
    internal enum BlockSize
    {
        Block4x4   = 0,
        Block4x8   = 1,
        Block8x4   = 2,
        Block8x8   = 3,
        Block8x16  = 4,
        Block16x8  = 5,
        Block16x16 = 6,
        Block16x32 = 7,
        Block32x16 = 8,
        Block32x32 = 9,
        Block32x64 = 10,
        Block64x32 = 11,
        Block64x64 = 12,

        /// <summary>
        /// Number of valid block sizes.
        /// </summary>
        BlockSizes = 13,

        /// <summary>
        /// Marker for an invalid block size; shares its value with <see cref="BlockSizes"/>.
        /// </summary>
        BlockInvalid = BlockSizes
    }
}
|
10
Ryujinx.Graphics.Nvdec.Vp9/Types/Buf2D.cs
Normal file
10
Ryujinx.Graphics.Nvdec.Vp9/Types/Buf2D.cs
Normal file
@ -0,0 +1,10 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Two-dimensional byte buffer description: the data plus its stride.
    /// </summary>
    internal struct Buf2D
    {
        /// <summary>
        /// Buffer data.
        /// </summary>
        public ArrayPtr<byte> Buf;

        /// <summary>
        /// Stride of <see cref="Buf"/> (presumably the distance between two rows; confirm against users).
        /// </summary>
        public int Stride;
    }
}
|
8
Ryujinx.Graphics.Nvdec.Vp9/Types/FrameType.cs
Normal file
8
Ryujinx.Graphics.Nvdec.Vp9/Types/FrameType.cs
Normal file
@ -0,0 +1,8 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Frame kinds distinguished by the decoder.
    /// </summary>
    internal enum FrameType
    {
        /// <summary>
        /// Key frame.
        /// </summary>
        KeyFrame = 0,

        /// <summary>
        /// Inter frame.
        /// </summary>
        InterFrame = 1
    }
}
|
27
Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilter.cs
Normal file
27
Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilter.cs
Normal file
@ -0,0 +1,27 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Loop filter parameters: current and previous levels, reference/mode deltas
    /// and the loop filter mask storage.
    /// </summary>
    internal struct LoopFilter
    {
        /// <summary>Current filter level.</summary>
        public int FilterLevel;

        /// <summary>Previously used filter level.</summary>
        public int LastFiltLevel;

        /// <summary>Current sharpness level.</summary>
        public int SharpnessLevel;

        /// <summary>Previously used sharpness level.</summary>
        public int LastSharpnessLevel;

        /// <summary>Whether the reference/mode deltas are enabled.</summary>
        public bool ModeRefDeltaEnabled;

        /// <summary>Whether the reference/mode deltas are being updated.</summary>
        public bool ModeRefDeltaUpdate;

        /// <summary>Deltas per reference frame. 0 = Intra, Last, GF, ARF.</summary>
        public Array4<sbyte> RefDeltas;

        /// <summary>Previous values of <see cref="RefDeltas"/>.</summary>
        public Array4<sbyte> LastRefDeltas;

        /// <summary>Deltas per mode. 0 = ZERO_MV, MV.</summary>
        public Array2<sbyte> ModeDeltas;

        /// <summary>Previous values of <see cref="ModeDeltas"/>.</summary>
        public Array2<sbyte> LastModeDeltas;

        /// <summary>Loop filter masks.</summary>
        public ArrayPtr<LoopFilterMask> Lfm;

        /// <summary>Stride of <see cref="Lfm"/>.</summary>
        public int LfmStride;
    }
}
|
10
Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterInfoN.cs
Normal file
10
Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterInfoN.cs
Normal file
@ -0,0 +1,10 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Loop filter lookup data: 64 threshold sets and an 8 x 4 x 2 table of levels.
    /// </summary>
    internal struct LoopFilterInfoN
    {
        /// <summary>
        /// Threshold sets (64 entries; presumably one per filter level, confirm against the initialization code).
        /// </summary>
        public Array64<LoopFilterThresh> Lfthr;

        /// <summary>
        /// Filter levels, indexed by three keys of size 8, 4 and 2
        /// (presumably segment, reference frame and mode; confirm against users).
        /// </summary>
        public Array8<Array4<Array2<byte>>> Lvl;
    }
}
|
24
Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterMask.cs
Normal file
24
Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterMask.cs
Normal file
@ -0,0 +1,24 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Bit masks for all 8x8 blocks in a 64x64 region.
    /// </summary>
    /// <remarks>
    /// Each 1 bit represents a position in which we want to apply the loop filter.
    /// Left entries refer to whether we apply a filter on the border to the left of
    /// the block. Above entries refer to whether or not to apply a filter on the
    /// above border. Int entries refer to whether or not to apply borders on the
    /// 4x4 edges within the 8x8 block that each bit represents.
    /// Since each transform is accompanied by a potentially different type of loop
    /// filter, there is a different entry in the array for each transform size.
    /// </remarks>
    internal struct LoopFilterMask
    {
        /// <summary>Luma left border masks, one per transform size.</summary>
        public Array4<ulong> LeftY;

        /// <summary>Luma above border masks, one per transform size.</summary>
        public Array4<ulong> AboveY;

        /// <summary>Luma mask for the interior 4x4 edges.</summary>
        public ulong Int4x4Y;

        /// <summary>Chroma left border masks, one per transform size.</summary>
        public Array4<ushort> LeftUv;

        /// <summary>Chroma above border masks, one per transform size.</summary>
        public Array4<ushort> AboveUv;

        /// <summary>Chroma mask for the interior 4x4 edges.</summary>
        public ushort Int4x4Uv;

        /// <summary>64 byte values (presumably the luma filter level of each 8x8 block; confirm against users).</summary>
        public Array64<byte> LflY;
    }
}
|
13
Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterThresh.cs
Normal file
13
Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterThresh.cs
Normal file
@ -0,0 +1,13 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Loop filter thresholds, each stored as 16 bytes.
    /// </summary>
    /// <remarks>
    /// Need to align this structure so when it is declared and passed
    /// it can be loaded into vector registers.
    /// </remarks>
    internal struct LoopFilterThresh
    {
        /// <summary>Macroblock limit values.</summary>
        public Array16<byte> Mblim;

        /// <summary>Limit values.</summary>
        public Array16<byte> Lim;

        /// <summary>Threshold values (presumably the high edge variance threshold; confirm against users).</summary>
        public Array16<byte> HevThr;
    }
}
|
179
Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockD.cs
Normal file
179
Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockD.cs
Normal file
@ -0,0 +1,179 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Video;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Decoding state of the block currently being processed: per-plane data,
    /// neighbouring mode info, distances to the frame edges and entropy contexts.
    /// </summary>
    internal struct MacroBlockD
    {
        public Array3<MacroBlockDPlane> Plane;
        public byte BmodeBlocksWl;
        public byte BmodeBlocksHl;

        public Ptr<Vp9BackwardUpdates> Counts;
        public TileInfo Tile;

        public int MiStride;

        // Grid of 8x8 cells is placed over the block.
        // If some of them belong to the same mbtree-block
        // they will just have same mi[i][j] value
        public ArrayPtr<Ptr<ModeInfo>> Mi;

        // Mode info of the left and above neighbours; null when the neighbour is not available
        public Ptr<ModeInfo> LeftMi;
        public Ptr<ModeInfo> AboveMi;

        public uint MaxBlocksWide;
        public uint MaxBlocksHigh;

        public ArrayPtr<Array3<byte>> PartitionProbs;

        /* Distance of MB away from frame edges */
        public int MbToLeftEdge;
        public int MbToRightEdge;
        public int MbToTopEdge;
        public int MbToBottomEdge;

        public Ptr<Vp9EntropyProbs> Fc;

        /* pointers to reference frames */
        public Array2<Ptr<RefBuffer>> BlockRefs;

        /* pointer to current frame */
        public Surface CurBuf;

        public Array3<ArrayPtr<sbyte>> AboveContext;
        public Array3<Array16<sbyte>> LeftContext;

        public ArrayPtr<sbyte> AboveSegContext;
        public Array8<sbyte> LeftSegContext;

        /* Bit depth: 8, 10, 12 */
        public int Bd;

        public bool Lossless;
        public bool Corrupted;

        public Ptr<InternalErrorInfo> ErrorInfo;

        /// <summary>
        /// Sums the <c>SegIdPredicted</c> flags of the above and left neighbours.
        /// A missing neighbour contributes 0.
        /// </summary>
        /// <returns>The segment id prediction context</returns>
        public int GetPredContextSegId()
        {
            int context = 0;

            if (!AboveMi.IsNull)
            {
                context += AboveMi.Value.SegIdPredicted;
            }

            if (!LeftMi.IsNull)
            {
                context += LeftMi.Value.SegIdPredicted;
            }

            return context;
        }

        /// <summary>
        /// Sums the <c>Skip</c> flags of the above and left neighbours.
        /// A missing neighbour contributes 0.
        /// </summary>
        /// <returns>The skip context</returns>
        public int GetSkipContext()
        {
            int context = 0;

            if (!AboveMi.IsNull)
            {
                context += AboveMi.Value.Skip;
            }

            if (!LeftMi.IsNull)
            {
                context += LeftMi.Value.Skip;
            }

            return context;
        }

        /// <summary>
        /// Derives the context for the switchable interpolation filter from the
        /// filters of the left and above neighbours.
        /// </summary>
        /// <remarks>
        /// The mode info data structure has a one element border above and to the
        /// left of the entries corresponding to real macroblocks.
        /// The prediction flags in these dummy entries are initialized to 0.
        /// </remarks>
        /// <returns>
        /// The shared filter when both neighbours agree, the filter of the only neighbour
        /// that has one, or <c>Constants.SwitchableFilters</c> when they disagree
        /// </returns>
        public int GetPredContextSwitchableInterp()
        {
            // A missing neighbour is represented by the "no specific filter" value.
            int leftType = Constants.SwitchableFilters;
            if (!LeftMi.IsNull)
            {
                leftType = LeftMi.Value.InterpFilter;
            }

            int aboveType = Constants.SwitchableFilters;
            if (!AboveMi.IsNull)
            {
                aboveType = AboveMi.Value.InterpFilter;
            }

            if (leftType == aboveType)
            {
                return leftType;
            }

            if (leftType == Constants.SwitchableFilters)
            {
                return aboveType;
            }

            if (aboveType == Constants.SwitchableFilters)
            {
                return leftType;
            }

            return Constants.SwitchableFilters;
        }

        /// <summary>
        /// Derives the intra/inter context from the neighbouring blocks.
        /// </summary>
        /// <remarks>
        /// The mode info data structure has a one element border above and to the
        /// left of the entries corresponding to real macroblocks.
        /// The prediction flags in these dummy entries are initialized to 0.
        /// 0 - inter/inter, inter/--, --/inter, --/--
        /// 1 - intra/inter, inter/intra
        /// 2 - intra/--, --/intra
        /// 3 - intra/intra
        /// </remarks>
        /// <returns>The context, from 0 to 3</returns>
        public int GetIntraInterContext()
        {
            bool hasAbove = !AboveMi.IsNull;
            bool hasLeft = !LeftMi.IsNull;

            if (!hasAbove && !hasLeft)
            {
                // No edge available
                return 0;
            }

            if (hasAbove && hasLeft)
            {
                // Both edges available
                bool aboveIntra = !AboveMi.Value.IsInterBlock();
                bool leftIntra = !LeftMi.Value.IsInterBlock();

                if (aboveIntra && leftIntra)
                {
                    return 3;
                }

                return (aboveIntra || leftIntra) ? 1 : 0;
            }

            // One edge available
            bool neighbourIsInter = hasAbove ? AboveMi.Value.IsInterBlock() : LeftMi.Value.IsInterBlock();

            return neighbourIsInter ? 0 : 2;
        }

        /// <summary>
        /// Returns a context number for the transform size of the current block,
        /// based on the transform sizes of the above and left neighbours.
        /// </summary>
        /// <remarks>
        /// The mode info data structure has a one element border above and to the
        /// left of the entries corresponding to real blocks.
        /// The prediction flags in these dummy entries are initialized to 0.
        /// </remarks>
        /// <returns>1 when the summed neighbour contexts exceed the maximum transform size, 0 otherwise</returns>
        public int GetTxSizeContext()
        {
            int maxTxSize = (int)Luts.MaxTxSizeLookup[(int)Mi[0].Value.SbType];

            bool hasAbove = !AboveMi.IsNull;
            bool hasLeft = !LeftMi.IsNull;

            // Missing or skipped neighbours count as using the maximum transform size.
            int aboveCtx = maxTxSize;
            if (hasAbove && AboveMi.Value.Skip == 0)
            {
                aboveCtx = (int)AboveMi.Value.TxSize;
            }

            int leftCtx = maxTxSize;
            if (hasLeft && LeftMi.Value.Skip == 0)
            {
                leftCtx = (int)LeftMi.Value.TxSize;
            }

            // A missing neighbour mirrors the other one.
            if (!hasLeft)
            {
                leftCtx = aboveCtx;
            }

            if (!hasAbove)
            {
                aboveCtx = leftCtx;
            }

            return aboveCtx + leftCtx > maxTxSize ? 1 : 0;
        }

        /// <summary>
        /// Sets the subsampling of every plane: plane 0 is never subsampled,
        /// the remaining planes use the supplied values.
        /// </summary>
        /// <param name="ssX">Horizontal subsampling for planes other than plane 0</param>
        /// <param name="ssY">Vertical subsampling for planes other than plane 0</param>
        public void SetupBlockPlanes(int ssX, int ssY)
        {
            for (int planeIndex = 0; planeIndex < Constants.MaxMbPlane; planeIndex++)
            {
                bool subsampled = planeIndex != 0;

                Plane[planeIndex].SubsamplingX = subsampled ? ssX : 0;
                Plane[planeIndex].SubsamplingY = subsampled ? ssY : 0;
            }
        }

        /// <summary>
        /// Points the above and left contexts of every plane at the entries
        /// that correspond to the given mode info position.
        /// </summary>
        /// <param name="miRow">Row of the block, in mode info units</param>
        /// <param name="miCol">Column of the block, in mode info units</param>
        public void SetSkipContext(int miRow, int miCol)
        {
            int aboveIdx = miCol * 2;

            // The left context only holds 16 entries, so the index wraps around.
            int leftIdx = (miRow * 2) & 15;

            for (int planeIndex = 0; planeIndex < Constants.MaxMbPlane; ++planeIndex)
            {
                ref MacroBlockDPlane pd = ref Plane[planeIndex];

                int aboveStart = aboveIdx >> pd.SubsamplingX;
                int leftStart = leftIdx >> pd.SubsamplingY;

                pd.AboveContext = AboveContext[planeIndex].Slice(aboveStart);
                pd.LeftContext = new ArrayPtr<sbyte>(ref LeftContext[planeIndex][leftStart], 16 - leftStart);
            }
        }

        /// <summary>
        /// Updates the distances to the frame edges and the neighbour availability
        /// for a block located at the given mode info position.
        /// </summary>
        /// <param name="tile">Tile containing the block; its first column has no left neighbour</param>
        /// <param name="miRow">Row of the block, in mode info units</param>
        /// <param name="bh">Height of the block, in mode info units</param>
        /// <param name="miCol">Column of the block, in mode info units</param>
        /// <param name="bw">Width of the block, in mode info units</param>
        /// <param name="miRows">Total number of mode info rows</param>
        /// <param name="miCols">Total number of mode info columns</param>
        internal void SetMiRowCol(ref TileInfo tile, int miRow, int bh, int miCol, int bw, int miRows, int miCols)
        {
            int rowsBelow = miRows - bh - miRow;
            int colsRight = miCols - bw - miCol;

            MbToTopEdge = -((miRow * Constants.MiSize) * 8);
            MbToBottomEdge = (rowsBelow * Constants.MiSize) * 8;
            MbToLeftEdge = -((miCol * Constants.MiSize) * 8);
            MbToRightEdge = (colsRight * Constants.MiSize) * 8;

            // Are edges available for intra prediction?
            if (miRow != 0)
            {
                AboveMi = Mi[-MiStride];
            }
            else
            {
                AboveMi = Ptr<ModeInfo>.Null;
            }

            if (miCol > tile.MiColStart)
            {
                LeftMi = Mi[-1];
            }
            else
            {
                LeftMi = Ptr<ModeInfo>.Null;
            }
        }
    }
}
|
21
Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockDPlane.cs
Normal file
21
Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockDPlane.cs
Normal file
@ -0,0 +1,21 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Per-plane part of the block decoding state.
    /// </summary>
    internal struct MacroBlockDPlane
    {
        /// <summary>Dequantized coefficients of the plane.</summary>
        public ArrayPtr<int> DqCoeff;

        /// <summary>Horizontal subsampling, used as a right shift amount.</summary>
        public int SubsamplingX;

        /// <summary>Vertical subsampling, used as a right shift amount.</summary>
        public int SubsamplingY;

        /// <summary>Destination buffer.</summary>
        public Buf2D Dst;

        /// <summary>Two source buffers (presumably the prediction references; confirm against users).</summary>
        public Array2<Buf2D> Pre;

        /// <summary>Above entropy context for the current block position.</summary>
        public ArrayPtr<sbyte> AboveContext;

        /// <summary>Left entropy context for the current block position.</summary>
        public ArrayPtr<sbyte> LeftContext;

        /// <summary>Dequantization values: 8 entries with 2 values each (presumably per segment; confirm against users).</summary>
        public Array8<Array2<short>> SegDequant;

        // Number of 4x4s in current block
        public ushort N4W, N4H;

        // Log2 of N4W, N4H
        public byte N4Wl, N4Hl;
    }
}
|
66
Ryujinx.Graphics.Nvdec.Vp9/Types/ModeInfo.cs
Normal file
66
Ryujinx.Graphics.Nvdec.Vp9/Types/ModeInfo.cs
Normal file
@ -0,0 +1,66 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Mode information of a block: size, prediction mode, transform size,
    /// reference frames and motion vectors.
    /// </summary>
    internal struct ModeInfo
    {
        // Common for both Inter and Intra blocks
        public BlockSize SbType;
        public PredictionMode Mode;
        public TxSize TxSize;
        public sbyte Skip;
        public sbyte SegmentId;
        public sbyte SegIdPredicted; // Valid only when TemporalUpdate is enabled

        // Only for Intra blocks
        public PredictionMode UvMode;

        // Only for Inter blocks
        public byte InterpFilter;

        // If RefFrame[idx] is equal to AltRefFrame then
        // MacroBlockD.BlockRefs[idx] is an altref
        public Array2<sbyte> RefFrame;

        public Array2<Mv> Mv;

        public Array4<BModeInfo> Bmi;

        // Maps a block index and a column selector to the sub-block whose motion vector is used.
        private static readonly int[][] IdxNColumnToSubblock = new int[][]
        {
            new int[] { 1, 2 },
            new int[] { 1, 3 },
            new int[] { 3, 2 },
            new int[] { 3, 3 }
        };

        /// <summary>
        /// Gets the luma prediction mode of a sub-block.
        /// </summary>
        /// <param name="block">Index of the sub-block; only used for blocks smaller than 8x8</param>
        /// <returns>The sub-block mode for blocks smaller than 8x8, the block mode otherwise</returns>
        public PredictionMode GetYMode(int block)
        {
            if (SbType < BlockSize.Block8x8)
            {
                return Bmi[block].Mode;
            }

            return Mode;
        }

        /// <summary>
        /// Looks up the transform size used for a subsampled plane.
        /// </summary>
        /// <param name="pd">Plane whose subsampling selects the table entry</param>
        /// <returns>The transform size from <c>Luts.UvTxsizeLookup</c></returns>
        public TxSize GetUvTxSize(ref MacroBlockDPlane pd)
        {
            int sbType = (int)SbType;

            Debug.Assert(SbType < BlockSize.Block8x8 ||
                Luts.SsSizeLookup[sbType][pd.SubsamplingX][pd.SubsamplingY] != BlockSize.BlockInvalid);

            return Luts.UvTxsizeLookup[sbType][(int)TxSize][pd.SubsamplingX][pd.SubsamplingY];
        }

        /// <summary>
        /// Checks whether the first reference frame is greater than <c>Constants.IntraFrame</c>.
        /// </summary>
        /// <returns>True for an inter block, false otherwise</returns>
        public bool IsInterBlock() => RefFrame[0] > Constants.IntraFrame;

        /// <summary>
        /// Checks whether the second reference frame is greater than <c>Constants.IntraFrame</c>.
        /// </summary>
        /// <returns>True when a second reference is in use, false otherwise</returns>
        public bool HasSecondRef() => RefFrame[1] > Constants.IntraFrame;

        /// <summary>
        /// Returns either the motion vector of the appropriate sub-block or the motion
        /// vector of the whole block, depending on whether the block is smaller than 8x8
        /// and a sub-block index was supplied.
        /// </summary>
        /// <param name="whichMv">Index of the motion vector to read (first or second)</param>
        /// <param name="searchCol">Column selector; 0 selects the second table column, anything else the first</param>
        /// <param name="blockIdx">Sub-block index, or a negative value to always use the block motion vector</param>
        /// <returns>The selected motion vector</returns>
        public Mv GetSubBlockMv(int whichMv, int searchCol, int blockIdx)
        {
            bool useSubBlock = blockIdx >= 0 && SbType < BlockSize.Block8x8;

            if (!useSubBlock)
            {
                return Mv[whichMv];
            }

            int column = searchCol == 0 ? 1 : 0;
            int subBlock = IdxNColumnToSubblock[blockIdx][column];

            return Bmi[subBlock].Mv[whichMv];
        }
    }
}
|
14
Ryujinx.Graphics.Nvdec.Vp9/Types/MotionVectorContext.cs
Normal file
14
Ryujinx.Graphics.Nvdec.Vp9/Types/MotionVectorContext.cs
Normal file
@ -0,0 +1,14 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Motion vector contexts, named after the combination of the two candidates
    /// they describe. The numeric values must not be changed.
    /// </summary>
    internal enum MotionVectorContext
    {
        BothZero          = 0,
        ZeroPlusPredicted = 1,
        BothPredicted     = 2,
        NewPlusNonIntra   = 3,
        BothNew           = 4,
        IntraPlusNonIntra = 5,
        BothIntra         = 6,

        /// <summary>
        /// Marker for a combination that is not valid.
        /// </summary>
        InvalidCase       = 9
    }
}
|
189
Ryujinx.Graphics.Nvdec.Vp9/Types/Mv.cs
Normal file
189
Ryujinx.Graphics.Nvdec.Vp9/Types/Mv.cs
Normal file
@ -0,0 +1,189 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Video;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Motion vector with 16-bit row and column components, plus the helpers
    /// used to classify, count, clamp and round it.
    /// </summary>
    internal struct Mv
    {
        public short Row;
        public short Col;

        // Integer base 2 logarithm table: entry i holds floor(log2(i)) for i >= 1 and 0 for i == 0.
        // It has 1025 entries and is indexed with (z >> 3) by GetMvClass.
        private static readonly byte[] LogInBase2 = new byte[]
        {
            0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
            4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
            5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7,
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
            9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10
        };

        /// <summary>
        /// Checks whether both components are small enough to use high precision (1/8) motion vectors.
        /// </summary>
        /// <returns>True when the magnitude of both components is below the threshold</returns>
        public bool UseMvHp()
        {
            const int kMvRefThresh = 64; // Threshold for use of high-precision 1/8 mv

            // Widen to int first: Math.Abs(short) throws OverflowException for short.MinValue.
            return Math.Abs((int)Row) < kMvRefThresh && Math.Abs((int)Col) < kMvRefThresh;
        }

        /// <summary>
        /// Checks whether a joint type has a non-zero vertical component.
        /// </summary>
        /// <param name="type">Joint type to test</param>
        /// <returns>True for the joint types with a non-zero vertical component</returns>
        public static bool MvJointVertical(MvJointType type)
        {
            return type == MvJointType.MvJointHzvnz || type == MvJointType.MvJointHnzvnz;
        }

        /// <summary>
        /// Checks whether a joint type has a non-zero horizontal component.
        /// </summary>
        /// <param name="type">Joint type to test</param>
        /// <returns>True for the joint types with a non-zero horizontal component</returns>
        public static bool MvJointHorizontal(MvJointType type)
        {
            return type == MvJointType.MvJointHnzvz || type == MvJointType.MvJointHnzvnz;
        }

        /// <summary>
        /// Gets the smallest magnitude that belongs to a motion vector class.
        /// </summary>
        /// <param name="c">Motion vector class</param>
        /// <returns>0 for class 0, <c>Constants.Class0Size &lt;&lt; (c + 2)</c> otherwise</returns>
        private static int MvClassBase(MvClassType c)
        {
            return c != 0 ? Constants.Class0Size << ((int)c + 2) : 0;
        }

        /// <summary>
        /// Classifies a motion vector magnitude and optionally reports its offset inside the class.
        /// </summary>
        /// <param name="z">Magnitude minus one; expected to be non-negative</param>
        /// <param name="offset">When not null, receives <paramref name="z"/> minus the base of the class</param>
        /// <returns>The class of the magnitude</returns>
        private static MvClassType GetMvClass(int z, Ptr<int> offset)
        {
            // Magnitudes past the end of the table all belong to the last class.
            MvClassType c = (z >= Constants.Class0Size * 4096) ? MvClassType.MvClass10 : (MvClassType)LogInBase2[z >> 3];
            if (!offset.IsNull)
            {
                offset.Value = z - MvClassBase(c);
            }

            return c;
        }

        /// <summary>
        /// Adds one motion vector component to the backward adaptation counters.
        /// </summary>
        /// <param name="v">Component value; must not be zero</param>
        /// <param name="counts">Counters to update</param>
        /// <param name="comp">Component index used to select the counters (0 is used for rows, 1 for columns)</param>
        /// <param name="incr">Amount added to each counter</param>
        /// <param name="usehp">Multiplier applied to <paramref name="incr"/> for the high precision counters</param>
        private static void IncMvComponent(int v, ref Vp9BackwardUpdates counts, int comp, int incr, int usehp)
        {
            int s, z, c, o = 0, d, e, f;
            Debug.Assert(v != 0); /* Should not be zero */
            s = v < 0 ? 1 : 0;
            counts.Sign[comp][s] += (uint)incr;
            z = (s != 0 ? -v : v) - 1; /* Magnitude - 1 */

            c = (int)GetMvClass(z, new Ptr<int>(ref o));
            counts.Classes[comp][c] += (uint)incr;

            d = (o >> 3);     /* Int mv data */
            f = (o >> 1) & 3; /* Fractional pel mv data */
            e = (o & 1);      /* High precision mv data */

            if (c == (int)MvClassType.MvClass0)
            {
                counts.Class0[comp][d] += (uint)incr;
                counts.Class0Fp[comp][d][f] += (uint)incr;
                counts.Class0Hp[comp][e] += (uint)(usehp * incr);
            }
            else
            {
                int i;
                int b = c + Constants.Class0Bits - 1; // Number of bits
                for (i = 0; i < b; ++i)
                {
                    counts.Bits[comp][i][((d >> i) & 1)] += (uint)incr;
                }

                counts.Fp[comp][f] += (uint)incr;
                counts.Hp[comp][e] += (uint)(usehp * incr);
            }
        }

        /// <summary>
        /// Gets the joint type describing which components of this vector are non-zero.
        /// </summary>
        /// <returns>The joint type of this vector</returns>
        private MvJointType GetMvJoint()
        {
            if (Row == 0)
            {
                return Col == 0 ? MvJointType.MvJointZero : MvJointType.MvJointHnzvz;
            }
            else
            {
                return Col == 0 ? MvJointType.MvJointHzvnz : MvJointType.MvJointHnzvnz;
            }
        }

        /// <summary>
        /// Adds this vector to the backward adaptation counters. Does nothing when no counters are supplied.
        /// </summary>
        /// <param name="counts">Counters to update, or a null pointer</param>
        internal void IncMv(Ptr<Vp9BackwardUpdates> counts)
        {
            if (!counts.IsNull)
            {
                MvJointType j = GetMvJoint();
                ++counts.Value.Joints[(int)j];

                if (MvJointVertical(j))
                {
                    IncMvComponent(Row, ref counts.Value, 0, 1, 1);
                }

                if (MvJointHorizontal(j))
                {
                    IncMvComponent(Col, ref counts.Value, 1, 1, 1);
                }
            }
        }

        /// <summary>
        /// Clamps both components to the supplied ranges.
        /// </summary>
        /// <remarks>
        /// Unlike <see cref="Math.Clamp(int, int, int)"/>, this never throws when a lower bound is
        /// greater than the matching upper bound: the lower bound is tested first, then the upper one.
        /// Such inverted bounds can be produced by <see cref="ClampMvRef"/> when the block extends
        /// far past the edge of a very small frame.
        /// </remarks>
        /// <param name="minCol">Lower bound for the column</param>
        /// <param name="maxCol">Upper bound for the column</param>
        /// <param name="minRow">Lower bound for the row</param>
        /// <param name="maxRow">Upper bound for the row</param>
        public void ClampMv(int minCol, int maxCol, int minRow, int maxRow)
        {
            int ClampValue(int value, int low, int high)
            {
                return value < low ? low : (value > high ? high : value);
            }

            Col = (short)ClampValue(Col, minCol, maxCol);
            Row = (short)ClampValue(Row, minRow, maxRow);
        }

        private const int MvBorder = (16 << 3); // Allow 16 pels in 1/8th pel units

        /// <summary>
        /// Clamps this vector so that it points at most <see cref="MvBorder"/> units past the frame edges.
        /// </summary>
        /// <param name="xd">Block decoding state providing the distances to the frame edges</param>
        public void ClampMvRef(ref MacroBlockD xd)
        {
            ClampMv(
                xd.MbToLeftEdge - MvBorder,
                xd.MbToRightEdge + MvBorder,
                xd.MbToTopEdge - MvBorder,
                xd.MbToBottomEdge + MvBorder);
        }

        /// <summary>
        /// Rounds odd components towards zero when high precision is not in use.
        /// </summary>
        /// <param name="allowHP">Whether high precision motion vectors are allowed at all</param>
        public void LowerMvPrecision(bool allowHP)
        {
            bool useHP = allowHP && UseMvHp();
            if (!useHP)
            {
                if ((Row & 1) != 0)
                {
                    Row += (short)(Row > 0 ? -1 : 1);
                }

                if ((Col & 1) != 0)
                {
                    Col += (short)(Col > 0 ? -1 : 1);
                }
            }
        }
    }
}
|
8
Ryujinx.Graphics.Nvdec.Vp9/Types/Mv32.cs
Normal file
8
Ryujinx.Graphics.Nvdec.Vp9/Types/Mv32.cs
Normal file
@ -0,0 +1,8 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Motion vector with 32-bit components.
    /// </summary>
    internal struct Mv32
    {
        /// <summary>
        /// Row (vertical) component.
        /// </summary>
        public int Row;

        /// <summary>
        /// Column (horizontal) component.
        /// </summary>
        public int Col;
    }
}
|
17
Ryujinx.Graphics.Nvdec.Vp9/Types/MvClassType.cs
Normal file
17
Ryujinx.Graphics.Nvdec.Vp9/Types/MvClassType.cs
Normal file
@ -0,0 +1,17 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Motion vector magnitude classes. Each class covers the integer pel range
    /// given next to it; the numeric values must not be changed.
    /// </summary>
    internal enum MvClassType
    {
        /// <summary>(0, 2] integer pel</summary>
        MvClass0 = 0,

        /// <summary>(2, 4] integer pel</summary>
        MvClass1 = 1,

        /// <summary>(4, 8] integer pel</summary>
        MvClass2 = 2,

        /// <summary>(8, 16] integer pel</summary>
        MvClass3 = 3,

        /// <summary>(16, 32] integer pel</summary>
        MvClass4 = 4,

        /// <summary>(32, 64] integer pel</summary>
        MvClass5 = 5,

        /// <summary>(64, 128] integer pel</summary>
        MvClass6 = 6,

        /// <summary>(128, 256] integer pel</summary>
        MvClass7 = 7,

        /// <summary>(256, 512] integer pel</summary>
        MvClass8 = 8,

        /// <summary>(512, 1024] integer pel</summary>
        MvClass9 = 9,

        /// <summary>(1024, 2048] integer pel</summary>
        MvClass10 = 10,
    }
}
|
10
Ryujinx.Graphics.Nvdec.Vp9/Types/MvJointType.cs
Normal file
10
Ryujinx.Graphics.Nvdec.Vp9/Types/MvJointType.cs
Normal file
@ -0,0 +1,10 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Describes which components of a motion vector are non-zero.
    /// </summary>
    internal enum MvJointType
    {
        /// <summary>Zero vector</summary>
        MvJointZero = 0,

        /// <summary>Vert zero, hor nonzero</summary>
        MvJointHnzvz = 1,

        /// <summary>Hor zero, vert nonzero</summary>
        MvJointHzvnz = 2,

        /// <summary>Both components nonzero</summary>
        MvJointHnzvnz = 3,
    }
}
|
10
Ryujinx.Graphics.Nvdec.Vp9/Types/MvRef.cs
Normal file
10
Ryujinx.Graphics.Nvdec.Vp9/Types/MvRef.cs
Normal file
@ -0,0 +1,10 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Two motion vectors together with two reference frame values.
    /// </summary>
    internal struct MvRef
    {
        /// <summary>
        /// The two motion vectors.
        /// </summary>
        public Array2<Mv> Mv;

        /// <summary>
        /// The two reference frame values (presumably one per motion vector; confirm against users).
        /// </summary>
        public Array2<sbyte> RefFrame;
    }
}
|
12
Ryujinx.Graphics.Nvdec.Vp9/Types/PartitionType.cs
Normal file
12
Ryujinx.Graphics.Nvdec.Vp9/Types/PartitionType.cs
Normal file
@ -0,0 +1,12 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Ways in which a block can be partitioned.
    /// </summary>
    internal enum PartitionType
    {
        PartitionNone  = 0,
        PartitionHorz  = 1,
        PartitionVert  = 2,
        PartitionSplit = 3,

        /// <summary>
        /// Number of valid partition types.
        /// </summary>
        PartitionTypes = 4,

        /// <summary>
        /// Marker for an invalid partition; shares its value with <see cref="PartitionTypes"/>.
        /// </summary>
        PartitionInvalid = PartitionTypes
    }
}
|
9
Ryujinx.Graphics.Nvdec.Vp9/Types/PlaneType.cs
Normal file
9
Ryujinx.Graphics.Nvdec.Vp9/Types/PlaneType.cs
Normal file
@ -0,0 +1,9 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Kinds of planes distinguished by the decoder.
    /// </summary>
    internal enum PlaneType
    {
        Y  = 0,
        Uv = 1,

        /// <summary>
        /// Number of plane types.
        /// </summary>
        PlaneTypes = 2
    }
}
|
14
Ryujinx.Graphics.Nvdec.Vp9/Types/Position.cs
Normal file
14
Ryujinx.Graphics.Nvdec.Vp9/Types/Position.cs
Normal file
@ -0,0 +1,14 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Row/column pair.
    /// </summary>
    internal struct Position
    {
        /// <summary>
        /// Row component.
        /// </summary>
        public int Row;

        /// <summary>
        /// Column component.
        /// </summary>
        public int Col;

        /// <summary>
        /// Creates a new position.
        /// </summary>
        /// <param name="row">Row component</param>
        /// <param name="col">Column component</param>
        public Position(int row, int col)
        {
            (Row, Col) = (row, col);
        }
    }
}
|
21
Ryujinx.Graphics.Nvdec.Vp9/Types/PredictionMode.cs
Normal file
21
Ryujinx.Graphics.Nvdec.Vp9/Types/PredictionMode.cs
Normal file
@ -0,0 +1,21 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Prediction modes. Values 0 to 9 are the intra modes, values 10 to 13 the
    /// motion vector modes; the numeric values are used as table indices and
    /// must not be changed.
    /// </summary>
    internal enum PredictionMode
    {
        /// <summary>Average of above and left pixels</summary>
        DcPred = 0,

        /// <summary>Vertical</summary>
        VPred = 1,

        /// <summary>Horizontal</summary>
        HPred = 2,

        /// <summary>Directional 45 deg = round(arctan(1 / 1) * 180 / pi)</summary>
        D45Pred = 3,

        /// <summary>Directional 135 deg = 180 - 45</summary>
        D135Pred = 4,

        /// <summary>Directional 117 deg = 180 - 63</summary>
        D117Pred = 5,

        /// <summary>Directional 153 deg = 180 - 27</summary>
        D153Pred = 6,

        /// <summary>Directional 207 deg = 180 + 27</summary>
        D207Pred = 7,

        /// <summary>Directional 63 deg = round(arctan(2 / 1) * 180 / pi)</summary>
        D63Pred = 8,

        /// <summary>True-motion</summary>
        TmPred = 9,

        NearestMv = 10,
        NearMv = 11,
        ZeroMv = 12,
        NewMv = 13,

        /// <summary>Number of prediction modes</summary>
        MbModeCount = 14
    }
}
|
8
Ryujinx.Graphics.Nvdec.Vp9/Types/RefBuffer.cs
Normal file
8
Ryujinx.Graphics.Nvdec.Vp9/Types/RefBuffer.cs
Normal file
@ -0,0 +1,8 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Reference frame entry: a surface and its scale factors.
    /// </summary>
    internal struct RefBuffer
    {
        /// <summary>
        /// Surface holding the reference frame.
        /// </summary>
        public Surface Buf;

        /// <summary>
        /// Scale factors associated with <see cref="Buf"/>.
        /// </summary>
        public ScaleFactors Sf;
    }
}
|
10
Ryujinx.Graphics.Nvdec.Vp9/Types/ReferenceMode.cs
Normal file
10
Ryujinx.Graphics.Nvdec.Vp9/Types/ReferenceMode.cs
Normal file
@ -0,0 +1,10 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
    /// <summary>
    /// Reference modes. The numeric values must not be changed.
    /// </summary>
    internal enum ReferenceMode
    {
        /// <summary>Single reference.</summary>
        SingleReference = 0,

        /// <summary>Compound reference.</summary>
        CompoundReference = 1,

        /// <summary>Reference mode select.</summary>
        ReferenceModeSelect = 2,

        /// <summary>Number of reference modes.</summary>
        ReferenceModes = 3
    }
}
|
451
Ryujinx.Graphics.Nvdec.Vp9/Types/ScaleFactors.cs
Normal file
451
Ryujinx.Graphics.Nvdec.Vp9/Types/ScaleFactors.cs
Normal file
@ -0,0 +1,451 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using System.Runtime.CompilerServices;
|
||||
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Convolve;
|
||||
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Filter;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
{
|
||||
internal struct ScaleFactors
|
||||
{
|
||||
private const int RefScaleShift = 14;
|
||||
private const int RefNoScale = (1 << RefScaleShift);
|
||||
private const int RefInvalidScale = -1;
|
||||
|
||||
private unsafe delegate void ConvolveFn(
|
||||
byte* src,
|
||||
int srcStride,
|
||||
byte* dst,
|
||||
int dstStride,
|
||||
Array8<short>[] filter,
|
||||
int x0Q4,
|
||||
int xStepQ4,
|
||||
int y0Q4,
|
||||
int yStepQ4,
|
||||
int w,
|
||||
int h);
|
||||
|
||||
// Signature shared by every entry of the high bit depth prediction tables.
// Same as ConvolveFn but operating on 16-bit samples, with a trailing "bd" argument.
private unsafe delegate void HighbdConvolveFn(
    ushort* src, int srcStride,
    ushort* dst, int dstStride,
    Array8<short>[] filter,
    int x0Q4, int xStepQ4,
    int y0Q4, int yStepQ4,
    int w, int h,
    int bd);
|
||||
|
||||
// Convolution dispatch tables. Every table is indexed as [horiz][vert][avg],
// which is how InterPredict and HighbdInterPredict look entries up.

// Used when XStepQ4 == 16 and YStepQ4 == 16.
private static readonly unsafe ConvolveFn[][][] PredictX16Y16 =
{
    new[]
    {
        new ConvolveFn[] { ConvolveCopy, ConvolveAvg },
        new ConvolveFn[] { Convolve8Vert, Convolve8AvgVert }
    },
    new[]
    {
        new ConvolveFn[] { Convolve8Horiz, Convolve8AvgHoriz },
        new ConvolveFn[] { Convolve8, Convolve8Avg }
    }
};

// Used when XStepQ4 == 16 and YStepQ4 != 16.
private static readonly unsafe ConvolveFn[][][] PredictX16 =
{
    new[]
    {
        new ConvolveFn[] { ScaledVert, ScaledAvgVert },
        new ConvolveFn[] { ScaledVert, ScaledAvgVert }
    },
    new[]
    {
        new ConvolveFn[] { Scaled2D, ScaledAvg2D },
        new ConvolveFn[] { Scaled2D, ScaledAvg2D }
    }
};

// Used when XStepQ4 != 16 and YStepQ4 == 16.
private static readonly unsafe ConvolveFn[][][] PredictY16 =
{
    new[]
    {
        new ConvolveFn[] { ScaledHoriz, ScaledAvgHoriz },
        new ConvolveFn[] { Scaled2D, ScaledAvg2D }
    },
    new[]
    {
        new ConvolveFn[] { ScaledHoriz, ScaledAvgHoriz },
        new ConvolveFn[] { Scaled2D, ScaledAvg2D }
    }
};

// Used when neither step equals 16.
private static readonly unsafe ConvolveFn[][][] Predict =
{
    new[]
    {
        new ConvolveFn[] { Scaled2D, ScaledAvg2D },
        new ConvolveFn[] { Scaled2D, ScaledAvg2D }
    },
    new[]
    {
        new ConvolveFn[] { Scaled2D, ScaledAvg2D },
        new ConvolveFn[] { Scaled2D, ScaledAvg2D }
    }
};

// High bit depth counterpart of PredictX16Y16.
private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictX16Y16 =
{
    new[]
    {
        new HighbdConvolveFn[] { HighbdConvolveCopy, HighbdConvolveAvg },
        new HighbdConvolveFn[] { HighbdConvolve8Vert, HighbdConvolve8AvgVert }
    },
    new[]
    {
        new HighbdConvolveFn[] { HighbdConvolve8Horiz, HighbdConvolve8AvgHoriz },
        new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg }
    }
};

// High bit depth counterpart of PredictX16.
private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictX16 =
{
    new[]
    {
        new HighbdConvolveFn[] { HighbdConvolve8Vert, HighbdConvolve8AvgVert },
        new HighbdConvolveFn[] { HighbdConvolve8Vert, HighbdConvolve8AvgVert }
    },
    new[]
    {
        new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
        new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg }
    }
};

// High bit depth counterpart of PredictY16.
private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictY16 =
{
    new[]
    {
        new HighbdConvolveFn[] { HighbdConvolve8Horiz, HighbdConvolve8AvgHoriz },
        new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg }
    },
    new[]
    {
        new HighbdConvolveFn[] { HighbdConvolve8Horiz, HighbdConvolve8AvgHoriz },
        new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg }
    }
};

// High bit depth counterpart of Predict.
private static readonly unsafe HighbdConvolveFn[][][] HighbdPredict =
{
    new[]
    {
        new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
        new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg }
    },
    new[]
    {
        new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
        new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg }
    }
};
|
||||
|
||||
// Horizontal fixed point scale factor.
public int XScaleFP;

// Vertical fixed point scale factor.
public int YScaleFP;

// Horizontal step; SetupScaleFactorsForFrame stores ScaledX(16) here.
// A value of 16 selects the unscaled horizontal prediction paths.
public int XStepQ4;

// Vertical step; SetupScaleFactorsForFrame stores ScaledY(16) here.
// A value of 16 selects the unscaled vertical prediction paths.
public int YStepQ4;
|
||||
|
||||
/// <summary>
/// Applies the horizontal scale factor to <paramref name="val"/> when scaling
/// is active; otherwise returns the value untouched.
/// </summary>
public int ScaleValueX(int val)
{
    if (!IsScaled())
    {
        return val;
    }

    return ScaledX(val);
}
|
||||
|
||||
/// <summary>
/// Applies the vertical scale factor to <paramref name="val"/> when scaling
/// is active; otherwise returns the value untouched.
/// </summary>
public int ScaleValueY(int val)
{
    if (!IsScaled())
    {
        return val;
    }

    return ScaledY(val);
}
|
||||
|
||||
/// <summary>
/// Runs the 8-bit convolution selected by the current step values and by the
/// <paramref name="horiz"/>/<paramref name="vert"/>/<paramref name="avg"/> table indices.
/// </summary>
/// <remarks>
/// The table is chosen by comparing <see cref="XStepQ4"/> and <see cref="YStepQ4"/>
/// against 16; every other argument is forwarded to the selected function unchanged.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public unsafe void InterPredict(
    int horiz,
    int vert,
    int avg,
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    int subpelX,
    int subpelY,
    int w,
    int h,
    Array8<short>[] kernel,
    int xs,
    int ys)
{
    bool unscaledX = XStepQ4 == 16;
    bool unscaledY = YStepQ4 == 16;

    ConvolveFn[][][] table;

    if (unscaledX)
    {
        // No scaling in x; y is either unscaled too, or must always be scaled.
        table = unscaledY ? PredictX16Y16 : PredictX16;
    }
    else
    {
        // x must always be scaled; y is either unscaled, or scaled as well.
        table = unscaledY ? PredictY16 : Predict;
    }

    table[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h);
}
|
||||
|
||||
/// <summary>
/// High bit depth variant of the inter prediction dispatch: runs the 16-bit
/// convolution selected by the current step values and by the
/// <paramref name="horiz"/>/<paramref name="vert"/>/<paramref name="avg"/> table indices.
/// </summary>
/// <remarks>
/// The table is chosen by comparing <see cref="XStepQ4"/> and <see cref="YStepQ4"/>
/// against 16; every other argument, including <paramref name="bd"/>, is forwarded unchanged.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public unsafe void HighbdInterPredict(
    int horiz,
    int vert,
    int avg,
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    int subpelX,
    int subpelY,
    int w,
    int h,
    Array8<short>[] kernel,
    int xs,
    int ys,
    int bd)
{
    bool unscaledX = XStepQ4 == 16;
    bool unscaledY = YStepQ4 == 16;

    HighbdConvolveFn[][][] table;

    if (unscaledX)
    {
        // No scaling in x; y is either unscaled too, or must always be scaled.
        table = unscaledY ? HighbdPredictX16Y16 : HighbdPredictX16;
    }
    else
    {
        // x must always be scaled; y is either unscaled, or scaled as well.
        table = unscaledY ? HighbdPredictY16 : HighbdPredict;
    }

    table[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h, bd);
}
|
||||
|
||||
// Multiplies val by the horizontal fixed point factor. The product is formed
// in 64 bits before the fractional bits are shifted out.
private int ScaledX(int val)
{
    long product = (long)val * XScaleFP;

    return (int)(product >> RefScaleShift);
}
|
||||
|
||||
// Multiplies val by the vertical fixed point factor. The product is formed
// in 64 bits before the fractional bits are shifted out.
private int ScaledY(int val)
{
    long product = (long)val * YScaleFP;

    return (int)(product >> RefScaleShift);
}
|
||||
|
||||
// Computes otherSize / thisSize as a fixed point value with RefScaleShift
// fractional bits.
//
// The factor is calculated once per reference frame so that decoding and
// encoding routines can scale with a multiply and a shift. Hardware
// implementations can calculate it in the device driver and avoid division
// on the hardware itself.
private static int GetFixedPointScaleFactor(int otherSize, int thisSize)
{
    int numerator = otherSize << RefScaleShift;

    return numerator / thisSize;
}
|
||||
|
||||
/// <summary>
/// Scales a motion vector and adds the sub-pixel part of the scaled
/// (<paramref name="x"/>, <paramref name="y"/>) position to it.
/// </summary>
/// <param name="mv">Motion vector to scale.</param>
/// <param name="x">Horizontal position; shifted left by SubpelBits before scaling.</param>
/// <param name="y">Vertical position; shifted left by SubpelBits before scaling.</param>
/// <returns>The scaled vector with the masked position offsets added.</returns>
public Mv32 ScaleMv(ref Mv mv, int x, int y)
{
    // Only the bits selected by SubpelMask of the scaled position are kept.
    int fracX = ScaledX(x << SubpelBits) & SubpelMask;
    int fracY = ScaledY(y << SubpelBits) & SubpelMask;

    return new Mv32()
    {
        Row = ScaledY(mv.Row) + fracY,
        Col = ScaledX(mv.Col) + fracX
    };
}
|
||||
|
||||
/// <summary>
/// Returns true when neither scale factor holds the invalid-scale sentinel.
/// </summary>
public bool IsValidScale()
{
    return !(XScaleFP == RefInvalidScale || YScaleFP == RefInvalidScale);
}
|
||||
|
||||
/// <summary>
/// Returns true when the scale factors are valid and at least one of them
/// differs from the 1:1 fixed point value.
/// </summary>
public bool IsScaled()
{
    if (!IsValidScale())
    {
        return false;
    }

    return XScaleFP != RefNoScale || YScaleFP != RefNoScale;
}
|
||||
|
||||
/// <summary>
/// Checks that a reference frame size is usable with the current frame size:
/// the reference may be at most twice as large, and the current frame at most
/// sixteen times as large, in each dimension.
/// </summary>
public static bool ValidRefFrameSize(int refWidth, int refHeight, int thisWidth, int thisHeight)
{
    // Reference more than 2x larger than the current frame.
    if (2 * thisWidth < refWidth || 2 * thisHeight < refHeight)
    {
        return false;
    }

    // Current frame must not be more than 16x larger than the reference.
    return thisWidth <= 16 * refWidth && thisHeight <= 16 * refHeight;
}
|
||||
|
||||
/// <summary>
/// Computes the scale factors and steps that map a frame of size
/// <paramref name="thisW"/> x <paramref name="thisH"/> onto a reference of size
/// <paramref name="otherW"/> x <paramref name="otherH"/>.
/// </summary>
/// <remarks>
/// When the size combination is rejected by <see cref="ValidRefFrameSize"/>,
/// both scale factors are set to the invalid sentinel and the steps are left untouched.
/// </remarks>
public void SetupScaleFactorsForFrame(int otherW, int otherH, int thisW, int thisH)
{
    bool sizeOk = ValidRefFrameSize(otherW, otherH, thisW, thisH);

    if (sizeOk)
    {
        XScaleFP = GetFixedPointScaleFactor(otherW, thisW);
        YScaleFP = GetFixedPointScaleFactor(otherH, thisH);

        // The steps depend on the factors assigned just above.
        XStepQ4 = ScaledX(16);
        YStepQ4 = ScaledY(16);
    }
    else
    {
        XScaleFP = RefInvalidScale;
        YScaleFP = RefInvalidScale;
    }
}
|
||||
}
|
||||
}
|
11
Ryujinx.Graphics.Nvdec.Vp9/Types/SegLvlFeatures.cs
Normal file
11
Ryujinx.Graphics.Nvdec.Vp9/Types/SegLvlFeatures.cs
Normal file
@ -0,0 +1,11 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
{
|
||||
/// <summary>
/// Per-segment features. Members are numbered consecutively from zero.
/// </summary>
internal enum SegLvlFeatures
{
    SegLvlAltQ,     // 0: Use alternate Quantizer ....
    SegLvlAltLf,    // 1: Use alternate loop filter value...
    SegLvlRefFrame, // 2: Optional Segment reference frame
    SegLvlSkip,     // 3: Optional Segment (0,0) + skip mode
    SegLvlMax       // 4: Number of features supported
}
|
||||
}
|
71
Ryujinx.Graphics.Nvdec.Vp9/Types/Segmentation.cs
Normal file
71
Ryujinx.Graphics.Nvdec.Vp9/Types/Segmentation.cs
Normal file
@ -0,0 +1,71 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
{
|
||||
/// <summary>
/// Segmentation state: global flags plus, for each of the 8 segments, a bit mask
/// of enabled features and one data value per feature.
/// </summary>
internal struct Segmentation
{
    // Indexed by SegLvlFeatures: non-zero when the feature's data may be negative.
    private static readonly int[] SegFeatureDataSigned = { 1, 1, 0, 0 };

    // Indexed by SegLvlFeatures: largest magnitude accepted by SetSegData.
    private static readonly int[] SegFeatureDataMax = { QuantCommon.MaxQ, Vp9.LoopFilter.MaxLoopFilter, 3, 0 };

    public bool Enabled;
    public bool UpdateMap;
    public byte UpdateData;
    public byte AbsDelta;
    public bool TemporalUpdate;

    // FeatureData[segment][feature] holds the value for an enabled feature.
    public Array8<Array4<short>> FeatureData;

    // FeatureMask[segment] has bit N set when feature N is enabled.
    public Array8<uint> FeatureMask;
    public int AqAvOffset;

    /// <summary>
    /// Picks the segment id prediction probability for the context reported by <paramref name="xd"/>.
    /// </summary>
    public static byte GetPredProbSegId(ref Array3<byte> segPredProbs, ref MacroBlockD xd)
    {
        var context = xd.GetPredContextSegId();

        return segPredProbs[context];
    }

    /// <summary>
    /// Zeroes all feature data and feature masks, and resets <see cref="AqAvOffset"/>.
    /// </summary>
    public void ClearAllSegFeatures()
    {
        // Both tables are treated as flat runs: 8 segments x 4 features, and 8 masks.
        MemoryMarshal.CreateSpan(ref FeatureData[0][0], 8 * 4).Clear();
        MemoryMarshal.CreateSpan(ref FeatureMask[0], 8).Clear();
        AqAvOffset = 0;
    }

    /// <summary>Sets the mask bit of <paramref name="featureId"/> for the given segment.</summary>
    internal void EnableSegFeature(int segmentId, SegLvlFeatures featureId)
    {
        uint featureBit = 1u << (int)featureId;

        FeatureMask[segmentId] |= featureBit;
    }

    /// <summary>Returns the maximum data magnitude allowed for <paramref name="featureId"/>.</summary>
    internal static int FeatureDataMax(SegLvlFeatures featureId) => SegFeatureDataMax[(int)featureId];

    /// <summary>Returns non-zero when the data of <paramref name="featureId"/> may be negative.</summary>
    internal static int IsSegFeatureSigned(SegLvlFeatures featureId) => SegFeatureDataSigned[(int)featureId];

    /// <summary>
    /// Stores <paramref name="segData"/> for a segment/feature pair, truncated to 16 bits.
    /// Range and sign are checked with debug assertions only.
    /// </summary>
    internal void SetSegData(int segmentId, SegLvlFeatures featureId, int segData)
    {
        int feature = (int)featureId;

        Debug.Assert(segData <= SegFeatureDataMax[feature]);

        if (segData < 0)
        {
            // Negative values are only legal for signed features, within the same bound.
            Debug.Assert(SegFeatureDataSigned[feature] != 0);
            Debug.Assert(-segData <= SegFeatureDataMax[feature]);
        }

        FeatureData[segmentId][feature] = (short)segData;
    }

    /// <summary>
    /// Returns 1 when segmentation is enabled and the feature's mask bit is set
    /// for the segment, 0 otherwise.
    /// </summary>
    internal int IsSegFeatureActive(int segmentId, SegLvlFeatures featureId)
    {
        if (!Enabled)
        {
            return 0;
        }

        uint featureBit = 1u << (int)featureId;

        return (FeatureMask[segmentId] & featureBit) != 0 ? 1 : 0;
    }

    /// <summary>Returns the stored data value for a segment/feature pair.</summary>
    internal short GetSegData(int segmentId, SegLvlFeatures featureId) => FeatureData[segmentId][(int)featureId];
}
|
||||
}
|
80
Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs
Normal file
80
Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs
Normal file
@ -0,0 +1,80 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Video;
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
{
|
||||
/// <summary>
/// Three-plane (Y, U, V) 8-bit surface whose planes live in a single unmanaged
/// allocation. Each plane is surrounded by a border; the plane buffers point at
/// the first sample inside that border.
/// </summary>
internal struct Surface : ISurface
{
    public ArrayPtr<byte> YBuffer;
    public ArrayPtr<byte> UBuffer;
    public ArrayPtr<byte> VBuffer;

    public unsafe Plane YPlane => new Plane((IntPtr)YBuffer.ToPointer(), YBuffer.Length);
    public unsafe Plane UPlane => new Plane((IntPtr)UBuffer.ToPointer(), UBuffer.Length);
    public unsafe Plane VPlane => new Plane((IntPtr)VBuffer.ToPointer(), VBuffer.Length);

    public int Width { get; }
    public int Height { get; }
    public int AlignedWidth { get; }
    public int AlignedHeight { get; }
    public int Stride { get; }
    public int UvWidth { get; }
    public int UvHeight { get; }
    public int UvAlignedWidth { get; }
    public int UvAlignedHeight { get; }
    public int UvStride { get; }
    public bool HighBd => false;

    // Base address of the unmanaged allocation; released by Dispose.
    private readonly IntPtr _pointer;

    /// <summary>
    /// Allocates unmanaged storage for a surface of the given size. Dimensions
    /// are aligned up to a multiple of 8 and the chroma planes are subsampled
    /// by two in both directions.
    /// </summary>
    /// <param name="width">Visible width in pixels.</param>
    /// <param name="height">Visible height in pixels.</param>
    public Surface(int width, int height)
    {
        const int border = 32;
        const int ssX = 1;
        const int ssY = 1;
        const bool highbd = false;

        // Luma geometry: dimensions rounded up to 8, stride rounded up to 32.
        int lumaWidth = (width + 7) & ~7;
        int lumaHeight = (height + 7) & ~7;
        int lumaStride = ((lumaWidth + 2 * border) + 31) & ~31;
        int lumaSize = (lumaHeight + 2 * border) * lumaStride;

        // Chroma geometry, derived from the luma values by the subsampling shifts.
        int chromaWidth = lumaWidth >> ssX;
        int chromaHeight = lumaHeight >> ssY;
        int chromaStride = lumaStride >> ssX;
        int chromaBorderW = border >> ssX;
        int chromaBorderH = border >> ssY;
        int chromaSize = (chromaHeight + 2 * chromaBorderH) * chromaStride;

        int totalSize = (highbd ? 2 : 1) * (lumaSize + 2 * chromaSize);

        IntPtr basePointer = Marshal.AllocHGlobal(totalSize);

        // Offset from the start of a plane to its first sample inside the border.
        int lumaSkip = (border * lumaStride) + border;
        int chromaSkip = (chromaBorderH * chromaStride) + chromaBorderW;

        YBuffer = new ArrayPtr<byte>(basePointer + 0 + lumaSkip, lumaSize - lumaSkip);
        UBuffer = new ArrayPtr<byte>(basePointer + lumaSize + chromaSkip, chromaSize - chromaSkip);
        VBuffer = new ArrayPtr<byte>(basePointer + (lumaSize + chromaSize) + chromaSkip, chromaSize - chromaSkip);

        _pointer = basePointer;
        Width = width;
        Height = height;
        AlignedWidth = lumaWidth;
        AlignedHeight = lumaHeight;
        Stride = lumaStride;
        UvWidth = (width + ssX) >> ssX;
        UvHeight = (height + ssY) >> ssY;
        UvAlignedWidth = chromaWidth;
        UvAlignedHeight = chromaHeight;
        UvStride = chromaStride;
    }

    /// <summary>Releases the unmanaged allocation backing all three planes.</summary>
    public void Dispose()
    {
        Marshal.FreeHGlobal(_pointer);
    }
}
|
||||
}
|
85
Ryujinx.Graphics.Nvdec.Vp9/Types/TileInfo.cs
Normal file
85
Ryujinx.Graphics.Nvdec.Vp9/Types/TileInfo.cs
Normal file
@ -0,0 +1,85 @@
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
{
|
||||
/// <summary>
/// Row and column bounds of one tile, in the same units as Vp9Common.MiRows
/// and Vp9Common.MiCols. Each end bound is the start bound of the next tile.
/// </summary>
internal struct TileInfo
{
    private const int MinTileWidthB64 = 4;
    private const int MaxTileWidthB64 = 64;

    public int MiRowStart;
    public int MiRowEnd;
    public int MiColStart;
    public int MiColEnd;

    /// <summary>Aligns a count of mode info units using Constants.MiBlockSizeLog2.</summary>
    public static int MiColsAlignedToSb(int nMis) => BitUtils.AlignPowerOfTwo(nMis, Constants.MiBlockSizeLog2);

    // Start offset of tile number idx when mis units are split into (1 << log2)
    // tiles; the result never exceeds mis.
    private static int GetTileOffset(int idx, int mis, int log2)
    {
        int superblocks = MiColsAlignedToSb(mis) >> Constants.MiBlockSizeLog2;
        int start = ((idx * superblocks) >> log2) << Constants.MiBlockSizeLog2;

        return start < mis ? start : mis;
    }

    /// <summary>Sets the row bounds for tile row <paramref name="row"/>.</summary>
    public void SetRow(ref Vp9Common cm, int row)
    {
        int totalRows = cm.MiRows;
        int log2Rows = cm.Log2TileRows;

        MiRowStart = GetTileOffset(row, totalRows, log2Rows);
        MiRowEnd = GetTileOffset(row + 1, totalRows, log2Rows);
    }

    /// <summary>Sets the column bounds for tile column <paramref name="col"/>.</summary>
    public void SetCol(ref Vp9Common cm, int col)
    {
        int totalCols = cm.MiCols;
        int log2Cols = cm.Log2TileCols;

        MiColStart = GetTileOffset(col, totalCols, log2Cols);
        MiColEnd = GetTileOffset(col + 1, totalCols, log2Cols);
    }

    /// <summary>Sets both the row and the column bounds.</summary>
    public void Init(ref Vp9Common cm, int row, int col)
    {
        SetRow(ref cm, row);
        SetCol(ref cm, col);
    }

    /// <summary>
    /// Checks that the position reached from (<paramref name="miRow"/>, <paramref name="miCol"/>)
    /// by the offset <paramref name="miPos"/> lies inside the tile's columns and
    /// inside rows 0 .. <paramref name="miRows"/> - 1.
    /// </summary>
    public bool IsInside(int miCol, int miRow, int miRows, ref Position miPos)
    {
        return miRow + miPos.Row >= 0 &&
               miCol + miPos.Col >= MiColStart &&
               miRow + miPos.Row < miRows &&
               miCol + miPos.Col < MiColEnd;
    }

    // Smallest log2 tile column count for which MaxTileWidthB64 << log2 reaches sb64Cols.
    private static int GetMinLog2TileCols(int sb64Cols)
    {
        int log2 = 0;

        for (; (MaxTileWidthB64 << log2) < sb64Cols; log2++)
        {
        }

        return log2;
    }

    // Largest log2 tile column count that keeps sb64Cols >> log2 at or above
    // MinTileWidthB64; 0 when even a single split would be too narrow.
    private static int GetMaxLog2TileCols(int sb64Cols)
    {
        int log2 = 1;

        for (; (sb64Cols >> log2) >= MinTileWidthB64; log2++)
        {
        }

        return log2 - 1;
    }

    /// <summary>
    /// Computes the minimum and maximum log2 tile column counts for a frame
    /// that is <paramref name="miCols"/> mode info units wide.
    /// </summary>
    public static void GetTileNBits(int miCols, ref int minLog2TileCols, ref int maxLog2TileCols)
    {
        int widthInSb64 = MiColsAlignedToSb(miCols) >> Constants.MiBlockSizeLog2;

        minLog2TileCols = GetMinLog2TileCols(widthInSb64);
        maxLog2TileCols = GetMaxLog2TileCols(widthInSb64);
        Debug.Assert(minLog2TileCols <= maxLog2TileCols);
    }
}
|
||||
}
|
12
Ryujinx.Graphics.Nvdec.Vp9/Types/TxMode.cs
Normal file
12
Ryujinx.Graphics.Nvdec.Vp9/Types/TxMode.cs
Normal file
@ -0,0 +1,12 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
{
|
||||
/// <summary>
/// Transform size selection modes. Members are numbered consecutively from zero.
/// </summary>
public enum TxMode
{
    Only4X4,      // 0: Only 4x4 transform used
    Allow8X8,     // 1: Allow block transform size up to 8x8
    Allow16X16,   // 2: Allow block transform size up to 16x16
    Allow32X32,   // 3: Allow block transform size up to 32x32
    TxModeSelect, // 4: Transform specified for each block
    TxModes       // 5
}
|
||||
}
|
11
Ryujinx.Graphics.Nvdec.Vp9/Types/TxSize.cs
Normal file
11
Ryujinx.Graphics.Nvdec.Vp9/Types/TxSize.cs
Normal file
@ -0,0 +1,11 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
{
|
||||
/// <summary>
/// Transform sizes. Members are numbered consecutively from zero.
/// </summary>
public enum TxSize
{
    Tx4x4,   // 0: 4x4 transform
    Tx8x8,   // 1: 8x8 transform
    Tx16x16, // 2: 16x16 transform
    Tx32x32, // 3: 32x32 transform
    TxSizes  // 4
}
|
||||
}
|
11
Ryujinx.Graphics.Nvdec.Vp9/Types/TxType.cs
Normal file
11
Ryujinx.Graphics.Nvdec.Vp9/Types/TxType.cs
Normal file
@ -0,0 +1,11 @@
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
{
|
||||
/// <summary>
/// Transform type combinations. Members are numbered consecutively from zero.
/// </summary>
internal enum TxType
{
    DctDct,   // 0: DCT in both horizontal and vertical
    AdstDct,  // 1: ADST in vertical, DCT in horizontal
    DctAdst,  // 2: DCT in vertical, ADST in horizontal
    AdstAdst, // 3: ADST in both directions
    TxTypes   // 4
}
|
||||
}
|
334
Ryujinx.Graphics.Nvdec.Vp9/Types/Vp9Common.cs
Normal file
334
Ryujinx.Graphics.Nvdec.Vp9/Types/Vp9Common.cs
Normal file
@ -0,0 +1,334 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using Ryujinx.Graphics.Video;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
|
||||
{
|
||||
internal struct Vp9Common
|
||||
{
|
||||
public MacroBlockD Mb;

public ArrayPtr<TileWorkerData> TileWorkerData;

public InternalErrorInfo Error;

// Frame dimensions.
public int Width;
public int Height;

public int SubsamplingX;
public int SubsamplingY;

// Motion vector buffers for the previous and the current frame.
public ArrayPtr<MvRef> PrevFrameMvs;
public ArrayPtr<MvRef> CurFrameMvs;

public Array3<RefBuffer> FrameRefs;

public FrameType FrameType;

// Flag signaling that the frame is encoded using only Intra modes.
public bool IntraOnly;

public bool AllowHighPrecisionMv;

// MBs, MbRows/Cols is in 16-pixel units; MiRows/Cols is in
// ModeInfo (8-pixel) units.
public int MBs;
public int MbRows;
public int MiRows;
public int MbCols;
public int MiCols;
public int MiStride;

// Profile settings
public TxMode TxMode;

public int BaseQindex;
public int YDcDeltaQ;
public int UvDcDeltaQ;
public int UvAcDeltaQ;
public Array8<Array2<short>> YDequant;
public Array8<Array2<short>> UvDequant;

// We allocate a ModeInfo struct for each macroblock, together with
// an extra row on top and column on the left to simplify prediction.
public ArrayPtr<ModeInfo> Mip; // Base of allocated array
public ArrayPtr<ModeInfo> Mi;  // Corresponds to upper left visible macroblock

public ArrayPtr<Ptr<ModeInfo>> MiGridBase;
public ArrayPtr<Ptr<ModeInfo>> MiGridVisible;

// Whether to use previous frame's motion vectors for prediction.
public bool UsePrevFrameMvs;

// Persistent mb segment id map used in prediction.
public int SegMapIdx;
public int PrevSegMapIdx;

public Array2<ArrayPtr<byte>> SegMapArray;
public ArrayPtr<byte> LastFrameSegMap;
public ArrayPtr<byte> CurrentFrameSegMap;

public byte InterpFilter;

public LoopFilterInfoN LfInfo;

public Array4<sbyte> RefFrameSignBias; // Two state 0, 1

public LoopFilter Lf;
public Segmentation Seg;

// Context probabilities for reference frame prediction
public sbyte CompFixedRef;
public Array2<sbyte> CompVarRef;
public ReferenceMode ReferenceMode;

public Ptr<Vp9EntropyProbs> Fc;
public Ptr<Vp9BackwardUpdates> Counts;

public bool FrameParallelDecodingMode;

public int Log2TileCols;
public int Log2TileRows;

public ArrayPtr<sbyte> AboveSegContext;
public ArrayPtr<sbyte> AboveContext;
public int AboveContextAllocCols;
|
||||
|
||||
/// <summary>
/// Returns true for key frames and for frames flagged as intra-only.
/// </summary>
public bool FrameIsIntraOnly()
{
    if (IntraOnly)
    {
        return true;
    }

    return FrameType == FrameType.KeyFrame;
}
|
||||
|
||||
/// <summary>
/// Returns true when any sign bias entry from index 2 up to
/// Constants.RefsPerFrame differs from the entry at index 1.
/// </summary>
public bool CompoundReferenceAllowed()
{
    sbyte firstBias = RefFrameSignBias[1];

    for (int refIndex = 2; refIndex <= Constants.RefsPerFrame; refIndex++)
    {
        if (RefFrameSignBias[refIndex] != firstBias)
        {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
// Pads a length given in mi units with Constants.MiBlockSize extra entries.
private static int CalcMiSize(int len)
{
    int padded = len + Constants.MiBlockSize;

    return padded;
}
|
||||
|
||||
/// <summary>
/// Derives the mode info (Mi*) and macroblock (Mb*) dimensions from a frame
/// size in pixels.
/// </summary>
public void SetMbMi(int width, int height)
{
    int log2 = Constants.MiSizeLog2;

    // Mode info grid: pixel dimensions aligned, then converted to mi units.
    MiCols = BitUtils.AlignPowerOfTwo(width, log2) >> log2;
    MiRows = BitUtils.AlignPowerOfTwo(height, log2) >> log2;
    MiStride = CalcMiSize(MiCols);

    // One macroblock covers two mi units in each direction, rounded up.
    MbCols = (MiCols + 1) >> 1;
    MbRows = (MiRows + 1) >> 1;
    MBs = MbRows * MbCols;
}
|
||||
|
||||
/// <summary>
/// Allocates one <c>TileWorkerData</c> entry per tile
/// (<paramref name="tileCols"/> x <paramref name="tileRows"/>).
/// </summary>
public void AllocTileWorkerData(MemoryAllocator allocator, int tileCols, int tileRows)
{
    int tileCount = tileCols * tileRows;

    TileWorkerData = allocator.Allocate<TileWorkerData>(tileCount);
}
|
||||
|
||||
/// <summary>
/// Hands the tile worker data buffer back to <paramref name="allocator"/>.
/// The field itself is not reset.
/// </summary>
public void FreeTileWorkerData(MemoryAllocator allocator)
{
    allocator.Free(TileWorkerData);
}
|
||||
|
||||
// Allocates every segmentation map buffer and selects buffer 0 as the
// current map and buffer 1 as the previous one.
private void AllocSegMap(MemoryAllocator allocator, int segMapSize)
{
    for (int buffer = 0; buffer < Constants.NumPingPongBuffers; buffer++)
    {
        SegMapArray[buffer] = allocator.Allocate<byte>(segMapSize);
    }

    // Init the index.
    SegMapIdx = 0;
    PrevSegMapIdx = 1;

    CurrentFrameSegMap = SegMapArray[SegMapIdx];
    LastFrameSegMap = SegMapArray[PrevSegMapIdx];
}
|
||||
|
||||
// Frees every segmentation map buffer and clears all references to them.
private void FreeSegMap(MemoryAllocator allocator)
{
    for (int buffer = 0; buffer < Constants.NumPingPongBuffers; buffer++)
    {
        allocator.Free(SegMapArray[buffer]);
        SegMapArray[buffer] = ArrayPtr<byte>.Null;
    }

    CurrentFrameSegMap = ArrayPtr<byte>.Null;
    LastFrameSegMap = ArrayPtr<byte>.Null;
}
|
||||
|
||||
// Allocates the mode info array and the matching grid of mode info pointers,
// both with miSize entries.
private void DecAllocMi(MemoryAllocator allocator, int miSize)
{
    Mip = allocator.Allocate<ModeInfo>(miSize);
    MiGridBase = allocator.Allocate<Ptr<ModeInfo>>(miSize);
}
|
||||
|
||||
// Frees the mode info array and the pointer grid, then nulls both fields.
private void DecFreeMi(MemoryAllocator allocator)
{
    allocator.Free(Mip);
    Mip = ArrayPtr<ModeInfo>.Null;

    allocator.Free(MiGridBase);
    MiGridBase = ArrayPtr<Ptr<ModeInfo>>.Null;
}
|
||||
|
||||
/// <summary>
/// Frees the buffers obtained by <see cref="AllocContextBuffers"/> and resets
/// the corresponding fields to null.
/// </summary>
/// <remarks>
/// The previous frame motion vector buffer is only freed while
/// <see cref="UsePrevFrameMvs"/> is set, mirroring the allocation condition.
/// </remarks>
public void FreeContextBuffers(MemoryAllocator allocator)
{
    DecFreeMi(allocator);
    FreeSegMap(allocator);

    allocator.Free(AboveContext);
    AboveContext = ArrayPtr<sbyte>.Null;

    allocator.Free(AboveSegContext);
    AboveSegContext = ArrayPtr<sbyte>.Null;

    allocator.Free(Lf.Lfm);
    Lf.Lfm = ArrayPtr<LoopFilterMask>.Null;

    allocator.Free(CurFrameMvs);
    CurFrameMvs = ArrayPtr<MvRef>.Null;

    if (!UsePrevFrameMvs)
    {
        return;
    }

    allocator.Free(PrevFrameMvs);
    PrevFrameMvs = ArrayPtr<MvRef>.Null;
}
|
||||
|
||||
// Allocates the loop filter mask array.
//
// Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The
// stride and rows are rounded up / truncated to a multiple of 8.
private void AllocLoopFilter(MemoryAllocator allocator)
{
    int roundUp = Constants.MiBlockSize - 1;
    int maskRows = (MiRows + roundUp) >> 3;

    Lf.LfmStride = (MiCols + roundUp) >> 3;
    Lf.Lfm = allocator.Allocate<LoopFilterMask>(maskRows * Lf.LfmStride);
}
|
||||
|
||||
/// <summary>
/// Recomputes the frame dimensions for the given pixel size and allocates
/// the per-frame context buffers from <paramref name="allocator"/>.
/// </summary>
/// <param name="allocator">Allocator providing the buffers.</param>
/// <param name="width">Frame width in pixels.</param>
/// <param name="height">Frame height in pixels.</param>
public void AllocContextBuffers(MemoryAllocator allocator, int width, int height)
{
    SetMbMi(width, height);

    int miArraySize = MiStride * CalcMiSize(MiRows);
    if (miArraySize != 0)
    {
        DecAllocMi(allocator, miArraySize);
    }

    int miCount = MiRows * MiCols;
    if (miCount != 0)
    {
        // Create the segmentation map structure and set to 0.
        AllocSegMap(allocator, miCount);
    }

    if (MiCols != 0)
    {
        int alignedCols = TileInfo.MiColsAlignedToSb(MiCols);

        AboveContext = allocator.Allocate<sbyte>(2 * alignedCols * Constants.MaxMbPlane);
        AboveSegContext = allocator.Allocate<sbyte>(alignedCols);
    }

    AllocLoopFilter(allocator);

    CurFrameMvs = allocator.Allocate<MvRef>(miCount);

    // Using the same size as the current frame is fine here,
    // as this is never true when we have a resolution change.
    if (UsePrevFrameMvs)
    {
        PrevFrameMvs = allocator.Allocate<MvRef>(miCount);
    }
}
|
||||
|
||||
// Points Mi and MiGridVisible one row and one column into their backing
// arrays, then fills the first MiStride * (MiRows + 1) grid entries with
// null pointers.
private unsafe void DecSetupMi()
{
    int visibleOffset = MiStride + 1;

    Mi = Mip.Slice(visibleOffset);
    MiGridVisible = MiGridBase.Slice(visibleOffset);

    int entriesToClear = MiStride * (MiRows + 1);
    MemoryUtil.Fill(MiGridBase.ToPointer(), Ptr<ModeInfo>.Null, entriesToClear);
}
|
||||
|
||||
/// <summary>
/// Sets up the mode info pointers and, when a previous frame segmentation
/// map exists, zeroes it.
/// </summary>
public unsafe void InitContextBuffers()
{
    DecSetupMi();

    if (LastFrameSegMap.IsNull)
    {
        return;
    }

    MemoryUtil.Fill(LastFrameSegMap.ToPointer(), (byte)0, MiRows * MiCols);
}
|
||||
|
||||
// Points xd.PartitionProbs at the 16-entry partition probability table that
// matches the frame kind: KfPartitionProb for intra-only frames,
// PartitionProb otherwise.
private void SetPartitionProbs(ref MacroBlockD xd)
{
    if (FrameIsIntraOnly())
    {
        xd.PartitionProbs = new ArrayPtr<Array3<byte>>(ref Fc.Value.KfPartitionProb[0], 16);
    }
    else
    {
        xd.PartitionProbs = new ArrayPtr<Array3<byte>>(ref Fc.Value.PartitionProb[0], 16);
    }
}
|
||||
|
||||
/// <summary>
/// Initializes <paramref name="xd"/> from this frame's state: per-plane
/// coefficient buffer, above-context slice and dequantization table, plus the
/// shared segment context, mode info stride, error info and probabilities.
/// </summary>
/// <param name="xd">Macroblock descriptor to initialize.</param>
/// <param name="dqcoeff">Coefficient buffer assigned to every plane.</param>
internal void InitMacroBlockD(ref MacroBlockD xd, ArrayPtr<int> dqcoeff)
{
    // Every plane owns a slice of AboveContext that is this many entries long.
    int contextPerPlane = 2 * TileInfo.MiColsAlignedToSb(MiCols);

    for (int plane = 0; plane < Constants.MaxMbPlane; plane++)
    {
        xd.Plane[plane].DqCoeff = dqcoeff;
        xd.AboveContext[plane] = AboveContext.Slice(plane * contextPerPlane);

        // Plane 0 uses the Y table, all other planes share the UV table.
        if (plane != 0)
        {
            MemoryUtil.Copy(ref xd.Plane[plane].SegDequant, ref UvDequant);
        }
        else
        {
            MemoryUtil.Copy(ref xd.Plane[plane].SegDequant, ref YDequant);
        }

        xd.Fc = new Ptr<Vp9EntropyProbs>(ref Fc.Value);
    }

    xd.AboveSegContext = AboveSegContext;
    xd.MiStride = MiStride;
    xd.ErrorInfo = new Ptr<InternalErrorInfo>(ref Error);

    SetPartitionProbs(ref xd);
}
|
||||
|
||||
/// <summary>
/// Builds the Y and UV dequantization values, one DC/AC pair per segment
/// when segmentation is enabled, or a single pair otherwise.
/// </summary>
public void SetupSegmentationDequant()
{
    const BitDepth bitDepth = BitDepth.Bits8; // TODO: Configurable

    bool segmented = Seg.Enabled;

    // When segmentation is disabled, only the first value is used. The
    // remaining are don't cares.
    int entryCount = segmented ? Constants.MaxSegments : 1;

    for (int segment = 0; segment < entryCount; segment++)
    {
        // Build y/uv dequant values based on segmentation.
        int qIndex = segmented ? QuantCommon.GetQIndex(ref Seg, segment, BaseQindex) : BaseQindex;

        YDequant[segment][0] = QuantCommon.DcQuant(qIndex, YDcDeltaQ, bitDepth);
        YDequant[segment][1] = QuantCommon.AcQuant(qIndex, 0, bitDepth);
        UvDequant[segment][0] = QuantCommon.DcQuant(qIndex, UvDcDeltaQ, bitDepth);
        UvDequant[segment][1] = QuantCommon.AcQuant(qIndex, UvAcDeltaQ, bitDepth);
    }
}
|
||||
|
||||
/// <summary>
/// Recomputes the scale factors of every frame reference from the size of its
/// surface and the size of the current frame.
/// </summary>
public void SetupScaleFactors()
{
    for (int refIndex = 0; refIndex < Constants.RefsPerFrame; refIndex++)
    {
        ref RefBuffer frameRef = ref FrameRefs[refIndex];

        int refWidth = frameRef.Buf.Width;
        int refHeight = frameRef.Buf.Height;

        frameRef.Sf.SetupScaleFactorsForFrame(refWidth, refHeight, Width, Height);
    }
}
|
||||
}
|
||||
}
|
@ -1,103 +0,0 @@
|
||||
using Ryujinx.Graphics.Gpu;
|
||||
using Ryujinx.Graphics.VDec;
|
||||
using Ryujinx.Graphics.Vic;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.Graphics
|
||||
{
|
||||
/// <summary>
/// Splits a raw command buffer into channel commands and forwards the data
/// writes to the video decoder or the video image composer.
/// </summary>
public class CdmaProcessor
{
    // Method that selects the target method offset for following data writes.
    private const int MethSetMethod = 0x10;

    // Method that carries the data for the previously selected method offset.
    private const int MethSetData = 0x11;

    private readonly VideoDecoder _videoDecoder;
    private readonly VideoImageComposer _videoImageComposer;

    public CdmaProcessor()
    {
        VideoDecoder decoder = new VideoDecoder();

        _videoDecoder = decoder;
        _videoImageComposer = new VideoImageComposer(decoder);
    }

    /// <summary>
    /// Parses <paramref name="cmdBuffer"/> and processes the resulting commands.
    /// </summary>
    /// <remarks>
    /// Each header word carries a 16-bit value in bits 0-15, a 12-bit method
    /// offset in bits 16-27 and the submission mode in bits 28-31. For the
    /// incrementing and non-incrementing modes the value is the number of
    /// argument words that follow the header.
    /// </remarks>
    /// <param name="gpu">GPU context handed to the command handlers.</param>
    /// <param name="cmdBuffer">Raw command words.</param>
    public void PushCommands(GpuContext gpu, int[] cmdBuffer)
    {
        List<ChCommand> parsed = new List<ChCommand>();

        ChClassId activeClass = 0;

        int position = 0;

        while (position < cmdBuffer.Length)
        {
            int header = cmdBuffer[position];

            int value = header & 0xffff;
            int methodOffset = (header >> 16) & 0xfff;

            ChSubmissionMode mode = (ChSubmissionMode)((header >> 28) & 0xf);

            if (mode == ChSubmissionMode.SetClass)
            {
                activeClass = (ChClassId)(value >> 6);
            }
            else if (mode == ChSubmissionMode.Incrementing)
            {
                // One command per argument, each targeting the next method offset.
                for (int argIdx = 0; argIdx < value; argIdx++)
                {
                    position++;

                    int argument = cmdBuffer[position];

                    parsed.Add(new ChCommand(activeClass, methodOffset + argIdx, argument));
                }
            }
            else if (mode == ChSubmissionMode.NonIncrementing)
            {
                // A single command that receives all the arguments.
                int[] arguments = new int[value];

                for (int argIdx = 0; argIdx < value; argIdx++)
                {
                    position++;

                    arguments[argIdx] = cmdBuffer[position];
                }

                parsed.Add(new ChCommand(activeClass, methodOffset, arguments));
            }

            position++;
        }

        ProcessCommands(gpu, parsed.ToArray());
    }

    // Tracks the method offset selected through MethSetMethod and dispatches
    // every MethSetData command to the engine matching the command class.
    private void ProcessCommands(GpuContext gpu, ChCommand[] commands)
    {
        int selectedMethod = 0;

        foreach (ChCommand command in commands)
        {
            var offset = command.MethodOffset;

            if (offset == MethSetMethod)
            {
                selectedMethod = command.Arguments[0];

                continue;
            }

            if (offset != MethSetData)
            {
                continue;
            }

            if (command.ClassId == ChClassId.NvDec)
            {
                _videoDecoder.Process(gpu, selectedMethod, command.Arguments);
            }
            else if (command.ClassId == ChClassId.GraphicsVic)
            {
                _videoImageComposer.Process(gpu, selectedMethod, command.Arguments);
            }
        }
    }
}
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user